Coverage Report

Created: 2023-06-07 07:17

/src/icu/source/common/ucnv.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1998-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
*  ucnv.c:
12
*  Implements APIs for the ICU's codeset conversion library;
13
*  mostly calls through internal functions;
14
*  created by Bertrand A. Damiba
15
*
16
* Modification History:
17
*
18
*   Date        Name        Description
19
*   04/04/99    helena      Fixed internal header inclusion.
20
*   05/09/00    helena      Added implementation to handle fallback mappings.
21
*   06/20/2000  helena      OS/400 port changes; mostly typecast.
22
*/
23
24
#include "unicode/utypes.h"
25
26
#if !UCONFIG_NO_CONVERSION
27
28
#include "unicode/ustring.h"
29
#include "unicode/ucnv.h"
30
#include "unicode/ucnv_err.h"
31
#include "unicode/uset.h"
32
#include "unicode/utf.h"
33
#include "unicode/utf16.h"
34
#include "putilimp.h"
35
#include "cmemory.h"
36
#include "cstring.h"
37
#include "uassert.h"
38
#include "utracimp.h"
39
#include "ustr_imp.h"
40
#include "ucnv_imp.h"
41
#include "ucnv_cnv.h"
42
#include "ucnv_bld.h"
43
44
/* size of intermediate and preflighting buffers in ucnv_convert() */
45
0
#define CHUNK_SIZE 1024
46
47
typedef struct UAmbiguousConverter {
48
    const char *name;
49
    const UChar variant5c;
50
} UAmbiguousConverter;
51
52
static const UAmbiguousConverter ambiguousConverters[]={
53
    { "ibm-897_P100-1995", 0xa5 },
54
    { "ibm-942_P120-1999", 0xa5 },
55
    { "ibm-943_P130-1999", 0xa5 },
56
    { "ibm-946_P100-1995", 0xa5 },
57
    { "ibm-33722_P120-1999", 0xa5 },
58
    { "ibm-1041_P100-1995", 0xa5 },
59
    /*{ "ibm-54191_P100-2006", 0xa5 },*/
60
    /*{ "ibm-62383_P100-2007", 0xa5 },*/
61
    /*{ "ibm-891_P100-1995", 0x20a9 },*/
62
    { "ibm-944_P100-1995", 0x20a9 },
63
    { "ibm-949_P110-1999", 0x20a9 },
64
    { "ibm-1363_P110-1997", 0x20a9 },
65
    { "ISO_2022,locale=ko,version=0", 0x20a9 },
66
    { "ibm-1088_P100-1995", 0x20a9 }
67
};
68
69
/*Calls through createConverter */
70
U_CAPI UConverter* U_EXPORT2
71
ucnv_open (const char *name,
72
                       UErrorCode * err)
73
4.76k
{
74
4.76k
    UConverter *r;
75
76
4.76k
    if (err == NULL || U_FAILURE (*err)) {
77
0
        return NULL;
78
0
    }
79
80
4.76k
    r =  ucnv_createConverter(NULL, name, err);
81
4.76k
    return r;
82
4.76k
}
83
84
/*
 * Opens a converter from a named package. All three arguments are handed
 * to ucnv_createConverterFromPackage() unchanged, and its result is
 * returned as is.
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
{
    UConverter *opened = ucnv_createConverterFromPackage(packageName, converterName, err);

    return opened;
}
89
90
/*Extracts the UChar* to a char* and calls through createConverter */
91
U_CAPI UConverter*   U_EXPORT2
92
ucnv_openU (const UChar * name,
93
                         UErrorCode * err)
94
0
{
95
0
    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
96
97
0
    if (err == NULL || U_FAILURE(*err))
98
0
        return NULL;
99
0
    if (name == NULL)
100
0
        return ucnv_open (NULL, err);
101
0
    if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
102
0
    {
103
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
104
0
        return NULL;
105
0
    }
106
0
    return ucnv_open(u_austrcpy(asciiName, name), err);
107
0
}
108
109
/* Copy the string that is represented by the UConverterPlatform enum
110
 * @param platformString An output buffer
111
 * @param platform An enum representing a platform
112
 * @return the length of the copied string.
113
 */
114
static int32_t
115
ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
116
0
{
117
0
    switch (pltfrm)
118
0
    {
119
0
    case UCNV_IBM:
120
0
        uprv_strcpy(platformString, "ibm-");
121
0
        return 4;
122
0
    case UCNV_UNKNOWN:
123
0
        break;
124
0
    }
125
126
    /* default to empty string */
127
0
    *platformString = 0;
128
0
    return 0;
129
0
}
130
131
/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
132
 *through createConverter*/
133
U_CAPI UConverter*   U_EXPORT2
134
ucnv_openCCSID (int32_t codepage,
135
                UConverterPlatform platform,
136
                UErrorCode * err)
137
0
{
138
0
    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
139
0
    int32_t myNameLen;
140
141
0
    if (err == NULL || U_FAILURE (*err))
142
0
        return NULL;
143
144
    /* ucnv_copyPlatformString could return "ibm-" or "cp" */
145
0
    myNameLen = ucnv_copyPlatformString(myName, platform);
146
0
    T_CString_integerToString(myName + myNameLen, codepage, 10);
147
148
0
    return ucnv_createConverter(NULL, myName, err);
149
0
}
150
151
/* Creating a temporary stack-based object that can be used in one thread, 
152
and created from a converter that is shared across threads.
153
*/
154
155
U_CAPI UConverter* U_EXPORT2
156
ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
157
0
{
158
0
    UConverter *localConverter, *allocatedConverter;
159
0
    int32_t stackBufferSize;
160
0
    int32_t bufferSizeNeeded;
161
0
    char *stackBufferChars = (char *)stackBuffer;
162
0
    UErrorCode cbErr;
163
0
    UConverterToUnicodeArgs toUArgs = {
164
0
        sizeof(UConverterToUnicodeArgs),
165
0
            TRUE,
166
0
            NULL,
167
0
            NULL,
168
0
            NULL,
169
0
            NULL,
170
0
            NULL,
171
0
            NULL
172
0
    };
173
0
    UConverterFromUnicodeArgs fromUArgs = {
174
0
        sizeof(UConverterFromUnicodeArgs),
175
0
            TRUE,
176
0
            NULL,
177
0
            NULL,
178
0
            NULL,
179
0
            NULL,
180
0
            NULL,
181
0
            NULL
182
0
    };
183
184
0
    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
185
186
0
    if (status == NULL || U_FAILURE(*status)){
187
0
        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
188
0
        return NULL;
189
0
    }
190
191
0
    if (cnv == NULL) {
192
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
193
0
        UTRACE_EXIT_STATUS(*status);
194
0
        return NULL;
195
0
    }
196
197
0
    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
198
0
                                    ucnv_getName(cnv, status), cnv, stackBuffer);
199
200
0
    if (cnv->sharedData->impl->safeClone != NULL) {
201
        /* call the custom safeClone function for sizing */
202
0
        bufferSizeNeeded = 0;
203
0
        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
204
0
        if (U_FAILURE(*status)) {
205
0
            UTRACE_EXIT_STATUS(*status);
206
0
            return NULL;
207
0
        }
208
0
    }
209
0
    else
210
0
    {
211
        /* inherent sizing */
212
0
        bufferSizeNeeded = sizeof(UConverter);
213
0
    }
214
215
0
    if (pBufferSize == NULL) {
216
0
        stackBufferSize = 1;
217
0
        pBufferSize = &stackBufferSize;
218
0
    } else {
219
0
        stackBufferSize = *pBufferSize;
220
0
        if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
221
0
            *pBufferSize = bufferSizeNeeded;
222
0
            UTRACE_EXIT_VALUE(bufferSizeNeeded);
223
0
            return NULL;
224
0
        }
225
0
    }
226
227
228
    /* Pointers on 64-bit platforms need to be aligned
229
     * on a 64-bit boundary in memory.
230
     */
231
0
    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
232
0
        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
233
0
        if(stackBufferSize > offsetUp) {
234
0
            stackBufferSize -= offsetUp;
235
0
            stackBufferChars += offsetUp;
236
0
        } else {
237
            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
238
0
            stackBufferSize = 1;
239
0
        }
240
0
    }
241
242
0
    stackBuffer = (void *)stackBufferChars;
243
    
244
    /* Now, see if we must allocate any memory */
245
0
    if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
246
0
    {
247
        /* allocate one here...*/
248
0
        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
249
250
0
        if(localConverter == NULL) {
251
0
            *status = U_MEMORY_ALLOCATION_ERROR;
252
0
            UTRACE_EXIT_STATUS(*status);
253
0
            return NULL;
254
0
        }
255
0
        *status = U_SAFECLONE_ALLOCATED_WARNING;
256
257
        /* record the fact that memory was allocated */
258
0
        *pBufferSize = bufferSizeNeeded;
259
0
    } else {
260
        /* just use the stack buffer */
261
0
        localConverter = (UConverter*) stackBuffer;
262
0
        allocatedConverter = NULL;
263
0
    }
264
265
0
    uprv_memset(localConverter, 0, bufferSizeNeeded);
266
267
    /* Copy initial state */
268
0
    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
269
0
    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
270
271
    /* copy the substitution string */
272
0
    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
273
0
        localConverter->subChars = (uint8_t *)localConverter->subUChars;
274
0
    } else {
275
0
        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
276
0
        if (localConverter->subChars == NULL) {
277
0
            uprv_free(allocatedConverter);
278
0
            UTRACE_EXIT_STATUS(*status);
279
0
            return NULL;
280
0
        }
281
0
        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
282
0
    }
283
284
    /* now either call the safeclone fcn or not */
285
0
    if (cnv->sharedData->impl->safeClone != NULL) {
286
        /* call the custom safeClone function */
287
0
        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
288
0
    }
289
290
0
    if(localConverter==NULL || U_FAILURE(*status)) {
291
0
        if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
292
0
            uprv_free(allocatedConverter->subChars);
293
0
        }
294
0
        uprv_free(allocatedConverter);
295
0
        UTRACE_EXIT_STATUS(*status);
296
0
        return NULL;
297
0
    }
298
299
    /* increment refcount of shared data if needed */
300
0
    if (cnv->sharedData->isReferenceCounted) {
301
0
        ucnv_incrementRefCount(cnv->sharedData);
302
0
    }
303
304
0
    if(localConverter == (UConverter*)stackBuffer) {
305
        /* we're using user provided data - set to not destroy */
306
0
        localConverter->isCopyLocal = TRUE;
307
0
    }
308
309
    /* allow callback functions to handle any memory allocation */
310
0
    toUArgs.converter = fromUArgs.converter = localConverter;
311
0
    cbErr = U_ZERO_ERROR;
312
0
    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
313
0
    cbErr = U_ZERO_ERROR;
314
0
    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
315
316
0
    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
317
0
    return localConverter;
318
0
}
319
320
321
322
/*Decreases the reference counter in the shared immutable section of the object
323
 *and frees the mutable part*/
324
325
U_CAPI void  U_EXPORT2
326
ucnv_close (UConverter * converter)
327
4.76k
{
328
4.76k
    UErrorCode errorCode = U_ZERO_ERROR;
329
330
4.76k
    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
331
332
4.76k
    if (converter == NULL)
333
0
    {
334
0
        UTRACE_EXIT();
335
0
        return;
336
0
    }
337
338
4.76k
    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
339
4.76k
        ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
340
341
    /* In order to speed up the close, only call the callbacks when they have been changed.
342
    This performance check will only work when the callbacks are set within a shared library
343
    or from user code that statically links this code. */
344
    /* first, notify the callback functions that the converter is closed */
345
4.76k
    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
346
0
        UConverterToUnicodeArgs toUArgs = {
347
0
            sizeof(UConverterToUnicodeArgs),
348
0
                TRUE,
349
0
                NULL,
350
0
                NULL,
351
0
                NULL,
352
0
                NULL,
353
0
                NULL,
354
0
                NULL
355
0
        };
356
357
0
        toUArgs.converter = converter;
358
0
        errorCode = U_ZERO_ERROR;
359
0
        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
360
0
    }
361
4.76k
    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
362
0
        UConverterFromUnicodeArgs fromUArgs = {
363
0
            sizeof(UConverterFromUnicodeArgs),
364
0
                TRUE,
365
0
                NULL,
366
0
                NULL,
367
0
                NULL,
368
0
                NULL,
369
0
                NULL,
370
0
                NULL
371
0
        };
372
0
        fromUArgs.converter = converter;
373
0
        errorCode = U_ZERO_ERROR;
374
0
        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
375
0
    }
376
377
4.76k
    if (converter->sharedData->impl->close != NULL) {
378
0
        converter->sharedData->impl->close(converter);
379
0
    }
380
381
4.76k
    if (converter->subChars != (uint8_t *)converter->subUChars) {
382
0
        uprv_free(converter->subChars);
383
0
    }
384
385
4.76k
    if (converter->sharedData->isReferenceCounted) {
386
0
        ucnv_unloadSharedDataIfReady(converter->sharedData);
387
0
    }
388
389
4.76k
    if(!converter->isCopyLocal){
390
4.76k
        uprv_free(converter);
391
4.76k
    }
392
393
4.76k
    UTRACE_EXIT();
394
4.76k
}
395
396
/*returns a single Name from the list, will return NULL if out of bounds
397
 */
398
U_CAPI const char*   U_EXPORT2
399
ucnv_getAvailableName (int32_t n)
400
0
{
401
0
    if (0 <= n && n <= 0xffff) {
402
0
        UErrorCode err = U_ZERO_ERROR;
403
0
        const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
404
0
        if (U_SUCCESS(err)) {
405
0
            return name;
406
0
        }
407
0
    }
408
0
    return NULL;
409
0
}
410
411
U_CAPI int32_t   U_EXPORT2
412
ucnv_countAvailable ()
413
0
{
414
0
    UErrorCode err = U_ZERO_ERROR;
415
0
    return ucnv_bld_countAvailableConverters(&err);
416
0
}
417
418
U_CAPI void    U_EXPORT2
419
ucnv_getSubstChars (const UConverter * converter,
420
                    char *mySubChar,
421
                    int8_t * len,
422
                    UErrorCode * err)
423
0
{
424
0
    if (U_FAILURE (*err))
425
0
        return;
426
427
0
    if (converter->subCharLen <= 0) {
428
        /* Unicode string or empty string from ucnv_setSubstString(). */
429
0
        *len = 0;
430
0
        return;
431
0
    }
432
433
0
    if (*len < converter->subCharLen) /*not enough space in subChars */
434
0
    {
435
0
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
436
0
        return;
437
0
    }
438
439
0
    uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
440
0
    *len = converter->subCharLen; /*store # of bytes copied to buffer */
441
0
}
442
443
U_CAPI void    U_EXPORT2
444
ucnv_setSubstChars (UConverter * converter,
445
                    const char *mySubChar,
446
                    int8_t len,
447
                    UErrorCode * err)
448
0
{
449
0
    if (U_FAILURE (*err))
450
0
        return;
451
    
452
    /*Makes sure that the subChar is within the codepages char length boundaries */
453
0
    if ((len > converter->sharedData->staticData->maxBytesPerChar)
454
0
     || (len < converter->sharedData->staticData->minBytesPerChar))
455
0
    {
456
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
457
0
        return;
458
0
    }
459
    
460
0
    uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
461
0
    converter->subCharLen = len;  /*sets the new len */
462
463
    /*
464
    * There is currently (2001Feb) no separate API to set/get subChar1.
465
    * In order to always have subChar written after it is explicitly set,
466
    * we set subChar1 to 0.
467
    */
468
0
    converter->subChar1 = 0;
469
    
470
0
    return;
471
0
}
472
473
U_CAPI void U_EXPORT2
474
ucnv_setSubstString(UConverter *cnv,
475
                    const UChar *s,
476
                    int32_t length,
477
0
                    UErrorCode *err) {
478
0
    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
479
0
    char chars[UCNV_ERROR_BUFFER_LENGTH];
480
481
0
    UConverter *clone;
482
0
    uint8_t *subChars;
483
0
    int32_t cloneSize, length8;
484
485
    /* Let the following functions check all arguments. */
486
0
    cloneSize = sizeof(cloneBuffer);
487
0
    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
488
0
    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
489
0
    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
490
0
    ucnv_close(clone);
491
0
    if (U_FAILURE(*err)) {
492
0
        return;
493
0
    }
494
495
0
    if (cnv->sharedData->impl->writeSub == NULL
496
0
#if !UCONFIG_NO_LEGACY_CONVERSION
497
0
        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
498
0
         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
499
0
#endif
500
0
    ) {
501
        /* The converter is not stateful. Store the charset bytes as a fixed string. */
502
0
        subChars = (uint8_t *)chars;
503
0
    } else {
504
        /*
505
         * The converter has a non-default writeSub() function, indicating
506
         * that it is stateful.
507
         * Store the Unicode string for on-the-fly conversion for correct
508
         * state handling.
509
         */
510
0
        if (length > UCNV_ERROR_BUFFER_LENGTH) {
511
            /*
512
             * Should not occur. The converter should output at least one byte
513
             * per UChar, which means that ucnv_fromUChars() should catch all
514
             * overflows.
515
             */
516
0
            *err = U_BUFFER_OVERFLOW_ERROR;
517
0
            return;
518
0
        }
519
0
        subChars = (uint8_t *)s;
520
0
        if (length < 0) {
521
0
            length = u_strlen(s);
522
0
        }
523
0
        length8 = length * U_SIZEOF_UCHAR;
524
0
    }
525
526
    /*
527
     * For storing the substitution string, select either the small buffer inside
528
     * UConverter or allocate a subChars buffer.
529
     */
530
0
    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
531
        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
532
0
        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
533
            /* Allocate a new buffer for the string. */
534
0
            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
535
0
            if (cnv->subChars == NULL) {
536
0
                cnv->subChars = (uint8_t *)cnv->subUChars;
537
0
                *err = U_MEMORY_ALLOCATION_ERROR;
538
0
                return;
539
0
            }
540
0
            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
541
0
        }
542
0
    }
543
544
    /* Copy the substitution string into the UConverter or its subChars buffer. */
545
0
    if (length8 == 0) {
546
0
        cnv->subCharLen = 0;
547
0
    } else {
548
0
        uprv_memcpy(cnv->subChars, subChars, length8);
549
0
        if (subChars == (uint8_t *)chars) {
550
0
            cnv->subCharLen = (int8_t)length8;
551
0
        } else /* subChars == s */ {
552
0
            cnv->subCharLen = (int8_t)-length;
553
0
        }
554
0
    }
555
556
    /* See comment in ucnv_setSubstChars(). */
557
0
    cnv->subChar1 = 0;
558
0
}
559
560
/*resets the internal states of a converter
561
 *goal : have the same behaviour than a freshly created converter
562
 */
563
static void _reset(UConverter *converter, UConverterResetChoice choice,
564
7.14k
                   UBool callCallback) {
565
7.14k
    if(converter == NULL) {
566
0
        return;
567
0
    }
568
569
7.14k
    if(callCallback) {
570
        /* first, notify the callback functions that the converter is reset */
571
3.57k
        UErrorCode errorCode;
572
573
3.57k
        if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
574
0
            UConverterToUnicodeArgs toUArgs = {
575
0
                sizeof(UConverterToUnicodeArgs),
576
0
                TRUE,
577
0
                NULL,
578
0
                NULL,
579
0
                NULL,
580
0
                NULL,
581
0
                NULL,
582
0
                NULL
583
0
            };
584
0
            toUArgs.converter = converter;
585
0
            errorCode = U_ZERO_ERROR;
586
0
            converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
587
0
        }
588
3.57k
        if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
589
0
            UConverterFromUnicodeArgs fromUArgs = {
590
0
                sizeof(UConverterFromUnicodeArgs),
591
0
                TRUE,
592
0
                NULL,
593
0
                NULL,
594
0
                NULL,
595
0
                NULL,
596
0
                NULL,
597
0
                NULL
598
0
            };
599
0
            fromUArgs.converter = converter;
600
0
            errorCode = U_ZERO_ERROR;
601
0
            converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
602
0
        }
603
3.57k
    }
604
605
    /* now reset the converter itself */
606
7.14k
    if(choice<=UCNV_RESET_TO_UNICODE) {
607
7.14k
        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
608
7.14k
        converter->mode = 0;
609
7.14k
        converter->toULength = 0;
610
7.14k
        converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
611
7.14k
        converter->preToULength = 0;
612
7.14k
    }
613
7.14k
    if(choice!=UCNV_RESET_TO_UNICODE) {
614
0
        converter->fromUnicodeStatus = 0;
615
0
        converter->fromUChar32 = 0;
616
0
        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
617
0
        converter->preFromUFirstCP = U_SENTINEL;
618
0
        converter->preFromULength = 0;
619
0
    }
620
621
7.14k
    if (converter->sharedData->impl->reset != NULL) {
622
        /* call the custom reset function */
623
0
        converter->sharedData->impl->reset(converter, choice);
624
0
    }
625
7.14k
}
626
627
U_CAPI void  U_EXPORT2
628
ucnv_reset(UConverter *converter)
629
0
{
630
0
    _reset(converter, UCNV_RESET_BOTH, TRUE);
631
0
}
632
633
U_CAPI void  U_EXPORT2
634
ucnv_resetToUnicode(UConverter *converter)
635
3.57k
{
636
3.57k
    _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
637
3.57k
}
638
639
U_CAPI void  U_EXPORT2
640
ucnv_resetFromUnicode(UConverter *converter)
641
0
{
642
0
    _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
643
0
}
644
645
U_CAPI int8_t   U_EXPORT2
646
ucnv_getMaxCharSize (const UConverter * converter)
647
0
{
648
0
    return converter->maxBytesPerUChar;
649
0
}
650
651
652
U_CAPI int8_t   U_EXPORT2
653
ucnv_getMinCharSize (const UConverter * converter)
654
0
{
655
0
    return converter->sharedData->staticData->minBytesPerChar;
656
0
}
657
658
U_CAPI const char*   U_EXPORT2
659
ucnv_getName (const UConverter * converter, UErrorCode * err)
660
     
661
1.19k
{
662
1.19k
    if (U_FAILURE (*err))
663
0
        return NULL;
664
1.19k
    if(converter->sharedData->impl->getName){
665
0
        const char* temp= converter->sharedData->impl->getName(converter);
666
0
        if(temp)
667
0
            return temp;
668
0
    }
669
1.19k
    return converter->sharedData->staticData->name;
670
1.19k
}
671
672
U_CAPI int32_t U_EXPORT2
673
ucnv_getCCSID(const UConverter * converter,
674
              UErrorCode * err)
675
0
{
676
0
    int32_t ccsid;
677
0
    if (U_FAILURE (*err))
678
0
        return -1;
679
680
0
    ccsid = converter->sharedData->staticData->codepage;
681
0
    if (ccsid == 0) {
682
        /* Rare case. This is for cases like gb18030,
683
        which doesn't have an IBM canonical name, but does have an IBM alias. */
684
0
        const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
685
0
        if (U_SUCCESS(*err) && standardName) {
686
0
            const char *ccsidStr = uprv_strchr(standardName, '-');
687
0
            if (ccsidStr) {
688
0
                ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
689
0
            }
690
0
        }
691
0
    }
692
0
    return ccsid;
693
0
}
694
695
696
U_CAPI UConverterPlatform   U_EXPORT2
697
ucnv_getPlatform (const UConverter * converter,
698
                                      UErrorCode * err)
699
0
{
700
0
    if (U_FAILURE (*err))
701
0
        return UCNV_UNKNOWN;
702
703
0
    return (UConverterPlatform)converter->sharedData->staticData->platform;
704
0
}
705
706
U_CAPI void U_EXPORT2
707
    ucnv_getToUCallBack (const UConverter * converter,
708
                         UConverterToUCallback *action,
709
                         const void **context)
710
0
{
711
0
    *action = converter->fromCharErrorBehaviour;
712
0
    *context = converter->toUContext;
713
0
}
714
715
U_CAPI void U_EXPORT2
716
    ucnv_getFromUCallBack (const UConverter * converter,
717
                           UConverterFromUCallback *action,
718
                           const void **context)
719
0
{
720
0
    *action = converter->fromUCharErrorBehaviour;
721
0
    *context = converter->fromUContext;
722
0
}
723
724
U_CAPI void    U_EXPORT2
725
ucnv_setToUCallBack (UConverter * converter,
726
                            UConverterToUCallback newAction,
727
                            const void* newContext,
728
                            UConverterToUCallback *oldAction,
729
                            const void** oldContext,
730
                            UErrorCode * err)
731
0
{
732
0
    if (U_FAILURE (*err))
733
0
        return;
734
0
    if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
735
0
    converter->fromCharErrorBehaviour = newAction;
736
0
    if (oldContext) *oldContext = converter->toUContext;
737
0
    converter->toUContext = newContext;
738
0
}
739
740
U_CAPI void  U_EXPORT2
741
ucnv_setFromUCallBack (UConverter * converter,
742
                            UConverterFromUCallback newAction,
743
                            const void* newContext,
744
                            UConverterFromUCallback *oldAction,
745
                            const void** oldContext,
746
                            UErrorCode * err)
747
0
{
748
0
    if (U_FAILURE (*err))
749
0
        return;
750
0
    if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
751
0
    converter->fromUCharErrorBehaviour = newAction;
752
0
    if (oldContext) *oldContext = converter->fromUContext;
753
0
    converter->fromUContext = newContext;
754
0
}
755
756
/*
 * Adjusts an array of offsets in place.
 *
 * The shift applied is sourceIndex - errorInputLength when sourceIndex is
 * non-negative, and -1 otherwise.
 *   - shift == 0: the array is left untouched
 *   - shift  > 0: the shift is added to every offset that is >= 0
 *   - shift  < 0: every offset is set to -1 (the conversion function does
 *                 not handle offsets, or the error input sequence started
 *                 in a previous buffer)
 *
 * @param offsets          the offsets to adjust
 * @param length           number of entries in offsets
 * @param sourceIndex      previous source index, or a negative value
 * @param errorInputLength length of the input sequence that caused an
 *                         error, if any
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift;
    int32_t i;

    shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;

    if (shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if (shift > 0) {
        /* move every valid offset forward; negative offsets stay as they are */
        for (i = 0; i < length; ++i) {
            if (offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /* no usable offsets: mark every entry as unknown */
        for (i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
800
801
/* ucnv_fromUnicode --------------------------------------------------------- */
802
803
/*
804
 * Implementation note for m:n conversions
805
 *
806
 * While collecting source units to find the longest match for m:n conversion,
807
 * some source units may need to be stored for a partial match.
808
 * When a second buffer does not yield a match on all of the previously stored
809
 * source units, then they must be "replayed", i.e., fed back into the converter.
810
 *
811
 * The code relies on the fact that replaying will not nest -
812
 * converting a replay buffer will not result in a replay.
813
 * This is because a replay is necessary only after the _continuation_ of a
814
 * partial match failed, but a replay buffer is converted as a whole.
815
 * It may result in some of its units being stored again for a partial match,
816
 * but there will not be a continuation _during_ the replay which could fail.
817
 *
818
 * It is conceivable that a callback function could call the converter
819
 * recursively in a way that causes another replay to be stored, but that
820
 * would be an error in the callback function.
821
 * Such violations will cause assertion failures in a debug build,
822
 * and wrong output, but they will not cause a crash.
823
 */
824
825
static void
826
0
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
827
0
    UConverterFromUnicode fromUnicode;
828
0
    UConverter *cnv;
829
0
    const UChar *s;
830
0
    char *t;
831
0
    int32_t *offsets;
832
0
    int32_t sourceIndex;
833
0
    int32_t errorInputLength;
834
0
    UBool converterSawEndOfInput, calledCallback;
835
836
    /* variables for m:n conversion */
837
0
    UChar replay[UCNV_EXT_MAX_UCHARS];
838
0
    const UChar *realSource, *realSourceLimit;
839
0
    int32_t realSourceIndex;
840
0
    UBool realFlush;
841
842
0
    cnv=pArgs->converter;
843
0
    s=pArgs->source;
844
0
    t=pArgs->target;
845
0
    offsets=pArgs->offsets;
846
847
    /* get the converter implementation function */
848
0
    sourceIndex=0;
849
0
    if(offsets==NULL) {
850
0
        fromUnicode=cnv->sharedData->impl->fromUnicode;
851
0
    } else {
852
0
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
853
0
        if(fromUnicode==NULL) {
854
            /* there is no WithOffsets implementation */
855
0
            fromUnicode=cnv->sharedData->impl->fromUnicode;
856
            /* we will write -1 for each offset */
857
0
            sourceIndex=-1;
858
0
        }
859
0
    }
860
861
0
    if(cnv->preFromULength>=0) {
862
        /* normal mode */
863
0
        realSource=NULL;
864
865
        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
866
0
        realSourceLimit=NULL;
867
0
        realFlush=FALSE;
868
0
        realSourceIndex=0;
869
0
    } else {
870
        /*
871
         * Previous m:n conversion stored source units from a partial match
872
         * and failed to consume all of them.
873
         * We need to "replay" them from a temporary buffer and convert them first.
874
         */
875
0
        realSource=pArgs->source;
876
0
        realSourceLimit=pArgs->sourceLimit;
877
0
        realFlush=pArgs->flush;
878
0
        realSourceIndex=sourceIndex;
879
880
0
        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
881
0
        pArgs->source=replay;
882
0
        pArgs->sourceLimit=replay-cnv->preFromULength;
883
0
        pArgs->flush=FALSE;
884
0
        sourceIndex=-1;
885
886
0
        cnv->preFromULength=0;
887
0
    }
888
889
    /*
890
     * loop for conversion and error handling
891
     *
892
     * loop {
893
     *   convert
894
     *   loop {
895
     *     update offsets
896
     *     handle end of input
897
     *     handle errors/call callback
898
     *   }
899
     * }
900
     */
901
0
    for(;;) {
902
0
        if(U_SUCCESS(*err)) {
903
            /* convert */
904
0
            fromUnicode(pArgs, err);
905
906
            /*
907
             * set a flag for whether the converter
908
             * successfully processed the end of the input
909
             *
910
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
911
             * s<sourceLimit before converterSawEndOfInput is checked
912
             */
913
0
            converterSawEndOfInput=
914
0
                (UBool)(U_SUCCESS(*err) &&
915
0
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
916
0
                        cnv->fromUChar32==0);
917
0
        } else {
918
            /* handle error from ucnv_convertEx() */
919
0
            converterSawEndOfInput=FALSE;
920
0
        }
921
922
        /* no callback called yet for this iteration */
923
0
        calledCallback=FALSE;
924
925
        /* no sourceIndex adjustment for conversion, only for callback output */
926
0
        errorInputLength=0;
927
928
        /*
929
         * loop for offsets and error handling
930
         *
931
         * iterates at most 3 times:
932
         * 1. to clean up after the conversion function
933
         * 2. after the callback
934
         * 3. after the callback again if there was truncated input
935
         */
936
0
        for(;;) {
937
            /* update offsets if we write any */
938
0
            if(offsets!=NULL) {
939
0
                int32_t length=(int32_t)(pArgs->target-t);
940
0
                if(length>0) {
941
0
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
942
943
                    /*
944
                     * if a converter handles offsets and updates the offsets
945
                     * pointer at the end, then pArgs->offset should not change
946
                     * here;
947
                     * however, some converters do not handle offsets at all
948
                     * (sourceIndex<0) or may not update the offsets pointer
949
                     */
950
0
                    pArgs->offsets=offsets+=length;
951
0
                }
952
953
0
                if(sourceIndex>=0) {
954
0
                    sourceIndex+=(int32_t)(pArgs->source-s);
955
0
                }
956
0
            }
957
958
0
            if(cnv->preFromULength<0) {
959
                /*
960
                 * switch the source to new replay units (cannot occur while replaying)
961
                 * after offset handling and before end-of-input and callback handling
962
                 */
963
0
                if(realSource==NULL) {
964
0
                    realSource=pArgs->source;
965
0
                    realSourceLimit=pArgs->sourceLimit;
966
0
                    realFlush=pArgs->flush;
967
0
                    realSourceIndex=sourceIndex;
968
969
0
                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
970
0
                    pArgs->source=replay;
971
0
                    pArgs->sourceLimit=replay-cnv->preFromULength;
972
0
                    pArgs->flush=FALSE;
973
0
                    if((sourceIndex+=cnv->preFromULength)<0) {
974
0
                        sourceIndex=-1;
975
0
                    }
976
977
0
                    cnv->preFromULength=0;
978
0
                } else {
979
                    /* see implementation note before _fromUnicodeWithCallback() */
980
0
                    U_ASSERT(realSource==NULL);
981
0
                    *err=U_INTERNAL_PROGRAM_ERROR;
982
0
                }
983
0
            }
984
985
            /* update pointers */
986
0
            s=pArgs->source;
987
0
            t=pArgs->target;
988
989
0
            if(U_SUCCESS(*err)) {
990
0
                if(s<pArgs->sourceLimit) {
991
                    /*
992
                     * continue with the conversion loop while there is still input left
993
                     * (continue converting by breaking out of only the inner loop)
994
                     */
995
0
                    break;
996
0
                } else if(realSource!=NULL) {
997
                    /* switch back from replaying to the real source and continue */
998
0
                    pArgs->source=realSource;
999
0
                    pArgs->sourceLimit=realSourceLimit;
1000
0
                    pArgs->flush=realFlush;
1001
0
                    sourceIndex=realSourceIndex;
1002
1003
0
                    realSource=NULL;
1004
0
                    break;
1005
0
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
1006
                    /*
1007
                     * the entire input stream is consumed
1008
                     * and there is a partial, truncated input sequence left
1009
                     */
1010
1011
                    /* inject an error and continue with callback handling */
1012
0
                    *err=U_TRUNCATED_CHAR_FOUND;
1013
0
                    calledCallback=FALSE; /* new error condition */
1014
0
                } else {
1015
                    /* input consumed */
1016
0
                    if(pArgs->flush) {
1017
                        /*
1018
                         * return to the conversion loop once more if the flush
1019
                         * flag is set and the conversion function has not
1020
                         * successfully processed the end of the input yet
1021
                         *
1022
                         * (continue converting by breaking out of only the inner loop)
1023
                         */
1024
0
                        if(!converterSawEndOfInput) {
1025
0
                            break;
1026
0
                        }
1027
1028
                        /* reset the converter without calling the callback function */
1029
0
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1030
0
                    }
1031
1032
                    /* done successfully */
1033
0
                    return;
1034
0
                }
1035
0
            }
1036
1037
            /* U_FAILURE(*err) */
1038
0
            {
1039
0
                UErrorCode e;
1040
1041
0
                if( calledCallback ||
1042
0
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1043
0
                    (e!=U_INVALID_CHAR_FOUND &&
1044
0
                     e!=U_ILLEGAL_CHAR_FOUND &&
1045
0
                     e!=U_TRUNCATED_CHAR_FOUND)
1046
0
                ) {
1047
                    /*
1048
                     * the callback did not or cannot resolve the error:
1049
                     * set output pointers and return
1050
                     *
1051
                     * the check for buffer overflow is redundant but it is
1052
                     * a high-runner case and hopefully documents the intent
1053
                     * well
1054
                     *
1055
                     * if we were replaying, then the replay buffer must be
1056
                     * copied back into the UConverter
1057
                     * and the real arguments must be restored
1058
                     */
1059
0
                    if(realSource!=NULL) {
1060
0
                        int32_t length;
1061
1062
0
                        U_ASSERT(cnv->preFromULength==0);
1063
1064
0
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1065
0
                        if(length>0) {
1066
0
                            u_memcpy(cnv->preFromU, pArgs->source, length);
1067
0
                            cnv->preFromULength=(int8_t)-length;
1068
0
                        }
1069
1070
0
                        pArgs->source=realSource;
1071
0
                        pArgs->sourceLimit=realSourceLimit;
1072
0
                        pArgs->flush=realFlush;
1073
0
                    }
1074
1075
0
                    return;
1076
0
                }
1077
0
            }
1078
1079
            /* callback handling */
1080
0
            {
1081
0
                UChar32 codePoint;
1082
1083
                /* get and write the code point */
1084
0
                codePoint=cnv->fromUChar32;
1085
0
                errorInputLength=0;
1086
0
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1087
0
                cnv->invalidUCharLength=(int8_t)errorInputLength;
1088
1089
                /* set the converter state to deal with the next character */
1090
0
                cnv->fromUChar32=0;
1091
1092
                /* call the callback function */
1093
0
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1094
0
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
1095
0
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1096
0
                    err);
1097
0
            }
1098
1099
            /*
1100
             * loop back to the offset handling
1101
             *
1102
             * this flag will indicate after offset handling
1103
             * that a callback was called;
1104
             * if the callback did not resolve the error, then we return
1105
             */
1106
0
            calledCallback=TRUE;
1107
0
        }
1108
0
    }
1109
0
}
1110
1111
/*
1112
 * Output the fromUnicode overflow buffer.
1113
 * Call this function if(cnv->charErrorBufferLength>0).
1114
 * @return TRUE if overflow
1115
 */
1116
static UBool
1117
ucnv_outputOverflowFromUnicode(UConverter *cnv,
1118
                               char **target, const char *targetLimit,
1119
                               int32_t **pOffsets,
1120
0
                               UErrorCode *err) {
1121
0
    int32_t *offsets;
1122
0
    char *overflow, *t;
1123
0
    int32_t i, length;
1124
1125
0
    t=*target;
1126
0
    if(pOffsets!=NULL) {
1127
0
        offsets=*pOffsets;
1128
0
    } else {
1129
0
        offsets=NULL;
1130
0
    }
1131
1132
0
    overflow=(char *)cnv->charErrorBuffer;
1133
0
    length=cnv->charErrorBufferLength;
1134
0
    i=0;
1135
0
    while(i<length) {
1136
0
        if(t==targetLimit) {
1137
            /* the overflow buffer contains too much, keep the rest */
1138
0
            int32_t j=0;
1139
1140
0
            do {
1141
0
                overflow[j++]=overflow[i++];
1142
0
            } while(i<length);
1143
1144
0
            cnv->charErrorBufferLength=(int8_t)j;
1145
0
            *target=t;
1146
0
            if(offsets!=NULL) {
1147
0
                *pOffsets=offsets;
1148
0
            }
1149
0
            *err=U_BUFFER_OVERFLOW_ERROR;
1150
0
            return TRUE;
1151
0
        }
1152
1153
        /* copy the overflow contents to the target */
1154
0
        *t++=overflow[i++];
1155
0
        if(offsets!=NULL) {
1156
0
            *offsets++=-1; /* no source index available for old output */
1157
0
        }
1158
0
    }
1159
1160
    /* the overflow buffer is completely copied to the target */
1161
0
    cnv->charErrorBufferLength=0;
1162
0
    *target=t;
1163
0
    if(offsets!=NULL) {
1164
0
        *pOffsets=offsets;
1165
0
    }
1166
0
    return FALSE;
1167
0
}
1168
1169
/*
 * Converts UChars from *source..sourceLimit to bytes at *target..targetLimit.
 *
 * Validates the arguments, first empties the converter's target overflow
 * buffer, and then hands the work to _fromUnicodeWithCallback().
 * On return *source and *target are advanced.
 *
 * Does nothing if err is NULL or already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/target/source and for
 * inconsistent or oversized buffers; may return with
 * U_BUFFER_OVERFLOW_ERROR from emptying the overflow buffer.
 */
U_CAPI void U_EXPORT2
ucnv_fromUnicode(UConverter *cnv,
                 char **target, const char *targetLimit,
                 const UChar **source, const UChar *sourceLimit,
                 int32_t *offsets,
                 UBool flush,
                 UErrorCode *err) {
    /* check parameters */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }

    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    const UChar *const sourceStart=*source;
    char *const targetStart=*target;

    if ((const void *)sourceLimit == (const void *)U_MAX_PTR(sourceLimit)) {
        /*
        Prevent code from going into an infinite loop in case we do hit this
        limit. The limit pointer is expected to be on a UChar * boundary.
        This also prevents the next argument check from failing.
        */
        sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
    }

    /*
     * All these conditions should never happen.
     *
     * 1) The limits must not be before the start of the source or target.
     *
     * 2) The buffer sizes must fit into the int32_t number range because
     * some functions use the size (in units or bytes) rather than comparing
     * pointers, and because offsets are int32_t values.
     * size_t is guaranteed to be unsigned and large enough for the job.
     *
     * Return with an error instead of adjusting the limits because we would
     * not be able to maintain the semantics that either the source must be
     * consumed or the target filled (unless an error occurs).
     * An adjustment would be targetLimit=t+0x7fffffff; for example.
     *
     * 3) The source must consist of whole UChar code units, i.e., the user
     * must not have cast a pointer incorrectly and supplied an odd byte length.
     */
    if( sourceLimit<sourceStart || targetLimit<targetStart ||
        (sourceLimit>sourceStart && (size_t)(sourceLimit-sourceStart)>(size_t)0x3fffffff) ||
        (targetLimit>targetStart && (size_t)(targetLimit-targetStart)>(size_t)0x7fffffff) ||
        (((const char *)sourceLimit-(const char *)sourceStart) & 1)!=0
    ) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* output the target overflow buffer */
    if(cnv->charErrorBufferLength>0) {
        if(ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
    }
    /* *target may have moved, therefore stop using targetStart */

    if(sourceStart==sourceLimit && !flush && cnv->preFromULength>=0) {
        /* the overflow buffer is emptied and there is no new input: we are done */
        return;
    }

    /*
     * Do not simply return with a buffer overflow error if
     * !flush && *target==targetLimit
     * because it is possible that the source will not generate any output.
     * For example, the skip callback may be called;
     * it does not output anything.
     */

    /* prepare the converter arguments */
    UConverterFromUnicodeArgs args;
    args.size=sizeof(args);
    args.converter=cnv;
    args.flush=flush;
    args.source=sourceStart;
    args.sourceLimit=sourceLimit;
    args.target=*target;
    args.targetLimit=targetLimit;
    args.offsets=offsets;

    _fromUnicodeWithCallback(&args, err);

    /* hand the advanced positions back to the caller */
    *source=args.source;
    *target=args.target;
}
1267
1268
/* ucnv_toUnicode() --------------------------------------------------------- */
1269
1270
static void
1271
3.57k
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1272
3.57k
    UConverterToUnicode toUnicode;
1273
3.57k
    UConverter *cnv;
1274
3.57k
    const char *s;
1275
3.57k
    UChar *t;
1276
3.57k
    int32_t *offsets;
1277
3.57k
    int32_t sourceIndex;
1278
3.57k
    int32_t errorInputLength;
1279
3.57k
    UBool converterSawEndOfInput, calledCallback;
1280
1281
    /* variables for m:n conversion */
1282
3.57k
    char replay[UCNV_EXT_MAX_BYTES];
1283
3.57k
    const char *realSource, *realSourceLimit;
1284
3.57k
    int32_t realSourceIndex;
1285
3.57k
    UBool realFlush;
1286
1287
3.57k
    cnv=pArgs->converter;
1288
3.57k
    s=pArgs->source;
1289
3.57k
    t=pArgs->target;
1290
3.57k
    offsets=pArgs->offsets;
1291
1292
    /* get the converter implementation function */
1293
3.57k
    sourceIndex=0;
1294
3.57k
    if(offsets==NULL) {
1295
3.57k
        toUnicode=cnv->sharedData->impl->toUnicode;
1296
3.57k
    } else {
1297
0
        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1298
0
        if(toUnicode==NULL) {
1299
            /* there is no WithOffsets implementation */
1300
0
            toUnicode=cnv->sharedData->impl->toUnicode;
1301
            /* we will write -1 for each offset */
1302
0
            sourceIndex=-1;
1303
0
        }
1304
0
    }
1305
1306
3.57k
    if(cnv->preToULength>=0) {
1307
        /* normal mode */
1308
3.57k
        realSource=NULL;
1309
1310
        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1311
3.57k
        realSourceLimit=NULL;
1312
3.57k
        realFlush=FALSE;
1313
3.57k
        realSourceIndex=0;
1314
3.57k
    } else {
1315
        /*
1316
         * Previous m:n conversion stored source units from a partial match
1317
         * and failed to consume all of them.
1318
         * We need to "replay" them from a temporary buffer and convert them first.
1319
         */
1320
0
        realSource=pArgs->source;
1321
0
        realSourceLimit=pArgs->sourceLimit;
1322
0
        realFlush=pArgs->flush;
1323
0
        realSourceIndex=sourceIndex;
1324
1325
0
        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1326
0
        pArgs->source=replay;
1327
0
        pArgs->sourceLimit=replay-cnv->preToULength;
1328
0
        pArgs->flush=FALSE;
1329
0
        sourceIndex=-1;
1330
1331
0
        cnv->preToULength=0;
1332
0
    }
1333
1334
    /*
1335
     * loop for conversion and error handling
1336
     *
1337
     * loop {
1338
     *   convert
1339
     *   loop {
1340
     *     update offsets
1341
     *     handle end of input
1342
     *     handle errors/call callback
1343
     *   }
1344
     * }
1345
     */
1346
1.74M
    for(;;) {
1347
1.74M
        if(U_SUCCESS(*err)) {
1348
            /* convert */
1349
1.74M
            toUnicode(pArgs, err);
1350
1351
            /*
1352
             * set a flag for whether the converter
1353
             * successfully processed the end of the input
1354
             *
1355
             * need not check cnv->preToULength==0 because a replay (<0) will cause
1356
             * s<sourceLimit before converterSawEndOfInput is checked
1357
             */
1358
1.74M
            converterSawEndOfInput=
1359
1.74M
                (UBool)(U_SUCCESS(*err) &&
1360
1.74M
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1361
1.74M
                        cnv->toULength==0);
1362
1.74M
        } else {
1363
            /* handle error from getNextUChar() or ucnv_convertEx() */
1364
0
            converterSawEndOfInput=FALSE;
1365
0
        }
1366
1367
        /* no callback called yet for this iteration */
1368
1.74M
        calledCallback=FALSE;
1369
1370
        /* no sourceIndex adjustment for conversion, only for callback output */
1371
1.74M
        errorInputLength=0;
1372
1373
        /*
1374
         * loop for offsets and error handling
1375
         *
1376
         * iterates at most 3 times:
1377
         * 1. to clean up after the conversion function
1378
         * 2. after the callback
1379
         * 3. after the callback again if there was truncated input
1380
         */
1381
3.48M
        for(;;) {
1382
            /* update offsets if we write any */
1383
3.48M
            if(offsets!=NULL) {
1384
0
                int32_t length=(int32_t)(pArgs->target-t);
1385
0
                if(length>0) {
1386
0
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1387
1388
                    /*
1389
                     * if a converter handles offsets and updates the offsets
1390
                     * pointer at the end, then pArgs->offset should not change
1391
                     * here;
1392
                     * however, some converters do not handle offsets at all
1393
                     * (sourceIndex<0) or may not update the offsets pointer
1394
                     */
1395
0
                    pArgs->offsets=offsets+=length;
1396
0
                }
1397
1398
0
                if(sourceIndex>=0) {
1399
0
                    sourceIndex+=(int32_t)(pArgs->source-s);
1400
0
                }
1401
0
            }
1402
1403
3.48M
            if(cnv->preToULength<0) {
1404
                /*
1405
                 * switch the source to new replay units (cannot occur while replaying)
1406
                 * after offset handling and before end-of-input and callback handling
1407
                 */
1408
0
                if(realSource==NULL) {
1409
0
                    realSource=pArgs->source;
1410
0
                    realSourceLimit=pArgs->sourceLimit;
1411
0
                    realFlush=pArgs->flush;
1412
0
                    realSourceIndex=sourceIndex;
1413
1414
0
                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1415
0
                    pArgs->source=replay;
1416
0
                    pArgs->sourceLimit=replay-cnv->preToULength;
1417
0
                    pArgs->flush=FALSE;
1418
0
                    if((sourceIndex+=cnv->preToULength)<0) {
1419
0
                        sourceIndex=-1;
1420
0
                    }
1421
1422
0
                    cnv->preToULength=0;
1423
0
                } else {
1424
                    /* see implementation note before _fromUnicodeWithCallback() */
1425
0
                    U_ASSERT(realSource==NULL);
1426
0
                    *err=U_INTERNAL_PROGRAM_ERROR;
1427
0
                }
1428
0
            }
1429
1430
            /* update pointers */
1431
3.48M
            s=pArgs->source;
1432
3.48M
            t=pArgs->target;
1433
1434
3.48M
            if(U_SUCCESS(*err)) {
1435
1.74M
                if(s<pArgs->sourceLimit) {
1436
                    /*
1437
                     * continue with the conversion loop while there is still input left
1438
                     * (continue converting by breaking out of only the inner loop)
1439
                     */
1440
1.74M
                    break;
1441
1.74M
                } else if(realSource!=NULL) {
1442
                    /* switch back from replaying to the real source and continue */
1443
0
                    pArgs->source=realSource;
1444
0
                    pArgs->sourceLimit=realSourceLimit;
1445
0
                    pArgs->flush=realFlush;
1446
0
                    sourceIndex=realSourceIndex;
1447
1448
0
                    realSource=NULL;
1449
0
                    break;
1450
5.15k
                } else if(pArgs->flush && cnv->toULength>0) {
1451
                    /*
1452
                     * the entire input stream is consumed
1453
                     * and there is a partial, truncated input sequence left
1454
                     */
1455
1456
                    /* inject an error and continue with callback handling */
1457
212
                    *err=U_TRUNCATED_CHAR_FOUND;
1458
212
                    calledCallback=FALSE; /* new error condition */
1459
4.94k
                } else {
1460
                    /* input consumed */
1461
4.94k
                    if(pArgs->flush) {
1462
                        /*
1463
                         * return to the conversion loop once more if the flush
1464
                         * flag is set and the conversion function has not
1465
                         * successfully processed the end of the input yet
1466
                         *
1467
                         * (continue converting by breaking out of only the inner loop)
1468
                         */
1469
4.94k
                        if(!converterSawEndOfInput) {
1470
1.36k
                            break;
1471
1.36k
                        }
1472
1473
                        /* reset the converter without calling the callback function */
1474
3.57k
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1475
3.57k
                    }
1476
1477
                    /* done successfully */
1478
3.57k
                    return;
1479
4.94k
                }
1480
1.74M
            }
1481
1482
            /* U_FAILURE(*err) */
1483
1.74M
            {
1484
1.74M
                UErrorCode e;
1485
1486
1.74M
                if( calledCallback ||
1487
1.74M
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1488
1.74M
                    (e!=U_INVALID_CHAR_FOUND &&
1489
1.74M
                     e!=U_ILLEGAL_CHAR_FOUND &&
1490
1.74M
                     e!=U_TRUNCATED_CHAR_FOUND &&
1491
1.74M
                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1492
1.74M
                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1493
1.74M
                ) {
1494
                    /*
1495
                     * the callback did not or cannot resolve the error:
1496
                     * set output pointers and return
1497
                     *
1498
                     * the check for buffer overflow is redundant but it is
1499
                     * a high-runner case and hopefully documents the intent
1500
                     * well
1501
                     *
1502
                     * if we were replaying, then the replay buffer must be
1503
                     * copied back into the UConverter
1504
                     * and the real arguments must be restored
1505
                     */
1506
0
                    if(realSource!=NULL) {
1507
0
                        int32_t length;
1508
1509
0
                        U_ASSERT(cnv->preToULength==0);
1510
1511
0
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1512
0
                        if(length>0) {
1513
0
                            uprv_memcpy(cnv->preToU, pArgs->source, length);
1514
0
                            cnv->preToULength=(int8_t)-length;
1515
0
                        }
1516
1517
0
                        pArgs->source=realSource;
1518
0
                        pArgs->sourceLimit=realSourceLimit;
1519
0
                        pArgs->flush=realFlush;
1520
0
                    }
1521
1522
0
                    return;
1523
0
                }
1524
1.74M
            }
1525
1526
            /* copy toUBytes[] to invalidCharBuffer[] */
1527
1.74M
            errorInputLength=cnv->invalidCharLength=cnv->toULength;
1528
1.74M
            if(errorInputLength>0) {
1529
1.74M
                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1530
1.74M
            }
1531
1532
            /* set the converter state to deal with the next character */
1533
1.74M
            cnv->toULength=0;
1534
1535
            /* call the callback function */
1536
1.74M
            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
1537
0
                cnv->toUCallbackReason = UCNV_UNASSIGNED;
1538
0
            }
1539
1.74M
            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1540
1.74M
                cnv->invalidCharBuffer, errorInputLength,
1541
1.74M
                cnv->toUCallbackReason,
1542
1.74M
                err);
1543
1.74M
            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
1544
1545
            /*
1546
             * loop back to the offset handling
1547
             *
1548
             * this flag will indicate after offset handling
1549
             * that a callback was called;
1550
             * if the callback did not resolve the error, then we return
1551
             */
1552
1.74M
            calledCallback=TRUE;
1553
1.74M
        }
1554
1.74M
    }
1555
3.57k
}
1556
1557
/*
1558
 * Output the toUnicode overflow buffer.
1559
 * Call this function if(cnv->UCharErrorBufferLength>0).
1560
 * @return TRUE if overflow
1561
 */
1562
static UBool
1563
ucnv_outputOverflowToUnicode(UConverter *cnv,
1564
                             UChar **target, const UChar *targetLimit,
1565
                             int32_t **pOffsets,
1566
0
                             UErrorCode *err) {
1567
0
    int32_t *offsets;
1568
0
    UChar *overflow, *t;
1569
0
    int32_t i, length;
1570
1571
0
    t=*target;
1572
0
    if(pOffsets!=NULL) {
1573
0
        offsets=*pOffsets;
1574
0
    } else {
1575
0
        offsets=NULL;
1576
0
    }
1577
1578
0
    overflow=cnv->UCharErrorBuffer;
1579
0
    length=cnv->UCharErrorBufferLength;
1580
0
    i=0;
1581
0
    while(i<length) {
1582
0
        if(t==targetLimit) {
1583
            /* the overflow buffer contains too much, keep the rest */
1584
0
            int32_t j=0;
1585
1586
0
            do {
1587
0
                overflow[j++]=overflow[i++];
1588
0
            } while(i<length);
1589
1590
0
            cnv->UCharErrorBufferLength=(int8_t)j;
1591
0
            *target=t;
1592
0
            if(offsets!=NULL) {
1593
0
                *pOffsets=offsets;
1594
0
            }
1595
0
            *err=U_BUFFER_OVERFLOW_ERROR;
1596
0
            return TRUE;
1597
0
        }
1598
1599
        /* copy the overflow contents to the target */
1600
0
        *t++=overflow[i++];
1601
0
        if(offsets!=NULL) {
1602
0
            *offsets++=-1; /* no source index available for old output */
1603
0
        }
1604
0
    }
1605
1606
    /* the overflow buffer is completely copied to the target */
1607
0
    cnv->UCharErrorBufferLength=0;
1608
0
    *target=t;
1609
0
    if(offsets!=NULL) {
1610
0
        *pOffsets=offsets;
1611
0
    }
1612
0
    return FALSE;
1613
0
}
1614
1615
/*
 * Public streaming entry point for codepage-to-Unicode conversion.
 * Validates the arguments, first writes out any output left in the
 * converter's overflow buffer, then hands the real work to
 * _toUnicodeWithCallback() and reports the advanced source/target positions.
 */
U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter *cnv,
               UChar **target, const UChar *targetLimit,
               const char **source, const char *sourceLimit,
               int32_t *offsets,
               UBool flush,
               UErrorCode *err) {
    UConverterToUnicodeArgs args;
    const char *srcStart;
    UChar *dstStart;

    /* do nothing if there is no error code or it already shows a failure */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }

    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    srcStart=*source;
    dstStart=*target;

    /*
     * A limit equal to U_MAX_PTR() is moved back by one byte so that the
     * conversion cannot loop forever on it; this also keeps the
     * odd-byte-distance check below from rejecting such a limit.
     * The limit is expected to lie on a UChar boundary.
     */
    if((const void *)targetLimit==(const void *)U_MAX_PTR(targetLimit)) {
        targetLimit=(const UChar *)(((const char *)targetLimit)-1);
    }

    /*
     * Sanity checks; none of these should ever trigger:
     * - each limit must not precede its start pointer
     * - the source must not span more than 0x7fffffff bytes and the target
     *   not more than 0x3fffffff UChars, because lengths and offsets are
     *   handled as int32_t in places
     * - the target range must be a whole number of UChars (catches a
     *   char * that was wrongly cast to UChar *)
     * We fail rather than clamp the limits: clamping would break the
     * guarantee that either the source is consumed or the target is filled.
     */
    if( sourceLimit<srcStart || targetLimit<dstStart ||
        (sourceLimit>srcStart && (size_t)(sourceLimit-srcStart)>(size_t)0x7fffffff) ||
        (targetLimit>dstStart && (size_t)(targetLimit-dstStart)>(size_t)0x3fffffff) ||
        (((const char *)targetLimit-(const char *)dstStart)&1)!=0
    ) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* first write out whatever is waiting in the overflow buffer */
    if(cnv->UCharErrorBufferLength>0) {
        if(ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)) {
            /* the target is full: U_BUFFER_OVERFLOW_ERROR has been set */
            return;
        }
    }
    /* from here on use *target, not dstStart: the pointer may have advanced */

    if(srcStart==sourceLimit && !flush && cnv->preToULength>=0) {
        /* overflow emptied, no new input, not flushing: nothing more to do */
        return;
    }

    /*
     * A full target with !flush is deliberately not reported as an overflow
     * here: the remaining source may produce no output at all
     * (for example when the skip callback is called).
     */

    /* set up the argument structure for the conversion core */
    args.size=sizeof(args);
    args.converter=cnv;
    args.flush=flush;
    args.source=srcStart;
    args.sourceLimit=sourceLimit;
    args.target=*target;
    args.targetLimit=targetLimit;
    args.offsets=offsets;

    _toUnicodeWithCallback(&args, err);

    /* hand the advanced positions back to the caller */
    *source=args.source;
    *target=args.target;
}
1713
1714
/* ucnv_to/fromUChars() ----------------------------------------------------- */
1715
1716
/*
 * Convert a whole UChar string to codepage bytes in one call.
 * The converter's fromUnicode state is reset first. If the destination is
 * too small, the rest of the input is converted into a scratch buffer so
 * that the full required length can still be returned (preflighting).
 *
 * @param srcLength number of UChars in src, or -1 if src is NUL-terminated
 * @return the result of u_terminateChars() for the total output length
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
                char *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UErrorCode *pErrorCode) {
    const UChar *srcEnd;
    char *destStart, *destEnd;
    int32_t total;

    /* bail out on a missing or already-failed error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( cnv==NULL ||
        destCapacity<0 || (dest==NULL && destCapacity>0) ||
        srcLength<-1 || (src==NULL && srcLength!=0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* start from a clean fromUnicode state */
    ucnv_resetFromUnicode(cnv);
    destStart=dest;
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    if(srcLength<=0) {
        /* empty input: only terminate the output */
        return u_terminateChars(destStart, destCapacity, 0, pErrorCode);
    }

    srcEnd=src+srcLength;
    destEnd=dest+destCapacity;

    /* if dest+destCapacity wrapped around, pin the limit to U_MAX_PTR; the NULL test is for OS/400 */
    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
        destEnd=(char *)U_MAX_PTR(dest);
    }

    /* the real conversion, into the caller's buffer */
    ucnv_fromUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
    total=(int32_t)(dest-destStart);

    /* on overflow, keep converting into scratch space just to count bytes */
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        char scratch[1024];

        destEnd=scratch+sizeof(scratch);
        while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            dest=scratch;
            *pErrorCode=U_ZERO_ERROR;
            ucnv_fromUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
            total+=(int32_t)(dest-scratch);
        }
    }

    return u_terminateChars(destStart, destCapacity, total, pErrorCode);
}
1775
1776
/*
 * Convert a whole codepage byte string to UChars in one call.
 * The converter's toUnicode state is reset first. If the destination is
 * too small, the rest of the input is converted into a scratch buffer so
 * that the full required length can still be returned (preflighting).
 *
 * @param srcLength number of bytes in src, or -1 if src is NUL-terminated
 * @return the result of u_terminateUChars() for the total output length
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUChars(UConverter *cnv,
              UChar *dest, int32_t destCapacity,
              const char *src, int32_t srcLength,
              UErrorCode *pErrorCode) {
    const char *srcEnd;
    UChar *destStart, *destEnd;
    int32_t total;

    /* bail out on a missing or already-failed error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( cnv==NULL ||
        destCapacity<0 || (dest==NULL && destCapacity>0) ||
        srcLength<-1 || (src==NULL && srcLength!=0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* start from a clean toUnicode state */
    ucnv_resetToUnicode(cnv);
    destStart=dest;
    if(srcLength==-1) {
        srcLength=(int32_t)uprv_strlen(src);
    }

    if(srcLength<=0) {
        /* empty input: only terminate the output */
        return u_terminateUChars(destStart, destCapacity, 0, pErrorCode);
    }

    srcEnd=src+srcLength;
    destEnd=dest+destCapacity;

    /* if dest+destCapacity wrapped around, pin the limit to U_MAX_PTR; the NULL test is for OS/400 */
    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
        destEnd=(UChar *)U_MAX_PTR(dest);
    }

    /* the real conversion, into the caller's buffer */
    ucnv_toUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
    total=(int32_t)(dest-destStart);

    /* on overflow, keep converting into scratch space just to count UChars */
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        UChar scratch[1024];

        destEnd=scratch+UPRV_LENGTHOF(scratch);
        while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            dest=scratch;
            *pErrorCode=U_ZERO_ERROR;
            ucnv_toUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
            total+=(int32_t)(dest-scratch);
        }
    }

    return u_terminateUChars(destStart, destCapacity, total, pErrorCode);
}
1837
1838
/* ucnv_getNextUChar() ------------------------------------------------------ */
1839
1840
/*
 * Convert and return the next code point from the source.
 *
 * Output still waiting in the converter's overflow buffer is returned
 * first. Otherwise the converter's native getNextUChar() is tried, with
 * _toUnicodeWithCallback() as the general path and for error callbacks.
 * A lead surrogate is combined with a following trail surrogate if one can
 * be obtained; any other extra output is pushed back into the overflow
 * buffer for the next call.
 *
 * @return the code point, or 0xffff with *err set when there is no output
 */
U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter *cnv,
                  const char **source, const char *sourceLimit,
                  UErrorCode *err) {
    UConverterToUnicodeArgs args;
    UChar units[U16_MAX_LENGTH]; /* room for one surrogate pair */
    const char *srcStart;
    UChar32 cp;
    int32_t taken, avail;

    /* bail out on a missing or already-failed error code */
    if(err==NULL || U_FAILURE(*err)) {
        return 0xffff;
    }

    if(cnv==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return 0xffff;
    }

    /*
     * The limit must not precede the start, and the source must not span
     * more than 0x7fffffff bytes because lengths and offsets are handled
     * as int32_t in places. We fail rather than clamp the limit, to keep
     * the "source consumed or target filled" guarantee.
     */
    srcStart=*source;
    if( sourceLimit<srcStart ||
        (sourceLimit>srcStart && (size_t)(sourceLimit-srcStart)>(size_t)0x7fffffff)
    ) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return 0xffff;
    }

    cp=U_SENTINEL;

    /* serve output that is still waiting in the overflow buffer */
    if(cnv->UCharErrorBufferLength>0) {
        UChar *pending=cnv->UCharErrorBuffer;

        taken=0;
        avail=cnv->UCharErrorBufferLength;
        U16_NEXT(pending, taken, avail, cp);

        /* close the gap left by the units just read */
        cnv->UCharErrorBufferLength=(int8_t)(avail-taken);
        if(cnv->UCharErrorBufferLength>0) {
            uprv_memmove(pending, pending+taken,
                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
        }

        if(taken<avail || !U16_IS_LEAD(cp)) {
            return cp;
        }
        /*
         * The buffer held nothing but a lead surrogate: go on and look for
         * its trail, since a converter may emit the two halves separately.
         */
    }

    /*
     * ucnv_getNextUChar() always flushes. Do not return early just because
     * the source is empty: the converter may not have seen flush==TRUE yet.
     */

    /* set up the argument structure for a one-UChar conversion */
    args.size=sizeof(args);
    args.converter=cnv;
    args.flush=TRUE;
    args.offsets=NULL;
    args.source=srcStart;
    args.sourceLimit=sourceLimit;
    args.target=units;
    args.targetLimit=units+1;

    if(cp>=0) {
        /* carry the lead surrogate taken from the overflow buffer */
        units[0]=(UChar)cp;
        args.target=units+1;
        taken=0;
        avail=1;
    } else {
        /*
         * At a character boundary (toULength==0) use the converter's own
         * getNextUChar() if it has one. Unlike _toUnicode(), such an
         * implementation must itself set U_TRUNCATED_CHAR_FOUND for
         * truncated input, besides toULength/toUBytes[].
         */
        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
            cp=cnv->sharedData->impl->getNextUChar(&args, err);
            *source=args.source;
            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
                /* end of input: reset without invoking the callback */
                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                return 0xffff; /* no output */
            }
            if(U_SUCCESS(*err) && cp>=0) {
                return cp;
            }
            /*
             * Otherwise continue with _toUnicode():
             * either the native function declined
             * (UCNV_GET_NEXT_UCHAR_USE_TO_U), or it failed and the
             * callback still has to run (cp is not output).
             */
        }

        /* produce one UChar in units[0], or run the error callback */
        _toUnicodeWithCallback(&args, err);

        if(*err==U_BUFFER_OVERFLOW_ERROR) {
            *err=U_ZERO_ERROR;
        }

        taken=0;
        avail=(int32_t)(args.target-units);
    }

    /* units[taken..avail[ is the output that has not been consumed yet */

    if(U_FAILURE(*err)) {
        cp=0xffff; /* no output */
    } else if(avail==0) {
        /* no input, or the input only changed converter state */
        *err=U_INDEX_OUTOFBOUNDS_ERROR;
        /* _toUnicodeWithCallback() has already reset the converter */
        cp=0xffff; /* no output */
    } else {
        cp=units[0];
        taken=1;
        if(U16_IS_LEAD(cp)) {
            /* try to find the matching trail surrogate */
            UChar trail;

            if(cnv->UCharErrorBufferLength>0) {
                /* the conversion left overflow output behind */
                trail=cnv->UCharErrorBuffer[0];
                if(U16_IS_TRAIL(trail)) {
                    cp=U16_GET_SUPPLEMENTARY(cp, trail);

                    /* drop the trail from the overflow buffer */
                    if((--cnv->UCharErrorBufferLength)>0) {
                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
                    }
                }
                /* else: return the unpaired lead surrogate as is */
            } else if(args.source<sourceLimit) {
                /* more input is available: convert one more unit into units[1] */
                args.targetLimit=units+2;
                _toUnicodeWithCallback(&args, err);
                if(*err==U_BUFFER_OVERFLOW_ERROR) {
                    *err=U_ZERO_ERROR;
                }

                avail=(int32_t)(args.target-units);
                if(U_SUCCESS(*err) && avail==2) {
                    trail=units[1];
                    if(U16_IS_TRAIL(trail)) {
                        cp=U16_GET_SUPPLEMENTARY(cp, trail);
                        taken=2;
                    }
                }
            }
        }
    }

    /*
     * Whatever remains in units[taken..avail[ is put in front of the
     * current overflow buffer contents.
     */
    if(taken<avail) {
        int32_t extra=avail-taken;
        int32_t queued=cnv->UCharErrorBufferLength;

        if(queued>0) {
            /* make room at the front for the pushed-back units */
            uprv_memmove(cnv->UCharErrorBuffer+extra, cnv->UCharErrorBuffer,
                         queued*U_SIZEOF_UCHAR);
        }
        cnv->UCharErrorBufferLength=(int8_t)(queued+extra);

        cnv->UCharErrorBuffer[0]=units[taken];
        if(extra>1) {
            cnv->UCharErrorBuffer[1]=units[taken+1];
        }
    }

    *source=args.source;
    return cp;
}
2042
2043
/* ucnv_convert() and siblings ---------------------------------------------- */
2044
2045
/*
 * Convert from one codepage to another, normally through a UTF-16 pivot
 * buffer: sourceCnv converts source bytes to UChars in the pivot, targetCnv
 * converts the pivot contents to target bytes.
 *
 * If the caller passes no pivot buffer (pivotStart==NULL), a stack buffer
 * is used; that is only allowed together with flush.
 * If one side is UTF-8 and the other converter provides a direct
 * fromUTF8/toUTF8 function, that function is used where possible and the
 * pivot serves only for error callbacks, partial matches and replay.
 *
 * On return *source, *target, *pivotSource and *pivotTarget are updated.
 * With flush and success, the output is NUL-terminated if there is room;
 * otherwise U_STRING_NOT_TERMINATED_WARNING is set.
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar localPivot[CHUNK_SIZE];
    const UChar *localPivotSource;
    UChar *localPivotTarget;
    const char *inStart;
    char *outStart;

    UConverterToUnicodeArgs toU;
    UConverterFromUnicodeArgs fromU;
    UConverterConvert direct;

    /* bail out on a missing or already-failed error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    inStart=*source;
    outStart=*target;
    if(targetLimit<outStart || (sourceLimit!=NULL && sourceLimit<inStart)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /*
     * Neither buffer may span more than 0x7fffffff bytes (int32_t range);
     * ucnv_toUnicode() has the detailed rationale.
     */
    if( (sourceLimit!=NULL && sourceLimit>inStart &&
            (size_t)(sourceLimit-inStart)>(size_t)0x7fffffff) ||
        (targetLimit>outStart && (size_t)(targetLimit-outStart)>(size_t)0x7fffffff)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(pivotStart==NULL) {
        if(!flush) {
            /* a streaming caller has to supply the pivot buffer itself */
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }

        /* no caller pivot: fall back to the one on the stack */
        pivotStart=localPivot;
        localPivotSource=localPivot;
        localPivotTarget=localPivot;
        pivotSource=(UChar **)&localPivotSource;
        pivotTarget=&localPivotTarget;
        pivotLimit=localPivot+CHUNK_SIZE;
    } else if(  pivotLimit==NULL ||
                pivotStart>=pivotLimit ||
                pivotSource==NULL || *pivotSource==NULL ||
                pivotTarget==NULL || *pivotTarget==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(sourceLimit==NULL) {
        /* the source is a single-byte-NUL-terminated string: find its end */
        sourceLimit=uprv_strchr(*source, 0);
    }

    if(reset) {
        ucnv_resetToUnicode(sourceCnv);
        ucnv_resetFromUnicode(targetCnv);
        *pivotSource=*pivotTarget=pivotStart;
    } else if(targetCnv->charErrorBufferLength>0) {
        /* write out what targetCnv still holds in its overflow buffer */
        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
            /* the target is full: U_BUFFER_OVERFLOW_ERROR has been set */
            return;
        }
        /* *target has advanced; outStart is stale from here on */

        if( !flush &&
            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 &&
            inStart==sourceLimit
        ) {
            /* overflow written, nothing buffered anywhere, no new input */
            return;
        }
    }

    /* pick a direct UTF-8 conversion function if one applies */
    direct=NULL;
    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
        targetCnv->sharedData->impl->fromUTF8!=NULL
    ) {
        direct=targetCnv->sharedData->impl->fromUTF8;
    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
               sourceCnv->sharedData->impl->toUTF8!=NULL
    ) {
        direct=sourceCnv->sharedData->impl->toUTF8;
    }

    /*
     * With direct conversion the pivot is only needed for error handling
     * and partial matches, so cap it at 32 UChars to get back to the direct
     * path quickly. 32 is enough for UCNV_EXT_MAX_UCHARS and
     * UCNV_ERROR_BUFFER_LENGTH.
     *
     * A smaller cap is possible, at the cost of buffer overflows from
     * callbacks, but it should never be below the maximum number of
     * fromUnicode extension-table input UChars (m:n conversion, see
     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]),
     * or below 2 for surrogate pairs. If the pivot is too small the code
     * bounces between pivoting and direct conversion, and the call
     * overhead cancels the gain from converting directly.
     */
    if(direct!=NULL && (pivotLimit-pivotStart)>32) {
        pivotLimit=pivotStart+32;
    }

    /* argument structures for the two conversion directions */
    toU.size=sizeof(toU);
    toU.converter=sourceCnv;
    toU.flush=flush;
    toU.offsets=NULL;
    toU.source=inStart;
    toU.sourceLimit=sourceLimit;
    toU.targetLimit=pivotLimit;

    fromU.size=sizeof(fromU);
    fromU.converter=targetCnv;
    fromU.flush=FALSE;
    fromU.offsets=NULL;
    fromU.target=*target;
    fromU.targetLimit=targetLimit;

    /*
     * TODO: Consider moving the loop below into its own function, for
     * readability and to narrow the set of visible variables.
     * inStart and outStart must not be used past this point.
     */

    /*
     * Conversion loop.
     *
     * The data flows through
     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
     * and each pass empties the later stages before refilling them from
     * the earlier ones, which is why the steps appear in reverse order.
     * The targetCnv overflow buffer was already written out above, once.
     */
    for(;;) {
        /*
         * Step 1: fromUnicode(pivot -> target), if the pivot holds data,
         * an error is pending, targetCnv has input to replay, or the
         * fromUnicode side has to be flushed.
         * Direct conversion comes here too, for error callbacks and for
         * flushing the replay buffer.
         */
        if( *pivotSource<*pivotTarget ||
            U_FAILURE(*pErrorCode) ||
            targetCnv->preFromULength<0 ||
            fromU.flush
        ) {
            fromU.source=*pivotSource;
            fromU.sourceLimit=*pivotTarget;
            _fromUnicodeWithCallback(&fromU, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                /* the target is full, or the conversion failed */
                *pivotSource=(UChar *)fromU.source;
                break;
            }

            /*
             * Success means _fromUnicodeWithCallback() consumed the whole
             * pivot (*pivotSource==*pivotTarget).
             */
        }

        /* the pivot is empty now: rewind it to its start */
        *pivotSource=*pivotTarget=pivotStart;

        /*
         * Step 2: if sourceCnv has overflow output, move it into the pivot
         * and go around again.
         */
        if(sourceCnv->UCharErrorBufferLength>0) {
            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
                /* the pivot is full; that is not an error here */
                *pErrorCode=U_ZERO_ERROR;
            }
            continue;
        }

        /*
         * Step 3: stop at the end of the input.
         * Testing fromU.flush as well as flush makes sure that, when the
         * caller asked for a flush, the converters really were called
         * with the flush flag set before we finish.
         */
        if( toU.source==sourceLimit &&
            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
            (!flush || fromU.flush)
        ) {
            /* all done, no error */
            break;
        }

        /*
         * Step 4: direct conversion, if available, unless a partial match
         * is being continued or the toUnicode replay buffer must be
         * flushed first.
         */
        if(direct!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /* clear a warning that this function may have set itself */
                *pErrorCode=U_ZERO_ERROR;
            }
            direct(&fromU, &toU, pErrorCode);
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                break;
            } else if(U_FAILURE(*pErrorCode)) {
                if(sourceCnv->toULength<=0) {
                    /*
                     * fromUnicode error: mark one pivot UChar as consumed
                     * (*pivotSource>pivotStart tells the caller so) and
                     * loop around to _fromUnicodeWithCallback() for the
                     * callback handling.
                     */
                    *pivotSource=*pivotTarget=pivotStart+1;
                    continue;
                }
                /*
                 * toUnicode error: continue below with
                 * _toUnicodeWithCallback() for the callback handling.
                 * The pivot stays reset (*pivotSource==pivotStart, nothing
                 * consumed), which tells the caller that the error is on
                 * the toUnicode side.
                 */
            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /*
                 * Not an error: the implementation asks for pivoting to be
                 * used for now.
                 */
                *pErrorCode=U_ZERO_ERROR;
            /*
             * The remaining branches nearly duplicate the end-of-input
             * handling of _toUnicodeWithCallback(), so that it need not be
             * called just for the end of the input.
             */
            } else if(flush && sourceCnv->toULength>0) { /* flush==toU.flush */
                /*
                 * All input is consumed but a truncated sequence is left:
                 * raise the error here and let the callback handling below
                 * deal with it.
                 */
                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
            } else {
                /* the input is used up */
                if(flush) {
                    /* reset both converters, without invoking callbacks */
                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
                }

                /* all done, no error */
                break;
            }
        }

        /*
         * Step 5: toUnicode(source -> pivot).
         * This is the pivoting path; direct conversion also comes here for
         * error callbacks, continued partial matches and flushing the
         * replay buffer. The pivot is empty and rewound at this point.
         */
        toU.target=pivotStart; /* same as *pivotTarget */
        /* toU.targetLimit was set to pivotLimit before the loop */
        _toUnicodeWithCallback(&toU, pErrorCode);
        *pivotTarget=toU.target;
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* the pivot is full: simply keep looping */
            *pErrorCode=U_ZERO_ERROR;
        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
            /* a conversion error, or nothing was left to convert */
            break;
        }
        /*
         * Otherwise the pivot now holds output for the fromUnicode step.
         *
         * Once toUnicode has seen the end of the input during a flush,
         * the fromUnicode side has to be flushed as well.
         */
        if( flush && toU.source==sourceLimit &&
            sourceCnv->preToULength>=0 &&
            sourceCnv->UCharErrorBufferLength==0
        ) {
            fromU.flush=TRUE;
        }
    }

    /*
     * The loop ends for one of three reasons:
     * - all source text was converted into the target buffer
     * - the target buffer overflowed
     * - a conversion error occurred
     */

    *source=toU.source;
    *target=fromU.target;

    /* NUL-terminate the output if there is room for it */
    if(flush && U_SUCCESS(*pErrorCode)) {
        if(*target==targetLimit) {
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
        } else {
            **target=0;
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
                *pErrorCode=U_ZERO_ERROR;
            }
        }
    }
}
2402
2403
/* internal implementation of ucnv_convert() etc. with preflighting */
2404
static int32_t
2405
ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2406
                     char *target, int32_t targetCapacity,
2407
                     const char *source, int32_t sourceLength,
2408
0
                     UErrorCode *pErrorCode) {
2409
0
    UChar pivotBuffer[CHUNK_SIZE];
2410
0
    UChar *pivot, *pivot2;
2411
2412
0
    char *myTarget;
2413
0
    const char *sourceLimit;
2414
0
    const char *targetLimit;
2415
0
    int32_t targetLength=0;
2416
2417
    /* set up */
2418
0
    if(sourceLength<0) {
2419
0
        sourceLimit=uprv_strchr(source, 0);
2420
0
    } else {
2421
0
        sourceLimit=source+sourceLength;
2422
0
    }
2423
2424
    /* if there is no input data, we're done */
2425
0
    if(source==sourceLimit) {
2426
0
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2427
0
    }
2428
2429
0
    pivot=pivot2=pivotBuffer;
2430
0
    myTarget=target;
2431
0
    targetLength=0;
2432
2433
0
    if(targetCapacity>0) {
2434
        /* perform real conversion */
2435
0
        targetLimit=target+targetCapacity;
2436
0
        ucnv_convertEx(outConverter, inConverter,
2437
0
                       &myTarget, targetLimit,
2438
0
                       &source, sourceLimit,
2439
0
                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2440
0
                       FALSE,
2441
0
                       TRUE,
2442
0
                       pErrorCode);
2443
0
        targetLength=(int32_t)(myTarget-target);
2444
0
    }
2445
2446
    /*
2447
     * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2448
     * to it but continue the conversion in order to store in targetCapacity
2449
     * the number of bytes that was required.
2450
     */
2451
0
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2452
0
    {
2453
0
        char targetBuffer[CHUNK_SIZE];
2454
2455
0
        targetLimit=targetBuffer+CHUNK_SIZE;
2456
0
        do {
2457
0
            *pErrorCode=U_ZERO_ERROR;
2458
0
            myTarget=targetBuffer;
2459
0
            ucnv_convertEx(outConverter, inConverter,
2460
0
                           &myTarget, targetLimit,
2461
0
                           &source, sourceLimit,
2462
0
                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2463
0
                           FALSE,
2464
0
                           TRUE,
2465
0
                           pErrorCode);
2466
0
            targetLength+=(int32_t)(myTarget-targetBuffer);
2467
0
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2468
2469
        /* done with preflighting, set warnings and errors as appropriate */
2470
0
        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2471
0
    }
2472
2473
    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2474
0
    return targetLength;
2475
0
}
2476
2477
/*
 * Converts source from the charset named fromConverterName to the charset
 * named toConverterName, writing at most targetCapacity bytes to target.
 *
 * Both converters are created in stack storage for the duration of the call
 * and closed before returning. sourceLength==-1 means that source is
 * NUL-terminated.
 *
 * Returns the output length computed by ucnv_internalConvert(), or 0 when
 * the arguments are invalid or a converter cannot be created.
 */
U_CAPI int32_t U_EXPORT2
ucnv_convert(const char *toConverterName, const char *fromConverterName,
             char *target, int32_t targetCapacity,
             const char *source, int32_t sourceLength,
             UErrorCode *pErrorCode) {
    UConverter fromStorage, toStorage; /* stack-allocated */
    UConverter *fromCnv, *toCnv;
    int32_t length;

    /* nothing to do without a usable error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* validate the source and target arguments */
    if(source==NULL || sourceLength<-1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(targetCapacity<0 || (targetCapacity>0 && target==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* empty input: only terminate the output, no converters needed */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    /* open the source-side converter */
    fromCnv=ucnv_createConverter(&fromStorage, fromConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* open the target-side converter; undo the first one on failure */
    toCnv=ucnv_createConverter(&toStorage, toConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        ucnv_close(fromCnv);
        return 0;
    }

    length=ucnv_internalConvert(toCnv, fromCnv,
                                target, targetCapacity,
                                source, sourceLength,
                                pErrorCode);

    ucnv_close(fromCnv);
    ucnv_close(toCnv);

    return length;
}
2524
2525
/* @internal */
2526
static int32_t
2527
ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2528
                        UConverterType algorithmicType,
2529
                        UConverter *cnv,
2530
                        char *target, int32_t targetCapacity,
2531
                        const char *source, int32_t sourceLength,
2532
0
                        UErrorCode *pErrorCode) {
2533
0
    UConverter algoConverterStatic; /* stack-allocated */
2534
0
    UConverter *algoConverter, *to, *from;
2535
0
    int32_t targetLength;
2536
2537
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2538
0
        return 0;
2539
0
    }
2540
2541
0
    if( cnv==NULL || source==NULL || sourceLength<-1 ||
2542
0
        targetCapacity<0 || (targetCapacity>0 && target==NULL)
2543
0
    ) {
2544
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2545
0
        return 0;
2546
0
    }
2547
2548
    /* if there is no input data, we're done */
2549
0
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2550
0
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2551
0
    }
2552
2553
    /* create the algorithmic converter */
2554
0
    algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2555
0
                                                  "", 0, pErrorCode);
2556
0
    if(U_FAILURE(*pErrorCode)) {
2557
0
        return 0;
2558
0
    }
2559
2560
    /* reset the other converter */
2561
0
    if(convertToAlgorithmic) {
2562
        /* cnv->Unicode->algo */
2563
0
        ucnv_resetToUnicode(cnv);
2564
0
        to=algoConverter;
2565
0
        from=cnv;
2566
0
    } else {
2567
        /* algo->Unicode->cnv */
2568
0
        ucnv_resetFromUnicode(cnv);
2569
0
        from=algoConverter;
2570
0
        to=cnv;
2571
0
    }
2572
2573
0
    targetLength=ucnv_internalConvert(to, from,
2574
0
                                      target, targetCapacity,
2575
0
                                      source, sourceLength,
2576
0
                                      pErrorCode);
2577
2578
0
    ucnv_close(algoConverter);
2579
2580
0
    return targetLength;
2581
0
}
2582
2583
/*
 * Converts source from cnv's charset to the algorithmic charset
 * algorithmicType. Thin wrapper around ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=TRUE; all argument checking happens there.
 */
U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,
                   UConverter *cnv,
                   char *target, int32_t targetCapacity,
                   const char *source, int32_t sourceLength,
                   UErrorCode *pErrorCode) {
    int32_t length=ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
                                           target, targetCapacity,
                                           source, sourceLength,
                                           pErrorCode);
    return length;
}
2594
2595
/*
 * Converts source from the algorithmic charset algorithmicType to cnv's
 * charset. Thin wrapper around ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=FALSE; all argument checking happens there.
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter *cnv,
                     UConverterType algorithmicType,
                     char *target, int32_t targetCapacity,
                     const char *source, int32_t sourceLength,
                     UErrorCode *pErrorCode) {
    int32_t length=ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
                                           target, targetCapacity,
                                           source, sourceLength,
                                           pErrorCode);
    return length;
}
2606
2607
/*
 * Returns the converter's type as recorded in its shared static data.
 * When legacy conversion is compiled in, a converter whose static type is
 * UCNV_MBCS is asked for its type through ucnv_MBCSGetType() instead.
 * The converter pointer is not checked for NULL.
 */
U_CAPI UConverterType  U_EXPORT2
ucnv_getType(const UConverter* converter)
{
    const int8_t staticType = converter->sharedData->staticData->conversionType;

#if !UCONFIG_NO_LEGACY_CONVERSION
    if(UCNV_MBCS == staticType) {
        return ucnv_MBCSGetType(converter);
    }
#endif

    return (UConverterType)staticType;
}
2618
2619
/*
 * Asks the converter implementation to fill in the starters[256] table.
 *
 * Does nothing if err is NULL or already indicates a failure.
 * Sets *err to U_ILLEGAL_ARGUMENT_ERROR if converter or starters is NULL,
 * or if the converter's implementation provides no getStarters function.
 */
U_CAPI void  U_EXPORT2
ucnv_getStarters(const UConverter* converter, 
                 UBool starters[256],
                 UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    /* reject NULL arguments instead of dereferencing them below */
    if (converter == NULL || starters == NULL) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(converter->sharedData->impl->getStarters != NULL) {
        converter->sharedData->impl->getStarters(converter, starters, err);
    } else {
        /* this converter type has no notion of starter bytes */
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
2634
2635
/*
 * Looks up cnv in the ambiguousConverters table by its name as reported by
 * ucnv_getName(). Returns the matching table entry, or NULL if cnv is NULL,
 * its name cannot be retrieved, or the name is not in the table.
 */
static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
{
    const UAmbiguousConverter *const tableEnd=
        ambiguousConverters+UPRV_LENGTHOF(ambiguousConverters);
    const UAmbiguousConverter *entry;
    const char *cnvName;
    UErrorCode status=U_ZERO_ERROR;

    if(cnv==NULL) {
        return NULL;
    }

    cnvName=ucnv_getName(cnv, &status);
    if(U_FAILURE(status)) {
        return NULL;
    }

    /* linear search; the table is small */
    for(entry=ambiguousConverters; entry!=tableEnd; ++entry) {
        if(uprv_strcmp(cnvName, entry->name)==0) {
            return entry;
        }
    }

    return NULL;
}
2661
2662
/*
 * For a converter listed in the ambiguousConverters table, replaces in place
 * every code unit in source[0..sourceLength) that equals the table entry's
 * variant5c value with 0x5c (backslash).
 * Does nothing if cnv or source is NULL, sourceLength<=0, or the converter
 * is not in the table.
 */
U_CAPI void  U_EXPORT2
ucnv_fixFileSeparator(const UConverter *cnv, 
                      UChar* source, 
                      int32_t sourceLength) {
    const UAmbiguousConverter *ambiguous;
    UChar *cursor, *limit;
    UChar variant;

    if(cnv==NULL || source==NULL || sourceLength<=0) {
        return;
    }

    ambiguous=ucnv_getAmbiguous(cnv);
    if(ambiguous==NULL) {
        return;
    }

    variant=ambiguous->variant5c;
    limit=source+sourceLength;
    for(cursor=source; cursor!=limit; ++cursor) {
        if(*cursor==variant) {
            *cursor=0x5c;
        }
    }
}
2682
2683
/*
 * Returns true if cnv is listed in the ambiguousConverters table
 * (see ucnv_getAmbiguous()); a NULL cnv yields false.
 */
U_CAPI UBool  U_EXPORT2
ucnv_isAmbiguous(const UConverter *cnv) {
    const UAmbiguousConverter *entry=ucnv_getAmbiguous(cnv);
    return (UBool)(entry!=NULL);
}
2687
2688
/*
 * Stores usesFallback in the converter's useFallback field.
 * cnv is not checked for NULL.
 */
U_CAPI void  U_EXPORT2
ucnv_setFallback(UConverter *cnv, UBool usesFallback)
{
    UConverter *const converter = cnv;

    converter->useFallback = usesFallback;
}
2693
2694
/*
 * Returns the current value of the converter's useFallback field.
 * cnv is not checked for NULL.
 */
U_CAPI UBool  U_EXPORT2
ucnv_usesFallback(const UConverter *cnv)
{
    const UBool fallbackEnabled = cnv->useFallback;

    return fallbackEnabled;
}
2699
2700
/*
 * Copies the converter's invalidCharBuffer contents into errBytes.
 *
 * On input *len is the capacity of errBytes; on success *len is set to
 * converter->invalidCharLength and that many bytes are copied.
 *
 * Errors:
 * - U_ILLEGAL_ARGUMENT_ERROR if len, errBytes or converter is NULL
 * - U_INDEX_OUTOFBOUNDS_ERROR if *len is smaller than the stored length
 * Does nothing if err is NULL or already indicates a failure.
 */
U_CAPI void  U_EXPORT2
ucnv_getInvalidChars (const UConverter * converter,
                      char *errBytes,
                      int8_t * len,
                      UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if (converter == NULL || errBytes == NULL || len == NULL) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* the caller's buffer must be able to hold all stored bytes */
    if (converter->invalidCharLength > *len) {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    /* report the actual length, then copy if there is anything to copy */
    *len = converter->invalidCharLength;
    if (*len > 0) {
        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
    }
}
2725
2726
/*
 * Copies the converter's invalidUCharBuffer contents into errChars.
 *
 * On input *len is the capacity of errChars (in UChars); on success *len is
 * set to converter->invalidUCharLength and that many UChars are copied.
 *
 * Errors:
 * - U_ILLEGAL_ARGUMENT_ERROR if len, errChars or converter is NULL
 * - U_INDEX_OUTOFBOUNDS_ERROR if *len is smaller than the stored length
 * Does nothing if err is NULL or already indicates a failure.
 */
U_CAPI void  U_EXPORT2
ucnv_getInvalidUChars (const UConverter * converter,
                       UChar *errChars,
                       int8_t * len,
                       UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if (converter == NULL || errChars == NULL || len == NULL) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* the caller's buffer must be able to hold all stored code units */
    if (converter->invalidUCharLength > *len) {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    /* report the actual length, then copy if there is anything to copy */
    *len = converter->invalidUCharLength;
    if (*len > 0) {
        u_memcpy (errChars, converter->invalidUCharBuffer, *len);
    }
}
2751
2752
0
#define SIG_MAX_LEN 5
2753
2754
/*
 * Checks whether source begins with a known Unicode signature byte sequence
 * (BOM) and returns the name of the corresponding charset.
 *
 * source          - bytes to inspect; must not be NULL
 * sourceLength    - number of bytes, or -1 if source is NUL-terminated
 * signatureLength - optional output: length of the recognized signature,
 *                   0 if none was recognized; may be NULL
 * pErrorCode      - set to U_ILLEGAL_ARGUMENT_ERROR for a NULL source or
 *                   sourceLength < -1
 *
 * Returns one of "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE", "UTF-8",
 * "SCSU", "BOCU-1", "UTF-7", "UTF-EBCDIC", or NULL if no signature is found
 * or an error occurred.
 */
U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature( const char* source,
                             int32_t sourceLength,
                             int32_t* signatureLength,
                             UErrorCode* pErrorCode) {
    int32_t ignoredLength;

    /*
     * Pre-fill with 0xa5 bytes so that inputs shorter than SIG_MAX_LEN
     * cannot be misdetected through leftover bytes.
     */
    char head[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
    int k;

    if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if(source == NULL || sourceLength < -1){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* let the code below store the length unconditionally */
    if(signatureLength == NULL) {
        signatureLength = &ignoredLength;
    }

    if(sourceLength==-1){
        sourceLength=(int32_t)uprv_strlen(source);
    }

    /* copy at most SIG_MAX_LEN leading bytes of the input */
    for(k=0; k<sourceLength && k<SIG_MAX_LEN; ++k){
        head[k]=source[k];
    }

    /* FE FF */
    if(head[0] == '\xFE' && head[1] == '\xFF') {
        *signatureLength=2;
        return  "UTF-16BE";
    }

    /* FF FE, optionally followed by 00 00 */
    if(head[0] == '\xFF' && head[1] == '\xFE') {
        if(head[2] == '\x00' && head[3] =='\x00') {
            *signatureLength=4;
            return "UTF-32LE";
        }
        *signatureLength=2;
        return  "UTF-16LE";
    }

    /* EF BB BF */
    if(head[0] == '\xEF' && head[1] == '\xBB' && head[2] == '\xBF') {
        *signatureLength=3;
        return  "UTF-8";
    }

    /* 00 00 FE FF */
    if(head[0] == '\x00' && head[1] == '\x00' &&
       head[2] == '\xFE' && head[3]=='\xFF') {
        *signatureLength=4;
        return  "UTF-32BE";
    }

    /* 0E FE FF */
    if(head[0] == '\x0E' && head[1] == '\xFE' && head[2] == '\xFF') {
        *signatureLength=3;
        return "SCSU";
    }

    /* FB EE 28 */
    if(head[0] == '\xFB' && head[1] == '\xEE' && head[2] == '\x28') {
        *signatureLength=3;
        return "BOCU-1";
    }

    if(head[0] == '\x2B' && head[1] == '\x2F' && head[2] == '\x76') {
        /*
         * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
         * depending on the second UTF-16 code unit.
         * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
         * if it occurs.
         *
         * So far we have +/v
         */
        if(head[3] == '\x38' && head[4] == '\x2D') {
            /* 5 bytes +/v8- */
            *signatureLength=5;
            return "UTF-7";
        }
        if(head[3] == '\x38' || head[3] == '\x39' || head[3] == '\x2B' || head[3] == '\x2F') {
            /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
            *signatureLength=4;
            return "UTF-7";
        }
        /* "+/v" followed by anything else is not a signature */
    }

    /* DD 73 66 73 */
    if(head[0]=='\xDD' && head[1]== '\x73'&& head[2]=='\x66' && head[3]=='\x73'){
        *signatureLength=4;
        return "UTF-EBCDIC";
    }

    /* no known Unicode signature byte sequence recognized */
    *signatureLength=0;
    return NULL;
}
2842
2843
/*
 * Returns a count derived from the converter's pending from-Unicode state,
 * checked in this order:
 * - preFromUFirstCP>=0: U16_LENGTH(preFromUFirstCP)+preFromULength
 * - preFromULength<0:   -preFromULength
 * - fromUChar32>0:      1
 * - otherwise:          0
 *
 * Returns -1 if status is NULL or already a failure, or if cnv is NULL
 * (in which case *status is set to U_ILLEGAL_ARGUMENT_ERROR).
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
{
    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }
    if(cnv == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    if(cnv->preFromUFirstCP >= 0){
        return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
    }
    if(cnv->preFromULength < 0){
        return -cnv->preFromULength ;
    }
    if(cnv->fromUChar32 > 0){
        return 1;
    }
    return 0;
}
2864
2865
/*
 * Returns a count derived from the converter's pending to-Unicode state,
 * checked in this order:
 * - preToULength>0: preToULength
 * - preToULength<0: -preToULength
 * - toULength>0:    toULength
 * - otherwise:      0
 *
 * Returns -1 if status is NULL or already a failure, or if cnv is NULL
 * (in which case *status is set to U_ILLEGAL_ARGUMENT_ERROR).
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }
    if(cnv == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    if(cnv->preToULength > 0){
        return cnv->preToULength ;
    }
    if(cnv->preToULength < 0){
        return -cnv->preToULength;
    }
    if(cnv->toULength > 0){
        return cnv->toULength;
    }
    return 0;
}
2885
2886
/*
 * Returns TRUE if the converter's type (as reported by ucnv_getType()) is
 * one of UCNV_SBCS, UCNV_DBCS, UCNV_UTF32_BigEndian, UCNV_UTF32_LittleEndian,
 * UCNV_UTF32 or UCNV_US_ASCII; FALSE for every other type.
 *
 * Returns FALSE if status is NULL or already indicates a failure.
 * Sets *status to U_ILLEGAL_ARGUMENT_ERROR and returns FALSE if cnv is NULL.
 */
U_CAPI UBool U_EXPORT2
ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
    /* guard against a NULL status like the other status-taking APIs in this file */
    if (status == NULL || U_FAILURE(*status)) {
        return FALSE;
    }

    if (cnv == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    switch (ucnv_getType(cnv)) {
        case UCNV_SBCS:
        case UCNV_DBCS:
        case UCNV_UTF32_BigEndian:
        case UCNV_UTF32_LittleEndian:
        case UCNV_UTF32:
        case UCNV_US_ASCII:
            return TRUE;
        default:
            return FALSE;
    }
}
2909
#endif
2910
2911
/*
2912
 * Hey, Emacs, please set the following:
2913
 *
2914
 * Local Variables:
2915
 * indent-tabs-mode: nil
2916
 * End:
2917
 *
2918
 */