Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/ucnv_err.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 *****************************************************************************
5
 *
6
 *   Copyright (C) 1998-2016, International Business Machines
7
 *   Corporation and others.  All Rights Reserved.
8
 *
9
 *****************************************************************************
10
 *
11
 *  ucnv_err.c
12
 *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
13
 *
14
 *
15
*   Change history:
16
*
17
*   06/29/2000  helena      Major rewrite of the callback APIs.
18
*/
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_CONVERSION
23
24
#include "unicode/ucnv_err.h"
25
#include "unicode/ucnv_cb.h"
26
#include "ucnv_cnv.h"
27
#include "cmemory.h"
28
#include "unicode/ucnv.h"
29
#include "ustrfmt.h"
30
31
0
/*
 * Capacity, in UChars, of the scratch buffer in which the escape callbacks
 * below assemble their replacement text.
 * Magic # 48 = 6 (UChars in the longest per-unit escape built below,
 *                 e.g. "%UXXXX" or "&#255;")
 *            * 8 (max number of bytes per char for any converter)
 */
#define VALUE_STRING_LENGTH 48

/* Code points of the ASCII characters used to spell out escape sequences. */
#define UNICODE_SPACE_CODEPOINT         0x0020  /* ' '  */
#define UNICODE_HASH_CODEPOINT          0x0023  /* '#'  */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025  /* '%'  */
#define UNICODE_AMP_CODEPOINT           0x0026  /* '&'  */
#define UNICODE_PLUS_CODEPOINT          0x002B  /* '+'  */
#define UNICODE_SEMICOLON_CODEPOINT     0x003B  /* ';'  */
#define UNICODE_U_CODEPOINT             0x0055  /* 'U'  */
#define UNICODE_X_CODEPOINT             0x0058  /* 'X'  */
#define UNICODE_RS_CODEPOINT            0x005C  /* '\\' */
#define UNICODE_U_LOW_CODEPOINT         0x0075  /* 'u'  */
#define UNICODE_X_LOW_CODEPOINT         0x0078  /* 'x'  */
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B  /* '{'  */
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D  /* '}'  */

/*
 * Values compared against the first char of a non-NULL callback context.
 * The UCNV_PRV_ESCAPE_* values pick the escape notation in the escape
 * callbacks; UCNV_PRV_STOP_ON_ILLEGAL makes the skip/substitute callbacks
 * clear the error only when the reason is UCNV_UNASSIGNED.
 */
#define UCNV_PRV_ESCAPE_ICU         0
#define UCNV_PRV_ESCAPE_C           'C'
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
#define UCNV_PRV_ESCAPE_JAVA        'J'
#define UCNV_PRV_ESCAPE_UNICODE     'U'
#define UCNV_PRV_ESCAPE_CSS2        'S'
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
54
55
/*
 * IS_DEFAULT_IGNORABLE_CODE_POINT
 * Checks whether a code point has the default ignorable Unicode property.
 * As such, this list needs to be updated if the ignorable code point list ever
 * changes.
 * To avoid a dependency on other code, this list is hard coded here.
 * When an ignorable code point is found and is unmappable, the default callbacks
 * will ignore it.
 * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
 *
 * This list should be kept in sync with the one in CharsetCallback.java
 *
 * The argument is parenthesized at every use so that an expression argument
 * (for example "x & 0xFFFF" or "a ? b : c") is compared as a whole.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
    ((c) == 0x00AD) || \
    ((c) == 0x034F) || \
    ((c) == 0x061C) || \
    ((c) == 0x115F) || \
    ((c) == 0x1160) || \
    (0x17B4 <= (c) && (c) <= 0x17B5) || \
    (0x180B <= (c) && (c) <= 0x180E) || \
    (0x200B <= (c) && (c) <= 0x200F) || \
    (0x202A <= (c) && (c) <= 0x202E) || \
    ((c) == 0x2060) || \
    (0x2066 <= (c) && (c) <= 0x2069) || \
    (0x2061 <= (c) && (c) <= 0x2064) || \
    (0x206A <= (c) && (c) <= 0x206F) || \
    ((c) == 0x3164) || \
    (0x0FE00 <= (c) && (c) <= 0x0FE0F) || \
    ((c) == 0x0FEFF) || \
    ((c) == 0x0FFA0) || \
    (0x01BCA0 <= (c) && (c) <= 0x01BCA3) || \
    (0x01D173 <= (c) && (c) <= 0x01D17A) || \
    ((c) == 0x0E0001) || \
    (0x0E0020 <= (c) && (c) <= 0x0E007F) || \
    (0x0E0100 <= (c) && (c) <= 0x0E01EF) || \
    ((c) == 0x2065) || \
    (0x0FFF0 <= (c) && (c) <= 0x0FFF8) || \
    ((c) == 0x0E0000) || \
    (0x0E0002 <= (c) && (c) <= 0x0E001F) || \
    (0x0E0080 <= (c) && (c) <= 0x0E00FF) || \
    (0x0E01F0 <= (c) && (c) <= 0x0E0FFF) \
    )
97
98
99
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
100
U_CAPI void    U_EXPORT2
101
UCNV_FROM_U_CALLBACK_STOP (
102
                  const void *context,
103
                  UConverterFromUnicodeArgs *fromUArgs,
104
                  const UChar* codeUnits,
105
                  int32_t length,
106
                  UChar32 codePoint,
107
                  UConverterCallbackReason reason,
108
                  UErrorCode * err)
109
0
{
110
0
    (void)context;
111
0
    (void)fromUArgs;
112
0
    (void)codeUnits;
113
0
    (void)length;
114
0
    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
115
0
    {
116
        /*
117
         * Skip if the codepoint has unicode property of default ignorable.
118
         */
119
0
        *err = U_ZERO_ERROR;
120
0
    }
121
    /* the caller must have set the error code accordingly */
122
0
    return;
123
0
}
124
125
126
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
127
U_CAPI void    U_EXPORT2
128
UCNV_TO_U_CALLBACK_STOP (
129
                   const void *context,
130
                   UConverterToUnicodeArgs *toUArgs,
131
                   const char* codePoints,
132
                   int32_t length,
133
                   UConverterCallbackReason reason,
134
                   UErrorCode * err)
135
0
{
136
    /* the caller must have set the error code accordingly */
137
0
    (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
138
0
    return;
139
0
}
140
141
U_CAPI void    U_EXPORT2
142
UCNV_FROM_U_CALLBACK_SKIP (                  
143
                  const void *context,
144
                  UConverterFromUnicodeArgs *fromUArgs,
145
                  const UChar* codeUnits,
146
                  int32_t length,
147
                  UChar32 codePoint,
148
                  UConverterCallbackReason reason,
149
                  UErrorCode * err)
150
0
{
151
0
    (void)fromUArgs;
152
0
    (void)codeUnits;
153
0
    (void)length;
154
0
    if (reason <= UCNV_IRREGULAR)
155
0
    {
156
0
        if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
157
0
        {
158
            /*
159
             * Skip if the codepoint has unicode property of default ignorable.
160
             */
161
0
            *err = U_ZERO_ERROR;
162
0
        }
163
0
        else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
164
0
        {
165
0
            *err = U_ZERO_ERROR;
166
0
        }
167
        /* else the caller must have set the error code accordingly. */
168
0
    }
169
    /* else ignore the reset, close and clone calls. */
170
0
}
171
172
U_CAPI void    U_EXPORT2
173
UCNV_FROM_U_CALLBACK_SUBSTITUTE (
174
                  const void *context,
175
                  UConverterFromUnicodeArgs *fromArgs,
176
                  const UChar* codeUnits,
177
                  int32_t length,
178
                  UChar32 codePoint,
179
                  UConverterCallbackReason reason,
180
                  UErrorCode * err)
181
0
{
182
0
    (void)codeUnits;
183
0
    (void)length;
184
0
    if (reason <= UCNV_IRREGULAR)
185
0
    {
186
0
        if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
187
0
        {
188
            /*
189
             * Skip if the codepoint has unicode property of default ignorable.
190
             */
191
0
            *err = U_ZERO_ERROR;
192
0
        }
193
0
        else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
194
0
        {
195
0
            *err = U_ZERO_ERROR;
196
0
            ucnv_cbFromUWriteSub(fromArgs, 0, err);
197
0
        }
198
        /* else the caller must have set the error code accordingly. */
199
0
    }
200
    /* else ignore the reset, close and clone calls. */
201
0
}
202
203
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
204
 *uses a clean copy (resetted) of the converter, to convert that unicode
205
 *escape sequence to the target codepage (if conversion failure happens then
206
 *we revert to substituting with subchar)
207
 */
208
U_CAPI void    U_EXPORT2
209
UCNV_FROM_U_CALLBACK_ESCAPE (
210
                         const void *context,
211
                         UConverterFromUnicodeArgs *fromArgs,
212
                         const UChar *codeUnits,
213
                         int32_t length,
214
                         UChar32 codePoint,
215
                         UConverterCallbackReason reason,
216
                         UErrorCode * err)
217
0
{
218
219
0
  UChar valueString[VALUE_STRING_LENGTH];
220
0
  int32_t valueStringLength = 0;
221
0
  int32_t i = 0;
222
223
0
  const UChar *myValueSource = NULL;
224
0
  UErrorCode err2 = U_ZERO_ERROR;
225
0
  UConverterFromUCallback original = NULL;
226
0
  const void *originalContext;
227
228
0
  UConverterFromUCallback ignoredCallback = NULL;
229
0
  const void *ignoredContext;
230
  
231
0
  if (reason > UCNV_IRREGULAR)
232
0
  {
233
0
      return;
234
0
  }
235
0
  else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
236
0
  {
237
      /*
238
       * Skip if the codepoint has unicode property of default ignorable.
239
       */
240
0
      *err = U_ZERO_ERROR;
241
0
      return;
242
0
  }
243
244
0
  ucnv_setFromUCallBack (fromArgs->converter,
245
0
                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
246
0
                     NULL,
247
0
                     &original,
248
0
                     &originalContext,
249
0
                     &err2);
250
  
251
0
  if (U_FAILURE (err2))
252
0
  {
253
0
    *err = err2;
254
0
    return;
255
0
  } 
256
0
  if(context==NULL)
257
0
  { 
258
0
      while (i < length)
259
0
      {
260
0
        valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
261
0
        valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
262
0
        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
263
0
      }
264
0
  }
265
0
  else
266
0
  {
267
0
      switch(*((char*)context))
268
0
      {
269
0
      case UCNV_PRV_ESCAPE_JAVA:
270
0
          while (i < length)
271
0
          {
272
0
              valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
273
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
274
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
275
0
          }
276
0
          break;
277
278
0
      case UCNV_PRV_ESCAPE_C:
279
0
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
280
281
0
          if(length==2){
282
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
283
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
284
285
0
          }
286
0
          else{
287
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
288
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
289
0
          }
290
0
          break;
291
292
0
      case UCNV_PRV_ESCAPE_XML_DEC:
293
294
0
          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
295
0
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
296
0
          if(length==2){
297
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
298
0
          }
299
0
          else{
300
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
301
0
          }
302
0
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
303
0
          break;
304
305
0
      case UCNV_PRV_ESCAPE_XML_HEX:
306
307
0
          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
308
0
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
309
0
          valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
310
0
          if(length==2){
311
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
312
0
          }
313
0
          else{
314
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
315
0
          }
316
0
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
317
0
          break;
318
319
0
      case UCNV_PRV_ESCAPE_UNICODE:
320
0
          valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
321
0
          valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
322
0
          valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
323
0
          if (length == 2) {
324
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
325
0
          } else {
326
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
327
0
          }
328
0
          valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
329
0
          break;
330
331
0
      case UCNV_PRV_ESCAPE_CSS2:
332
0
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
333
0
          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
334
          /* Always add space character, becase the next character might be whitespace,
335
             which would erroneously be considered the termination of the escape sequence. */
336
0
          valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
337
0
          break;
338
339
0
      default:
340
0
          while (i < length)
341
0
          {
342
0
              valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
343
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
344
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
345
0
          }
346
0
      }
347
0
  }  
348
0
  myValueSource = valueString;
349
350
  /* reset the error */
351
0
  *err = U_ZERO_ERROR;
352
353
0
  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
354
355
0
  ucnv_setFromUCallBack (fromArgs->converter,
356
0
                         original,
357
0
                         originalContext,
358
0
                         &ignoredCallback,
359
0
                         &ignoredContext,
360
0
                         &err2);
361
0
  if (U_FAILURE (err2))
362
0
  {
363
0
      *err = err2;
364
0
      return;
365
0
  }
366
367
0
  return;
368
0
}
369
370
371
372
U_CAPI void  U_EXPORT2
373
UCNV_TO_U_CALLBACK_SKIP (
374
                 const void *context,
375
                 UConverterToUnicodeArgs *toArgs,
376
                 const char* codeUnits,
377
                 int32_t length,
378
                 UConverterCallbackReason reason,
379
                 UErrorCode * err)
380
0
{
381
0
    (void)toArgs;
382
0
    (void)codeUnits;
383
0
    (void)length;
384
0
    if (reason <= UCNV_IRREGULAR)
385
0
    {
386
0
        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
387
0
        {
388
0
            *err = U_ZERO_ERROR;
389
0
        }
390
        /* else the caller must have set the error code accordingly. */
391
0
    }
392
    /* else ignore the reset, close and clone calls. */
393
0
}
394
395
U_CAPI void    U_EXPORT2
396
UCNV_TO_U_CALLBACK_SUBSTITUTE (
397
                 const void *context,
398
                 UConverterToUnicodeArgs *toArgs,
399
                 const char* codeUnits,
400
                 int32_t length,
401
                 UConverterCallbackReason reason,
402
                 UErrorCode * err)
403
1.74M
{
404
1.74M
    (void)codeUnits;
405
1.74M
    (void)length;
406
1.74M
    if (reason <= UCNV_IRREGULAR)
407
1.74M
    {
408
1.74M
        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
409
1.74M
        {
410
1.74M
            *err = U_ZERO_ERROR;
411
1.74M
            ucnv_cbToUWriteSub(toArgs,0,err);
412
1.74M
        }
413
        /* else the caller must have set the error code accordingly. */
414
1.74M
    }
415
    /* else ignore the reset, close and clone calls. */
416
1.74M
}
417
418
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
419
 *and uses that as the substitution sequence
420
 */
421
U_CAPI void   U_EXPORT2
422
UCNV_TO_U_CALLBACK_ESCAPE (
423
                 const void *context,
424
                 UConverterToUnicodeArgs *toArgs,
425
                 const char* codeUnits,
426
                 int32_t length,
427
                 UConverterCallbackReason reason,
428
                 UErrorCode * err)
429
0
{
430
0
    UChar uniValueString[VALUE_STRING_LENGTH];
431
0
    int32_t valueStringLength = 0;
432
0
    int32_t i = 0;
433
434
0
    if (reason > UCNV_IRREGULAR)
435
0
    {
436
0
        return;
437
0
    }
438
439
0
    if(context==NULL)
440
0
    {    
441
0
        while (i < length)
442
0
        {
443
0
            uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
444
0
            uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
445
0
            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
446
0
        }
447
0
    }
448
0
    else
449
0
    {
450
0
        switch(*((char*)context))
451
0
        {
452
0
        case UCNV_PRV_ESCAPE_XML_DEC:
453
0
            while (i < length)
454
0
            {
455
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
456
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
457
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
458
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
459
0
            }
460
0
            break;
461
462
0
        case UCNV_PRV_ESCAPE_XML_HEX:
463
0
            while (i < length)
464
0
            {
465
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
466
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
467
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
468
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
469
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
470
0
            }
471
0
            break;
472
0
        case UCNV_PRV_ESCAPE_C:
473
0
            while (i < length)
474
0
            {
475
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
476
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
477
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
478
0
            }
479
0
            break;
480
0
        default:
481
0
            while (i < length)
482
0
            {
483
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
484
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
485
0
                uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
486
0
                valueStringLength += 2;
487
0
            }
488
0
        }
489
0
    }
490
    /* reset the error */
491
0
    *err = U_ZERO_ERROR;
492
493
0
    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
494
0
}
495
496
#endif