Coverage Report

Created: 2025-12-07 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ucnv_err.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 *****************************************************************************
5
 *
6
 *   Copyright (C) 1998-2016, International Business Machines
7
 *   Corporation and others.  All Rights Reserved.
8
 *
9
 *****************************************************************************
10
 *
11
 *  ucnv_err.c
12
 *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
13
 *
14
 *
15
*   Change history:
16
*
17
*   06/29/2000  helena      Major rewrite of the callback APIs.
18
*/
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_CONVERSION
23
24
#include "unicode/ucnv_err.h"
25
#include "unicode/ucnv_cb.h"
26
#include "ucnv_cnv.h"
27
#include "cmemory.h"
28
#include "unicode/ucnv.h"
29
#include "ustrfmt.h"
30
31
0
/* Capacity, in char16_t units, of the stack buffers the escape callbacks build their output in. */
#define VALUE_STRING_LENGTH 48
32
/* Magic # 48 = 6 (max number of char16_t per escaped byte, e.g. "&#255;" or "&#xFF;") * 8 (max number of bytes per char for any converter) */
33
0
/* Characters the escape callbacks cast to char16_t and append when building an escape sequence. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
34
0
#define UNICODE_U_CODEPOINT             0x0055
35
0
#define UNICODE_X_CODEPOINT             0x0058
36
0
#define UNICODE_RS_CODEPOINT            0x005C
37
0
#define UNICODE_U_LOW_CODEPOINT         0x0075
38
0
#define UNICODE_X_LOW_CODEPOINT         0x0078
39
0
#define UNICODE_AMP_CODEPOINT           0x0026
40
0
#define UNICODE_HASH_CODEPOINT          0x0023
41
0
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
42
0
#define UNICODE_PLUS_CODEPOINT          0x002B
43
0
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
44
0
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D
45
0
#define UNICODE_SPACE_CODEPOINT         0x0020
46
/* Option values compared against the first char that a non-null callback context points to. */
#define UCNV_PRV_ESCAPE_ICU         0
47
0
#define UCNV_PRV_ESCAPE_C           'C'
48
0
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
49
0
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
50
0
#define UCNV_PRV_ESCAPE_JAVA        'J'
51
0
#define UCNV_PRV_ESCAPE_UNICODE     'U'
52
0
#define UCNV_PRV_ESCAPE_CSS2        'S'
53
0
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
54
55
/*
56
 * IS_DEFAULT_IGNORABLE_CODE_POINT
57
 * This is to check if a code point has the default ignorable unicode property.
58
 * As such, this list needs to be updated if the ignorable code point list ever
59
 * changes.
60
 * To avoid dependency on other code, this list is hard coded here.
61
 * When an ignorable code point is found and is unmappable, the default callbacks
62
 * will ignore them.
63
 * For a list of the default ignorable code points, use this link:
64
 * https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
65
 *
66
 * This list should be kept in sync with the one in CharsetCallback.java
67
 */
68
38.6M
/*
 * Evaluates to non-zero when code point `c` is one of the hard-coded
 * default-ignorable code points listed below, and to zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example `a | b` or `x ? y : z`) is compared as a whole instead of
 * being re-associated with the comparison operators.
 * The argument is still evaluated more than once, so it must not have side
 * effects.
 */
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
    ((c) == 0x00AD) || \
    ((c) == 0x034F) || \
    ((c) == 0x061C) || \
    ((c) == 0x115F) || \
    ((c) == 0x1160) || \
    (0x17B4 <= (c) && (c) <= 0x17B5) || \
    (0x180B <= (c) && (c) <= 0x180F) || \
    (0x200B <= (c) && (c) <= 0x200F) || \
    (0x202A <= (c) && (c) <= 0x202E) || \
    (0x2060 <= (c) && (c) <= 0x206F) || \
    ((c) == 0x3164) || \
    (0xFE00 <= (c) && (c) <= 0xFE0F) || \
    ((c) == 0xFEFF) || \
    ((c) == 0xFFA0) || \
    (0xFFF0 <= (c) && (c) <= 0xFFF8) || \
    (0x1BCA0 <= (c) && (c) <= 0x1BCA3) || \
    (0x1D173 <= (c) && (c) <= 0x1D17A) || \
    (0xE0000 <= (c) && (c) <= 0xE0FFF))
87
88
89
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
90
U_CAPI void    U_EXPORT2
91
UCNV_FROM_U_CALLBACK_STOP (
92
                  const void *context,
93
                  UConverterFromUnicodeArgs *fromUArgs,
94
                  const char16_t* codeUnits,
95
                  int32_t length,
96
                  UChar32 codePoint,
97
                  UConverterCallbackReason reason,
98
                  UErrorCode * err)
99
0
{
100
0
    (void)context;
101
0
    (void)fromUArgs;
102
0
    (void)codeUnits;
103
0
    (void)length;
104
0
    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
105
0
    {
106
        /*
107
         * Skip if the codepoint has unicode property of default ignorable.
108
         */
109
0
        *err = U_ZERO_ERROR;
110
0
    }
111
    /* the caller must have set the error code accordingly */
112
0
}
113
114
115
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
116
U_CAPI void    U_EXPORT2
117
UCNV_TO_U_CALLBACK_STOP (
118
                   const void *context,
119
                   UConverterToUnicodeArgs *toUArgs,
120
                   const char* codePoints,
121
                   int32_t length,
122
                   UConverterCallbackReason reason,
123
                   UErrorCode * err)
124
0
{
125
    /* the caller must have set the error code accordingly */
126
0
    (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
127
0
}
128
129
/*
 * From-Unicode callback that skips the offending sequence.
 *
 * context: nullptr to skip every reported error, or a pointer to a char
 *          option. With the option UCNV_PRV_STOP_ON_ILLEGAL ('i') only
 *          unassigned code points are skipped; any other option value
 *          leaves the caller's error code in place.
 * Unassigned default-ignorable code points are always skipped.
 * Reasons above UCNV_IRREGULAR (the reset, close and clone calls) are
 * ignored. fromUArgs, codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(const void *context,
                          UConverterFromUnicodeArgs *fromUArgs,
                          const char16_t *codeUnits,
                          int32_t length,
                          UChar32 codePoint,
                          UConverterCallbackReason reason,
                          UErrorCode *err)
{
    (void)fromUArgs;
    (void)codeUnits;
    (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    const bool unassigned = (reason == UCNV_UNASSIGNED);

    if (unassigned && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) {
        /* Default-ignorable code points are dropped regardless of context. */
        *err = U_ZERO_ERROR;
        return;
    }

    if (context == nullptr) {
        *err = U_ZERO_ERROR;
        return;
    }

    if (unassigned && *static_cast<const char *>(context) == UCNV_PRV_STOP_ON_ILLEGAL) {
        *err = U_ZERO_ERROR;
    }
    /* Otherwise the caller's error code stands. */
}
159
160
/*
 * From-Unicode callback that replaces the offending sequence with the
 * converter's substitution, written via ucnv_cbFromUWriteSub().
 *
 * context: nullptr to substitute for every reported error, or a pointer to
 *          a char option. With the option UCNV_PRV_STOP_ON_ILLEGAL ('i')
 *          only unassigned code points are substituted; any other option
 *          value leaves the caller's error code in place.
 * Unassigned default-ignorable code points are skipped without writing
 * anything. Reasons above UCNV_IRREGULAR (the reset, close and clone calls)
 * are ignored. codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(const void *context,
                                UConverterFromUnicodeArgs *fromArgs,
                                const char16_t *codeUnits,
                                int32_t length,
                                UChar32 codePoint,
                                UConverterCallbackReason reason,
                                UErrorCode *err)
{
    (void)codeUnits;
    (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    const bool unassigned = (reason == UCNV_UNASSIGNED);

    if (unassigned && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) {
        /* Default-ignorable code points are dropped, not substituted. */
        *err = U_ZERO_ERROR;
        return;
    }

    const bool substituteEverything = (context == nullptr);
    const bool substituteUnassignedOnly =
        !substituteEverything &&
        unassigned &&
        *static_cast<const char *>(context) == UCNV_PRV_STOP_ON_ILLEGAL;

    if (substituteEverything || substituteUnassignedOnly) {
        *err = U_ZERO_ERROR;
        ucnv_cbFromUWriteSub(fromArgs, 0, err);
    }
    /* Otherwise the caller's error code stands. */
}
190
191
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
192
 *uses a clean copy (resetted) of the converter, to convert that unicode
193
 *escape sequence to the target codepage (if conversion failure happens then
194
 *we revert to substituting with subchar)
195
 */
196
U_CAPI void    U_EXPORT2
197
UCNV_FROM_U_CALLBACK_ESCAPE (
198
                         const void *context,
199
                         UConverterFromUnicodeArgs *fromArgs,
200
                         const char16_t *codeUnits,
201
                         int32_t length,
202
                         UChar32 codePoint,
203
                         UConverterCallbackReason reason,
204
                         UErrorCode * err)
205
0
{
206
207
0
  char16_t valueString[VALUE_STRING_LENGTH];
208
0
  int32_t valueStringLength = 0;
209
0
  int32_t i = 0;
210
211
0
  const char16_t *myValueSource = nullptr;
212
0
  UErrorCode err2 = U_ZERO_ERROR;
213
0
  UConverterFromUCallback original = nullptr;
214
0
  const void *originalContext;
215
216
0
  UConverterFromUCallback ignoredCallback = nullptr;
217
0
  const void *ignoredContext;
218
  
219
0
  if (reason > UCNV_IRREGULAR)
220
0
  {
221
0
      return;
222
0
  }
223
0
  else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
224
0
  {
225
      /*
226
       * Skip if the codepoint has unicode property of default ignorable.
227
       */
228
0
      *err = U_ZERO_ERROR;
229
0
      return;
230
0
  }
231
232
0
  ucnv_setFromUCallBack (fromArgs->converter,
233
0
                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
234
0
                     nullptr,
235
0
                     &original,
236
0
                     &originalContext,
237
0
                     &err2);
238
  
239
0
  if (U_FAILURE (err2))
240
0
  {
241
0
    *err = err2;
242
0
    return;
243
0
  } 
244
0
  if(context==nullptr)
245
0
  { 
246
0
      while (i < length)
247
0
      {
248
0
        valueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
249
0
        valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
250
0
        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
251
0
      }
252
0
  }
253
0
  else
254
0
  {
255
0
      switch(*((char*)context))
256
0
      {
257
0
      case UCNV_PRV_ESCAPE_JAVA:
258
0
          while (i < length)
259
0
          {
260
0
              valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT;    /* adding \ */
261
0
              valueString[valueStringLength++] = (char16_t) UNICODE_U_LOW_CODEPOINT; /* adding u */
262
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
263
0
          }
264
0
          break;
265
266
0
      case UCNV_PRV_ESCAPE_C:
267
0
          valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT;    /* adding \ */
268
269
0
          if(length==2){
270
0
              valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
271
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
272
273
0
          }
274
0
          else{
275
0
              valueString[valueStringLength++] = (char16_t) UNICODE_U_LOW_CODEPOINT; /* adding u */
276
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
277
0
          }
278
0
          break;
279
280
0
      case UCNV_PRV_ESCAPE_XML_DEC:
281
282
0
          valueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT;   /* adding & */
283
0
          valueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT;  /* adding # */
284
0
          if(length==2){
285
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
286
0
          }
287
0
          else{
288
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
289
0
          }
290
0
          valueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
291
0
          break;
292
293
0
      case UCNV_PRV_ESCAPE_XML_HEX:
294
295
0
          valueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT;   /* adding & */
296
0
          valueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT;  /* adding # */
297
0
          valueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
298
0
          if(length==2){
299
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
300
0
          }
301
0
          else{
302
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
303
0
          }
304
0
          valueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
305
0
          break;
306
307
0
      case UCNV_PRV_ESCAPE_UNICODE:
308
0
          valueString[valueStringLength++] = (char16_t) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
309
0
          valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT;    /* adding U */
310
0
          valueString[valueStringLength++] = (char16_t) UNICODE_PLUS_CODEPOINT; /* adding + */
311
0
          if (length == 2) {
312
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
313
0
          } else {
314
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
315
0
          }
316
0
          valueString[valueStringLength++] = (char16_t) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
317
0
          break;
318
319
0
      case UCNV_PRV_ESCAPE_CSS2:
320
0
          valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT;    /* adding \ */
321
0
          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
322
          /* Always add space character, because the next character might be whitespace,
323
             which would erroneously be considered the termination of the escape sequence. */
324
0
          valueString[valueStringLength++] = (char16_t) UNICODE_SPACE_CODEPOINT;
325
0
          break;
326
327
0
      default:
328
0
          while (i < length)
329
0
          {
330
0
              valueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
331
0
              valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT;             /* adding U */
332
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
333
0
          }
334
0
      }
335
0
  }  
336
0
  myValueSource = valueString;
337
338
  /* reset the error */
339
0
  *err = U_ZERO_ERROR;
340
341
0
  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
342
343
0
  ucnv_setFromUCallBack (fromArgs->converter,
344
0
                         original,
345
0
                         originalContext,
346
0
                         &ignoredCallback,
347
0
                         &ignoredContext,
348
0
                         &err2);
349
0
  if (U_FAILURE (err2))
350
0
  {
351
0
      *err = err2;
352
0
      return;
353
0
  }
354
0
}
355
356
357
358
/*
 * To-Unicode callback that skips the offending byte sequence.
 *
 * context: nullptr to skip every reported error, or a pointer to a char
 *          option. With the option UCNV_PRV_STOP_ON_ILLEGAL ('i') only
 *          unassigned sequences are skipped; any other option value leaves
 *          the caller's error code in place.
 * Reasons above UCNV_IRREGULAR (the reset, close and clone calls) are
 * ignored. toArgs, codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(const void *context,
                        UConverterToUnicodeArgs *toArgs,
                        const char *codeUnits,
                        int32_t length,
                        UConverterCallbackReason reason,
                        UErrorCode *err)
{
    (void)toArgs;
    (void)codeUnits;
    (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    if (context == nullptr) {
        *err = U_ZERO_ERROR;
        return;
    }

    if (reason == UCNV_UNASSIGNED &&
        *static_cast<const char *>(context) == UCNV_PRV_STOP_ON_ILLEGAL) {
        *err = U_ZERO_ERROR;
    }
    /* Otherwise the caller's error code stands. */
}
380
381
/*
 * To-Unicode callback that replaces the offending byte sequence with the
 * substitution written by ucnv_cbToUWriteSub().
 *
 * context: nullptr to substitute for every reported error, or a pointer to
 *          a char option. With the option UCNV_PRV_STOP_ON_ILLEGAL ('i')
 *          only unassigned sequences are substituted; any other option
 *          value leaves the caller's error code in place.
 * Reasons above UCNV_IRREGULAR (the reset, close and clone calls) are
 * ignored. codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(const void *context,
                              UConverterToUnicodeArgs *toArgs,
                              const char *codeUnits,
                              int32_t length,
                              UConverterCallbackReason reason,
                              UErrorCode *err)
{
    (void)codeUnits;
    (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    const bool substituteEverything = (context == nullptr);
    const bool substituteUnassignedOnly =
        !substituteEverything &&
        reason == UCNV_UNASSIGNED &&
        *static_cast<const char *>(context) == UCNV_PRV_STOP_ON_ILLEGAL;

    if (substituteEverything || substituteUnassignedOnly) {
        *err = U_ZERO_ERROR;
        ucnv_cbToUWriteSub(toArgs, 0, err);
    }
    /* Otherwise the caller's error code stands. */
}
403
404
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
405
 *and uses that as the substitution sequence
406
 */
407
U_CAPI void   U_EXPORT2
408
UCNV_TO_U_CALLBACK_ESCAPE (
409
                 const void *context,
410
                 UConverterToUnicodeArgs *toArgs,
411
                 const char* codeUnits,
412
                 int32_t length,
413
                 UConverterCallbackReason reason,
414
                 UErrorCode * err)
415
0
{
416
0
    char16_t uniValueString[VALUE_STRING_LENGTH];
417
0
    int32_t valueStringLength = 0;
418
0
    int32_t i = 0;
419
420
0
    if (reason > UCNV_IRREGULAR)
421
0
    {
422
0
        return;
423
0
    }
424
425
0
    if(context==nullptr)
426
0
    {    
427
0
        while (i < length)
428
0
        {
429
0
            uniValueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
430
0
            uniValueString[valueStringLength++] = (char16_t) UNICODE_X_CODEPOINT;    /* adding X */
431
0
            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
432
0
        }
433
0
    }
434
0
    else
435
0
    {
436
0
        switch(*((char*)context))
437
0
        {
438
0
        case UCNV_PRV_ESCAPE_XML_DEC:
439
0
            while (i < length)
440
0
            {
441
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT;   /* adding & */
442
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT;  /* adding # */
443
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
444
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
445
0
            }
446
0
            break;
447
448
0
        case UCNV_PRV_ESCAPE_XML_HEX:
449
0
            while (i < length)
450
0
            {
451
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT;   /* adding & */
452
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT;  /* adding # */
453
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
454
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
455
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
456
0
            }
457
0
            break;
458
0
        case UCNV_PRV_ESCAPE_C:
459
0
            while (i < length)
460
0
            {
461
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT;    /* adding \ */
462
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
463
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
464
0
            }
465
0
            break;
466
0
        default:
467
0
            while (i < length)
468
0
            {
469
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
470
0
                uniValueString[valueStringLength++] = (char16_t) UNICODE_X_CODEPOINT;    /* adding X */
471
0
                uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
472
0
                valueStringLength += 2;
473
0
            }
474
0
        }
475
0
    }
476
    /* reset the error */
477
0
    *err = U_ZERO_ERROR;
478
479
0
    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
480
0
}
481
482
#endif