Coverage Report

Created: 2024-04-24 06:23

/src/icu/source/common/ucnv_err.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 *****************************************************************************
5
 *
6
 *   Copyright (C) 1998-2016, International Business Machines
7
 *   Corporation and others.  All Rights Reserved.
8
 *
9
 *****************************************************************************
10
 *
11
 *  ucnv_err.c
12
 *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
13
 *
14
 *
15
*   Change history:
16
*
17
*   06/29/2000  helena      Major rewrite of the callback APIs.
18
*/
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_CONVERSION
23
24
#include "unicode/ucnv_err.h"
25
#include "unicode/ucnv_cb.h"
26
#include "ucnv_cnv.h"
27
#include "cmemory.h"
28
#include "unicode/ucnv.h"
29
#include "ustrfmt.h"
30
31
0
#define VALUE_STRING_LENGTH 48
32
/* Capacity, in UChars, of the escape buffers in the ESCAPE callbacks. NOTE(review): this note used to read "32 = 4 * 8" but the value is 48; presumably 6 (longest escape per input byte here, e.g. "&#xFF;") * 8 (max number of bytes per char for any converter) - confirm. */
33
0
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025 /* percent sign */
34
0
#define UNICODE_U_CODEPOINT             0x0055 /* uppercase U */
35
0
#define UNICODE_X_CODEPOINT             0x0058 /* uppercase X */
36
0
#define UNICODE_RS_CODEPOINT            0x005C /* reverse solidus (backslash) */
37
0
#define UNICODE_U_LOW_CODEPOINT         0x0075 /* lowercase u */
38
0
#define UNICODE_X_LOW_CODEPOINT         0x0078 /* lowercase x */
39
0
#define UNICODE_AMP_CODEPOINT           0x0026 /* ampersand */
40
0
#define UNICODE_HASH_CODEPOINT          0x0023 /* number sign */
41
0
#define UNICODE_SEMICOLON_CODEPOINT     0x003B /* semicolon */
42
0
#define UNICODE_PLUS_CODEPOINT          0x002B /* plus sign */
43
0
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B /* left curly bracket */
44
0
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D /* right curly bracket */
45
0
#define UNICODE_SPACE_CODEPOINT         0x0020 /* space */
46
#define UCNV_PRV_ESCAPE_ICU         0 /* not referenced by the callbacks below; NOTE(review): presumably names the default percent-style escape - confirm */
47
0
#define UCNV_PRV_ESCAPE_C           'C' /* context byte: C-style escapes (backslash + u/U from Unicode, backslash + x to Unicode) */
48
0
#define UCNV_PRV_ESCAPE_XML_DEC     'D' /* context byte: decimal numeric character reference, ampersand + hash ... semicolon */
49
0
#define UCNV_PRV_ESCAPE_XML_HEX     'X' /* context byte: hexadecimal numeric character reference, ampersand + hash + x ... semicolon */
50
0
#define UCNV_PRV_ESCAPE_JAVA        'J' /* context byte: backslash + u + 4 hex digits per UTF-16 code unit */
51
0
#define UCNV_PRV_ESCAPE_UNICODE     'U' /* context byte: U+ notation wrapped in curly brackets */
52
0
#define UCNV_PRV_ESCAPE_CSS2        'S' /* context byte: backslash + hex digits + a trailing space */
53
0
#define UCNV_PRV_STOP_ON_ILLEGAL    'i' /* context byte: SKIP/SUBSTITUTE handle only UCNV_UNASSIGNED and leave other errors set */
54
55
/*
 * IS_DEFAULT_IGNORABLE_CODE_POINT(c)
 *
 * Evaluates to non-zero when the code point c has the Unicode "default
 * ignorable" property, according to the hard-coded list below, and to zero
 * otherwise. When such a code point is found and is unmappable, the default
 * callbacks in this file ignore it.
 *
 * The list is hard coded here to avoid a dependency on other code, so it
 * needs to be updated if the set of default ignorable code points ever
 * changes. For the current set, see:
 * https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
 *
 * This list should be kept in sync with the one in CharsetCallback.java.
 *
 * The argument is fully parenthesized in the expansion, so any integer
 * expression may be passed. It is still evaluated several times, so it must
 * not have side effects.
 */
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
    ((c) == 0x00AD) || \
    ((c) == 0x034F) || \
    ((c) == 0x061C) || \
    ((c) == 0x115F) || \
    ((c) == 0x1160) || \
    (0x17B4 <= (c) && (c) <= 0x17B5) || \
    (0x180B <= (c) && (c) <= 0x180F) || \
    (0x200B <= (c) && (c) <= 0x200F) || \
    (0x202A <= (c) && (c) <= 0x202E) || \
    (0x2060 <= (c) && (c) <= 0x206F) || \
    ((c) == 0x3164) || \
    (0xFE00 <= (c) && (c) <= 0xFE0F) || \
    ((c) == 0xFEFF) || \
    ((c) == 0xFFA0) || \
    (0xFFF0 <= (c) && (c) <= 0xFFF8) || \
    (0x1BCA0 <= (c) && (c) <= 0x1BCA3) || \
    (0x1D173 <= (c) && (c) <= 0x1D17A) || \
    (0xE0000 <= (c) && (c) <= 0xE0FFF))
87
88
89
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
90
U_CAPI void    U_EXPORT2
91
UCNV_FROM_U_CALLBACK_STOP (
92
                  const void *context,
93
                  UConverterFromUnicodeArgs *fromUArgs,
94
                  const UChar* codeUnits,
95
                  int32_t length,
96
                  UChar32 codePoint,
97
                  UConverterCallbackReason reason,
98
                  UErrorCode * err)
99
0
{
100
0
    (void)context;
101
0
    (void)fromUArgs;
102
0
    (void)codeUnits;
103
0
    (void)length;
104
0
    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
105
0
    {
106
        /*
107
         * Skip if the codepoint has unicode property of default ignorable.
108
         */
109
0
        *err = U_ZERO_ERROR;
110
0
    }
111
    /* the caller must have set the error code accordingly */
112
0
    return;
113
0
}
114
115
116
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
117
U_CAPI void    U_EXPORT2
118
UCNV_TO_U_CALLBACK_STOP (
119
                   const void *context,
120
                   UConverterToUnicodeArgs *toUArgs,
121
                   const char* codePoints,
122
                   int32_t length,
123
                   UConverterCallbackReason reason,
124
                   UErrorCode * err)
125
0
{
126
    /* the caller must have set the error code accordingly */
127
0
    (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
128
0
    return;
129
0
}
130
131
/*
 * SKIP callback for the from-Unicode direction.
 *
 * Clears *err (so the offending input is skipped) when
 *   - the code point is unassigned and default ignorable, or
 *   - context is NULL, or
 *   - the first byte of context is UCNV_PRV_STOP_ON_ILLEGAL and the reason is
 *     UCNV_UNASSIGNED.
 * In every other case *err is left as the caller set it. Reasons above
 * UCNV_IRREGULAR (the reset, close and clone calls) are ignored.
 *
 * fromUArgs, codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(
        const void *context,
        UConverterFromUnicodeArgs *fromUArgs,
        const UChar *codeUnits,
        int32_t length,
        UChar32 codePoint,
        UConverterCallbackReason reason,
        UErrorCode *err)
{
    (void)fromUArgs; (void)codeUnits; (void)length;

    if (reason > UCNV_IRREGULAR) {
        /* Reset, close and clone notifications need no action. */
        return;
    }

    if (reason == UCNV_UNASSIGNED) {
        if (IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)
                || context == NULL
                || *((const char *)context) == UCNV_PRV_STOP_ON_ILLEGAL) {
            *err = U_ZERO_ERROR;
        }
    } else if (context == NULL) {
        /* Without a context every illegal or irregular sequence is skipped too. */
        *err = U_ZERO_ERROR;
    }
    /* Otherwise the caller must have set the error code accordingly. */
}
161
162
/*
 * SUBSTITUTE callback for the from-Unicode direction.
 *
 * - Reasons above UCNV_IRREGULAR (reset, close and clone calls): ignored.
 * - Unassigned code point that is default ignorable: *err is cleared and
 *   nothing is written.
 * - context is NULL, or the first byte of context is UCNV_PRV_STOP_ON_ILLEGAL
 *   and the reason is UCNV_UNASSIGNED: *err is cleared and
 *   ucnv_cbFromUWriteSub is called, which may set *err again.
 * - Anything else: *err is left as the caller set it.
 *
 * codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(
        const void *context,
        UConverterFromUnicodeArgs *fromArgs,
        const UChar *codeUnits,
        int32_t length,
        UChar32 codePoint,
        UConverterCallbackReason reason,
        UErrorCode *err)
{
    (void)codeUnits; (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) {
        /* Default ignorable code points are dropped without a substitute. */
        *err = U_ZERO_ERROR;
        return;
    }

    if (context != NULL) {
        /* With a context, only unassigned input in stop-on-illegal mode is substituted. */
        if (reason != UCNV_UNASSIGNED
                || *((const char *)context) != UCNV_PRV_STOP_ON_ILLEGAL) {
            return;
        }
    }

    *err = U_ZERO_ERROR;
    ucnv_cbFromUWriteSub(fromArgs, 0, err);
}
192
193
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
194
 *uses a clean copy (resetted) of the converter, to convert that unicode
195
 *escape sequence to the target codepage (if conversion failure happens then
196
 *we revert to substituting with subchar)
197
 */
198
U_CAPI void    U_EXPORT2
199
UCNV_FROM_U_CALLBACK_ESCAPE (
200
                         const void *context,
201
                         UConverterFromUnicodeArgs *fromArgs,
202
                         const UChar *codeUnits,
203
                         int32_t length,
204
                         UChar32 codePoint,
205
                         UConverterCallbackReason reason,
206
                         UErrorCode * err)
207
0
{
208
209
0
  UChar valueString[VALUE_STRING_LENGTH];
210
0
  int32_t valueStringLength = 0;
211
0
  int32_t i = 0;
212
213
0
  const UChar *myValueSource = NULL;
214
0
  UErrorCode err2 = U_ZERO_ERROR;
215
0
  UConverterFromUCallback original = NULL;
216
0
  const void *originalContext;
217
218
0
  UConverterFromUCallback ignoredCallback = NULL;
219
0
  const void *ignoredContext;
220
  
221
0
  if (reason > UCNV_IRREGULAR)
222
0
  {
223
0
      return;
224
0
  }
225
0
  else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
226
0
  {
227
      /*
228
       * Skip if the codepoint has unicode property of default ignorable.
229
       */
230
0
      *err = U_ZERO_ERROR;
231
0
      return;
232
0
  }
233
234
0
  ucnv_setFromUCallBack (fromArgs->converter,
235
0
                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
236
0
                     NULL,
237
0
                     &original,
238
0
                     &originalContext,
239
0
                     &err2);
240
  
241
0
  if (U_FAILURE (err2))
242
0
  {
243
0
    *err = err2;
244
0
    return;
245
0
  } 
246
0
  if(context==NULL)
247
0
  { 
248
0
      while (i < length)
249
0
      {
250
0
        valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
251
0
        valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
252
0
        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
253
0
      }
254
0
  }
255
0
  else
256
0
  {
257
0
      switch(*((char*)context))
258
0
      {
259
0
      case UCNV_PRV_ESCAPE_JAVA:
260
0
          while (i < length)
261
0
          {
262
0
              valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
263
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
264
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
265
0
          }
266
0
          break;
267
268
0
      case UCNV_PRV_ESCAPE_C:
269
0
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
270
271
0
          if(length==2){
272
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
273
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
274
275
0
          }
276
0
          else{
277
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
278
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
279
0
          }
280
0
          break;
281
282
0
      case UCNV_PRV_ESCAPE_XML_DEC:
283
284
0
          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
285
0
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
286
0
          if(length==2){
287
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
288
0
          }
289
0
          else{
290
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
291
0
          }
292
0
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
293
0
          break;
294
295
0
      case UCNV_PRV_ESCAPE_XML_HEX:
296
297
0
          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
298
0
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
299
0
          valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
300
0
          if(length==2){
301
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
302
0
          }
303
0
          else{
304
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
305
0
          }
306
0
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
307
0
          break;
308
309
0
      case UCNV_PRV_ESCAPE_UNICODE:
310
0
          valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
311
0
          valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
312
0
          valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
313
0
          if (length == 2) {
314
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
315
0
          } else {
316
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
317
0
          }
318
0
          valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
319
0
          break;
320
321
0
      case UCNV_PRV_ESCAPE_CSS2:
322
0
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
323
0
          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
324
          /* Always add space character, because the next character might be whitespace,
325
             which would erroneously be considered the termination of the escape sequence. */
326
0
          valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
327
0
          break;
328
329
0
      default:
330
0
          while (i < length)
331
0
          {
332
0
              valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
333
0
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
334
0
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
335
0
          }
336
0
      }
337
0
  }  
338
0
  myValueSource = valueString;
339
340
  /* reset the error */
341
0
  *err = U_ZERO_ERROR;
342
343
0
  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
344
345
0
  ucnv_setFromUCallBack (fromArgs->converter,
346
0
                         original,
347
0
                         originalContext,
348
0
                         &ignoredCallback,
349
0
                         &ignoredContext,
350
0
                         &err2);
351
0
  if (U_FAILURE (err2))
352
0
  {
353
0
      *err = err2;
354
0
      return;
355
0
  }
356
357
0
  return;
358
0
}
359
360
361
362
/*
 * SKIP callback for the to-Unicode direction.
 *
 * Clears *err (so the offending bytes are skipped) when context is NULL, or
 * when the first byte of context is UCNV_PRV_STOP_ON_ILLEGAL and the reason
 * is UCNV_UNASSIGNED. Otherwise *err is left as the caller set it. Reasons
 * above UCNV_IRREGULAR (the reset, close and clone calls) are ignored.
 *
 * toArgs, codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(
        const void *context,
        UConverterToUnicodeArgs *toArgs,
        const char *codeUnits,
        int32_t length,
        UConverterCallbackReason reason,
        UErrorCode *err)
{
    (void)toArgs; (void)codeUnits; (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    if (context == NULL) {
        *err = U_ZERO_ERROR;
        return;
    }

    if (reason == UCNV_UNASSIGNED
            && *((const char *)context) == UCNV_PRV_STOP_ON_ILLEGAL) {
        *err = U_ZERO_ERROR;
    }
    /* Otherwise the caller must have set the error code accordingly. */
}
384
385
/*
 * SUBSTITUTE callback for the to-Unicode direction.
 *
 * When context is NULL, or when the first byte of context is
 * UCNV_PRV_STOP_ON_ILLEGAL and the reason is UCNV_UNASSIGNED, *err is cleared
 * and ucnv_cbToUWriteSub is called (which may set *err again). Otherwise *err
 * is left as the caller set it. Reasons above UCNV_IRREGULAR (the reset, close
 * and clone calls) are ignored.
 *
 * codeUnits and length are not used.
 */
U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(
        const void *context,
        UConverterToUnicodeArgs *toArgs,
        const char *codeUnits,
        int32_t length,
        UConverterCallbackReason reason,
        UErrorCode *err)
{
    (void)codeUnits; (void)length;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    if (context != NULL) {
        /* With a context, only unassigned input in stop-on-illegal mode is substituted. */
        if (reason != UCNV_UNASSIGNED
                || *((const char *)context) != UCNV_PRV_STOP_ON_ILLEGAL) {
            return;
        }
    }

    *err = U_ZERO_ERROR;
    ucnv_cbToUWriteSub(toArgs, 0, err);
}
407
408
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
409
 *and uses that as the substitution sequence
410
 */
411
U_CAPI void   U_EXPORT2
412
UCNV_TO_U_CALLBACK_ESCAPE (
413
                 const void *context,
414
                 UConverterToUnicodeArgs *toArgs,
415
                 const char* codeUnits,
416
                 int32_t length,
417
                 UConverterCallbackReason reason,
418
                 UErrorCode * err)
419
0
{
420
0
    UChar uniValueString[VALUE_STRING_LENGTH];
421
0
    int32_t valueStringLength = 0;
422
0
    int32_t i = 0;
423
424
0
    if (reason > UCNV_IRREGULAR)
425
0
    {
426
0
        return;
427
0
    }
428
429
0
    if(context==NULL)
430
0
    {    
431
0
        while (i < length)
432
0
        {
433
0
            uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
434
0
            uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
435
0
            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
436
0
        }
437
0
    }
438
0
    else
439
0
    {
440
0
        switch(*((char*)context))
441
0
        {
442
0
        case UCNV_PRV_ESCAPE_XML_DEC:
443
0
            while (i < length)
444
0
            {
445
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
446
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
447
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
448
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
449
0
            }
450
0
            break;
451
452
0
        case UCNV_PRV_ESCAPE_XML_HEX:
453
0
            while (i < length)
454
0
            {
455
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
456
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
457
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
458
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
459
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
460
0
            }
461
0
            break;
462
0
        case UCNV_PRV_ESCAPE_C:
463
0
            while (i < length)
464
0
            {
465
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
466
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
467
0
                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
468
0
            }
469
0
            break;
470
0
        default:
471
0
            while (i < length)
472
0
            {
473
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
474
0
                uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
475
0
                uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
476
0
                valueStringLength += 2;
477
0
            }
478
0
        }
479
0
    }
480
    /* reset the error */
481
0
    *err = U_ZERO_ERROR;
482
483
0
    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
484
0
}
485
486
#endif