Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/ucnvlat1.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/* 
4
**********************************************************************
5
*   Copyright (C) 2000-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*   file name:  ucnvlat1.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2000feb07
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_CONVERSION
20
21
#include "unicode/ucnv.h"
22
#include "unicode/uset.h"
23
#include "unicode/utf8.h"
24
#include "ucnv_bld.h"
25
#include "ucnv_cnv.h"
26
27
/* control optimizations according to the platform */
28
#define LATIN1_UNROLL_FROM_UNICODE 1
29
30
/* ISO 8859-1 --------------------------------------------------------------- */
31
32
/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
33
U_CDECL_BEGIN
34
static void U_CALLCONV
35
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
36
1.11k
                            UErrorCode *pErrorCode) {
37
1.11k
    const uint8_t *source;
38
1.11k
    UChar *target;
39
1.11k
    int32_t targetCapacity, length;
40
1.11k
    int32_t *offsets;
41
42
1.11k
    int32_t sourceIndex;
43
44
    /* set up the local pointers */
45
1.11k
    source=(const uint8_t *)pArgs->source;
46
1.11k
    target=pArgs->target;
47
1.11k
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
48
1.11k
    offsets=pArgs->offsets;
49
50
1.11k
    sourceIndex=0;
51
52
    /*
53
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
54
     * for the minimum of the sourceLength and targetCapacity
55
     */
56
1.11k
    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
57
1.11k
    if(length<=targetCapacity) {
58
1.11k
        targetCapacity=length;
59
1.11k
    } else {
60
        /* target will be full */
61
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
62
0
        length=targetCapacity;
63
0
    }
64
65
1.11k
    if(targetCapacity>=8) {
66
        /* This loop is unrolled for speed and improved pipelining. */
67
442
        int32_t count, loops;
68
69
442
        loops=count=targetCapacity>>3;
70
442
        length=targetCapacity&=0x7;
71
292k
        do {
72
292k
            target[0]=source[0];
73
292k
            target[1]=source[1];
74
292k
            target[2]=source[2];
75
292k
            target[3]=source[3];
76
292k
            target[4]=source[4];
77
292k
            target[5]=source[5];
78
292k
            target[6]=source[6];
79
292k
            target[7]=source[7];
80
292k
            target+=8;
81
292k
            source+=8;
82
292k
        } while(--count>0);
83
84
442
        if(offsets!=NULL) {
85
0
            do {
86
0
                offsets[0]=sourceIndex++;
87
0
                offsets[1]=sourceIndex++;
88
0
                offsets[2]=sourceIndex++;
89
0
                offsets[3]=sourceIndex++;
90
0
                offsets[4]=sourceIndex++;
91
0
                offsets[5]=sourceIndex++;
92
0
                offsets[6]=sourceIndex++;
93
0
                offsets[7]=sourceIndex++;
94
0
                offsets+=8;
95
0
            } while(--loops>0);
96
0
        }
97
442
    }
98
99
    /* conversion loop */
100
4.98k
    while(targetCapacity>0) {
101
3.87k
        *target++=*source++;
102
3.87k
        --targetCapacity;
103
3.87k
    }
104
105
    /* write back the updated pointers */
106
1.11k
    pArgs->source=(const char *)source;
107
1.11k
    pArgs->target=target;
108
109
    /* set offsets */
110
1.11k
    if(offsets!=NULL) {
111
0
        while(length>0) {
112
0
            *offsets++=sourceIndex++;
113
0
            --length;
114
0
        }
115
0
        pArgs->offsets=offsets;
116
0
    }
117
1.11k
}
118
119
/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
120
static UChar32 U_CALLCONV
121
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
122
0
                    UErrorCode *pErrorCode) {
123
0
    const uint8_t *source=(const uint8_t *)pArgs->source;
124
0
    if(source<(const uint8_t *)pArgs->sourceLimit) {
125
0
        pArgs->source=(const char *)(source+1);
126
0
        return *source;
127
0
    }
128
129
    /* no output because of empty input */
130
0
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
131
0
    return 0xffff;
132
0
}
133
134
/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
135
static void U_CALLCONV
136
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
137
0
                              UErrorCode *pErrorCode) {
138
0
    UConverter *cnv;
139
0
    const UChar *source, *sourceLimit;
140
0
    uint8_t *target, *oldTarget;
141
0
    int32_t targetCapacity, length;
142
0
    int32_t *offsets;
143
144
0
    UChar32 cp;
145
0
    UChar c, max;
146
147
0
    int32_t sourceIndex;
148
149
    /* set up the local pointers */
150
0
    cnv=pArgs->converter;
151
0
    source=pArgs->source;
152
0
    sourceLimit=pArgs->sourceLimit;
153
0
    target=oldTarget=(uint8_t *)pArgs->target;
154
0
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
155
0
    offsets=pArgs->offsets;
156
157
0
    if(cnv->sharedData==&_Latin1Data) {
158
0
        max=0xff; /* Latin-1 */
159
0
    } else {
160
0
        max=0x7f; /* US-ASCII */
161
0
    }
162
163
    /* get the converter state from UConverter */
164
0
    cp=cnv->fromUChar32;
165
166
    /* sourceIndex=-1 if the current character began in the previous buffer */
167
0
    sourceIndex= cp==0 ? 0 : -1;
168
169
    /*
170
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
171
     * for the minimum of the sourceLength and targetCapacity
172
     */
173
0
    length=(int32_t)(sourceLimit-source);
174
0
    if(length<targetCapacity) {
175
0
        targetCapacity=length;
176
0
    }
177
178
    /* conversion loop */
179
0
    if(cp!=0 && targetCapacity>0) {
180
0
        goto getTrail;
181
0
    }
182
183
0
#if LATIN1_UNROLL_FROM_UNICODE
184
    /* unroll the loop with the most common case */
185
0
    if(targetCapacity>=16) {
186
0
        int32_t count, loops;
187
0
        UChar u, oredChars;
188
189
0
        loops=count=targetCapacity>>4;
190
0
        do {
191
0
            oredChars=u=*source++;
192
0
            *target++=(uint8_t)u;
193
0
            oredChars|=u=*source++;
194
0
            *target++=(uint8_t)u;
195
0
            oredChars|=u=*source++;
196
0
            *target++=(uint8_t)u;
197
0
            oredChars|=u=*source++;
198
0
            *target++=(uint8_t)u;
199
0
            oredChars|=u=*source++;
200
0
            *target++=(uint8_t)u;
201
0
            oredChars|=u=*source++;
202
0
            *target++=(uint8_t)u;
203
0
            oredChars|=u=*source++;
204
0
            *target++=(uint8_t)u;
205
0
            oredChars|=u=*source++;
206
0
            *target++=(uint8_t)u;
207
0
            oredChars|=u=*source++;
208
0
            *target++=(uint8_t)u;
209
0
            oredChars|=u=*source++;
210
0
            *target++=(uint8_t)u;
211
0
            oredChars|=u=*source++;
212
0
            *target++=(uint8_t)u;
213
0
            oredChars|=u=*source++;
214
0
            *target++=(uint8_t)u;
215
0
            oredChars|=u=*source++;
216
0
            *target++=(uint8_t)u;
217
0
            oredChars|=u=*source++;
218
0
            *target++=(uint8_t)u;
219
0
            oredChars|=u=*source++;
220
0
            *target++=(uint8_t)u;
221
0
            oredChars|=u=*source++;
222
0
            *target++=(uint8_t)u;
223
224
            /* were all 16 entries really valid? */
225
0
            if(oredChars>max) {
226
                /* no, return to the first of these 16 */
227
0
                source-=16;
228
0
                target-=16;
229
0
                break;
230
0
            }
231
0
        } while(--count>0);
232
0
        count=loops-count;
233
0
        targetCapacity-=16*count;
234
235
0
        if(offsets!=NULL) {
236
0
            oldTarget+=16*count;
237
0
            while(count>0) {
238
0
                *offsets++=sourceIndex++;
239
0
                *offsets++=sourceIndex++;
240
0
                *offsets++=sourceIndex++;
241
0
                *offsets++=sourceIndex++;
242
0
                *offsets++=sourceIndex++;
243
0
                *offsets++=sourceIndex++;
244
0
                *offsets++=sourceIndex++;
245
0
                *offsets++=sourceIndex++;
246
0
                *offsets++=sourceIndex++;
247
0
                *offsets++=sourceIndex++;
248
0
                *offsets++=sourceIndex++;
249
0
                *offsets++=sourceIndex++;
250
0
                *offsets++=sourceIndex++;
251
0
                *offsets++=sourceIndex++;
252
0
                *offsets++=sourceIndex++;
253
0
                *offsets++=sourceIndex++;
254
0
                --count;
255
0
            }
256
0
        }
257
0
    }
258
0
#endif
259
260
    /* conversion loop */
261
0
    c=0;
262
0
    while(targetCapacity>0 && (c=*source++)<=max) {
263
        /* convert the Unicode code point */
264
0
        *target++=(uint8_t)c;
265
0
        --targetCapacity;
266
0
    }
267
268
0
    if(c>max) {
269
0
        cp=c;
270
0
        if(!U_IS_SURROGATE(cp)) {
271
            /* callback(unassigned) */
272
0
        } else if(U_IS_SURROGATE_LEAD(cp)) {
273
0
getTrail:
274
0
            if(source<sourceLimit) {
275
                /* test the following code unit */
276
0
                UChar trail=*source;
277
0
                if(U16_IS_TRAIL(trail)) {
278
0
                    ++source;
279
0
                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
280
                    /* this codepage does not map supplementary code points */
281
                    /* callback(unassigned) */
282
0
                } else {
283
                    /* this is an unmatched lead code unit (1st surrogate) */
284
                    /* callback(illegal) */
285
0
                }
286
0
            } else {
287
                /* no more input */
288
0
                cnv->fromUChar32=cp;
289
0
                goto noMoreInput;
290
0
            }
291
0
        } else {
292
            /* this is an unmatched trail code unit (2nd surrogate) */
293
            /* callback(illegal) */
294
0
        }
295
296
0
        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
297
0
        cnv->fromUChar32=cp;
298
0
    }
299
0
noMoreInput:
300
301
    /* set offsets since the start */
302
0
    if(offsets!=NULL) {
303
0
        size_t count=target-oldTarget;
304
0
        while(count>0) {
305
0
            *offsets++=sourceIndex++;
306
0
            --count;
307
0
        }
308
0
    }
309
310
0
    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
311
        /* target is full */
312
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
313
0
    }
314
315
    /* write back the updated pointers */
316
0
    pArgs->source=source;
317
0
    pArgs->target=(char *)target;
318
0
    pArgs->offsets=offsets;
319
0
}
320
321
/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
322
static void U_CALLCONV
323
ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
324
                    UConverterToUnicodeArgs *pToUArgs,
325
0
                    UErrorCode *pErrorCode) {
326
0
    UConverter *utf8;
327
0
    const uint8_t *source, *sourceLimit;
328
0
    uint8_t *target;
329
0
    int32_t targetCapacity;
330
331
0
    UChar32 c;
332
0
    uint8_t b, t1;
333
334
    /* set up the local pointers */
335
0
    utf8=pToUArgs->converter;
336
0
    source=(uint8_t *)pToUArgs->source;
337
0
    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
338
0
    target=(uint8_t *)pFromUArgs->target;
339
0
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
340
341
    /* get the converter state from the UTF-8 UConverter */
342
0
    c=(UChar32)utf8->toUnicodeStatus;
343
0
    if(c!=0 && source<sourceLimit) {
344
0
        if(targetCapacity==0) {
345
0
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
346
0
            return;
347
0
        } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
348
0
            ++source;
349
0
            *target++=(uint8_t)(((c&3)<<6)|t1);
350
0
            --targetCapacity;
351
352
0
            utf8->toUnicodeStatus=0;
353
0
            utf8->toULength=0;
354
0
        } else {
355
            /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
356
0
            *pErrorCode=U_USING_DEFAULT_WARNING;
357
0
            return;
358
0
        }
359
0
    }
360
361
    /*
362
     * Make sure that the last byte sequence before sourceLimit is complete
363
     * or runs into a lead byte.
364
     * In the conversion loop compare source with sourceLimit only once
365
     * per multi-byte character.
366
     * For Latin-1, adjust sourceLimit only for 1 trail byte because
367
     * the conversion loop handles at most 2-byte sequences.
368
     */
369
0
    if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
370
0
        --sourceLimit;
371
0
    }
372
373
    /* conversion loop */
374
0
    while(source<sourceLimit) {
375
0
        if(targetCapacity>0) {
376
0
            b=*source++;
377
0
            if((int8_t)b>=0) {
378
                /* convert ASCII */
379
0
                *target++=(uint8_t)b;
380
0
                --targetCapacity;
381
0
            } else if( /* handle U+0080..U+00FF inline */
382
0
                       b>=0xc2 && b<=0xc3 &&
383
0
                       (t1=(uint8_t)(*source-0x80)) <= 0x3f
384
0
            ) {
385
0
                ++source;
386
0
                *target++=(uint8_t)(((b&3)<<6)|t1);
387
0
                --targetCapacity;
388
0
            } else {
389
                /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
390
0
                pToUArgs->source=(char *)(source-1);
391
0
                pFromUArgs->target=(char *)target;
392
0
                *pErrorCode=U_USING_DEFAULT_WARNING;
393
0
                return;
394
0
            }
395
0
        } else {
396
            /* target is full */
397
0
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
398
0
            break;
399
0
        }
400
0
    }
401
402
    /*
403
     * The sourceLimit may have been adjusted before the conversion loop
404
     * to stop before a truncated sequence.
405
     * If so, then collect the truncated sequence now.
406
     * For Latin-1, there is at most exactly one lead byte because of the
407
     * smaller sourceLimit adjustment logic.
408
     */
409
0
    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
410
0
        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
411
0
        utf8->toULength=1;
412
0
        utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
413
0
    }
414
415
    /* write back the updated pointers */
416
0
    pToUArgs->source=(char *)source;
417
0
    pFromUArgs->target=(char *)target;
418
0
}
419
420
static void U_CALLCONV
421
_Latin1GetUnicodeSet(const UConverter *cnv,
422
                     const USetAdder *sa,
423
                     UConverterUnicodeSet which,
424
0
                     UErrorCode *pErrorCode) {
425
0
    (void)cnv;
426
0
    (void)which;
427
0
    (void)pErrorCode;
428
0
    sa->addRange(sa->set, 0, 0xff);
429
0
}
430
U_CDECL_END
431
432
433
/*
 * Implementation table for the ISO 8859-1 converter (type UCNV_LATIN_1).
 * The initializer is positional: the meaning of each slot is fixed by the
 * declaration of UConverterImpl, which is not part of this file
 * (presumably in ucnv_cnv.h -- TODO confirm the slot names there).
 * Slots initialized with NULL are left unset for this converter.
 */
static const UConverterImpl _Latin1Impl={
434
    UCNV_LATIN_1,
435
436
    NULL,
437
    NULL,
438
439
    NULL,
440
    NULL,
441
    NULL,
442
443
    /*
     * Each conversion function is listed twice, presumably once for the
     * plain slot and once for the with-offsets slot (TODO confirm); both
     * functions test pArgs->offsets for NULL themselves.
     */
    _Latin1ToUnicodeWithOffsets,
444
    _Latin1ToUnicodeWithOffsets,
445
    _Latin1FromUnicodeWithOffsets,
446
    _Latin1FromUnicodeWithOffsets,
447
    _Latin1GetNextUChar,
448
449
    NULL,
450
    NULL,
451
    NULL,
452
    NULL,
453
    _Latin1GetUnicodeSet,
454
455
    NULL,
456
    /* direct UTF-8 to Latin-1 conversion, see ucnv_Latin1FromUTF8() */
    ucnv_Latin1FromUTF8
457
};
458
459
/*
 * Static description of the ISO 8859-1 converter.
 * The initializer is positional; the field names come from the declaration
 * of UConverterStaticData, which is not part of this file (TODO confirm the
 * notes below against that declaration).
 */
static const UConverterStaticData _Latin1StaticData={
460
    /* size of this structure */
    sizeof(UConverterStaticData),
461
    /* converter name */
    "ISO-8859-1",
462
    /* presumably: codepage number 819, platform, converter type, min and max bytes per character */
    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
463
    /* presumably: substitution byte(s) 0x1a and their length, followed by two flags */
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
464
    0,
465
    0,
466
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
467
};
468
469
/*
 * Shared data object of the ISO 8859-1 converter, combining its static data
 * and its implementation table.
 * _Latin1FromUnicodeWithOffsets() compares cnv->sharedData with the address
 * of this object to choose between the Latin-1 (0xff) and the US-ASCII (0x7f)
 * limit.
 */
const UConverterSharedData _Latin1Data=
470
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
471
472
/* US-ASCII ----------------------------------------------------------------- */
473
474
U_CDECL_BEGIN
475
/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
476
static void U_CALLCONV
477
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
478
1.28M
                           UErrorCode *pErrorCode) {
479
1.28M
    const uint8_t *source, *sourceLimit;
480
1.28M
    UChar *target, *oldTarget;
481
1.28M
    int32_t targetCapacity, length;
482
1.28M
    int32_t *offsets;
483
484
1.28M
    int32_t sourceIndex;
485
486
1.28M
    uint8_t c;
487
488
    /* set up the local pointers */
489
1.28M
    source=(const uint8_t *)pArgs->source;
490
1.28M
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
491
1.28M
    target=oldTarget=pArgs->target;
492
1.28M
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
493
1.28M
    offsets=pArgs->offsets;
494
495
    /* sourceIndex=-1 if the current character began in the previous buffer */
496
1.28M
    sourceIndex=0;
497
498
    /*
499
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
500
     * for the minimum of the sourceLength and targetCapacity
501
     */
502
1.28M
    length=(int32_t)(sourceLimit-source);
503
1.28M
    if(length<targetCapacity) {
504
1.28M
        targetCapacity=length;
505
1.28M
    }
506
507
1.28M
    if(targetCapacity>=8) {
508
        /* This loop is unrolled for speed and improved pipelining. */
509
1.27M
        int32_t count, loops;
510
1.27M
        UChar oredChars;
511
512
1.27M
        loops=count=targetCapacity>>3;
513
1.38M
        do {
514
1.38M
            oredChars=target[0]=source[0];
515
1.38M
            oredChars|=target[1]=source[1];
516
1.38M
            oredChars|=target[2]=source[2];
517
1.38M
            oredChars|=target[3]=source[3];
518
1.38M
            oredChars|=target[4]=source[4];
519
1.38M
            oredChars|=target[5]=source[5];
520
1.38M
            oredChars|=target[6]=source[6];
521
1.38M
            oredChars|=target[7]=source[7];
522
523
            /* were all 16 entries really valid? */
524
1.38M
            if(oredChars>0x7f) {
525
                /* no, return to the first of these 16 */
526
1.27M
                break;
527
1.27M
            }
528
109k
            source+=8;
529
109k
            target+=8;
530
109k
        } while(--count>0);
531
0
        count=loops-count;
532
1.27M
        targetCapacity-=count*8;
533
534
1.27M
        if(offsets!=NULL) {
535
0
            oldTarget+=count*8;
536
0
            while(count>0) {
537
0
                offsets[0]=sourceIndex++;
538
0
                offsets[1]=sourceIndex++;
539
0
                offsets[2]=sourceIndex++;
540
0
                offsets[3]=sourceIndex++;
541
0
                offsets[4]=sourceIndex++;
542
0
                offsets[5]=sourceIndex++;
543
0
                offsets[6]=sourceIndex++;
544
0
                offsets[7]=sourceIndex++;
545
0
                offsets+=8;
546
0
                --count;
547
0
            }
548
0
        }
549
1.27M
    }
550
551
    /* conversion loop */
552
0
    c=0;
553
1.47M
    while(targetCapacity>0 && (c=*source++)<=0x7f) {
554
193k
        *target++=c;
555
193k
        --targetCapacity;
556
193k
    }
557
558
1.28M
    if(c>0x7f) {
559
        /* callback(illegal); copy the current bytes to toUBytes[] */
560
1.28M
        UConverter *cnv=pArgs->converter;
561
1.28M
        cnv->toUBytes[0]=c;
562
1.28M
        cnv->toULength=1;
563
1.28M
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
564
1.28M
    } else if(source<sourceLimit && target>=pArgs->targetLimit) {
565
        /* target is full */
566
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
567
0
    }
568
569
    /* set offsets since the start */
570
1.28M
    if(offsets!=NULL) {
571
0
        size_t count=target-oldTarget;
572
0
        while(count>0) {
573
0
            *offsets++=sourceIndex++;
574
0
            --count;
575
0
        }
576
0
    }
577
578
    /* write back the updated pointers */
579
1.28M
    pArgs->source=(const char *)source;
580
1.28M
    pArgs->target=target;
581
1.28M
    pArgs->offsets=offsets;
582
1.28M
}
583
584
/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
585
static UChar32 U_CALLCONV
586
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
587
0
                   UErrorCode *pErrorCode) {
588
0
    const uint8_t *source;
589
0
    uint8_t b;
590
591
0
    source=(const uint8_t *)pArgs->source;
592
0
    if(source<(const uint8_t *)pArgs->sourceLimit) {
593
0
        b=*source++;
594
0
        pArgs->source=(const char *)source;
595
0
        if(b<=0x7f) {
596
0
            return b;
597
0
        } else {
598
0
            UConverter *cnv=pArgs->converter;
599
0
            cnv->toUBytes[0]=b;
600
0
            cnv->toULength=1;
601
0
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
602
0
            return 0xffff;
603
0
        }
604
0
    }
605
606
    /* no output because of empty input */
607
0
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
608
0
    return 0xffff;
609
0
}
610
611
/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
612
static void U_CALLCONV
613
ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
614
                   UConverterToUnicodeArgs *pToUArgs,
615
0
                   UErrorCode *pErrorCode) {
616
0
    const uint8_t *source, *sourceLimit;
617
0
    uint8_t *target;
618
0
    int32_t targetCapacity, length;
619
620
0
    uint8_t c;
621
622
0
    if(pToUArgs->converter->toUnicodeStatus!=0) {
623
        /* no handling of partial UTF-8 characters here, fall back to pivoting */
624
0
        *pErrorCode=U_USING_DEFAULT_WARNING;
625
0
        return;
626
0
    }
627
628
    /* set up the local pointers */
629
0
    source=(const uint8_t *)pToUArgs->source;
630
0
    sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
631
0
    target=(uint8_t *)pFromUArgs->target;
632
0
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
633
634
    /*
635
     * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
636
     * for the minimum of the sourceLength and targetCapacity
637
     */
638
0
    length=(int32_t)(sourceLimit-source);
639
0
    if(length<targetCapacity) {
640
0
        targetCapacity=length;
641
0
    }
642
643
    /* unroll the loop with the most common case */
644
0
    if(targetCapacity>=16) {
645
0
        int32_t count, loops;
646
0
        uint8_t oredChars;
647
648
0
        loops=count=targetCapacity>>4;
649
0
        do {
650
0
            oredChars=*target++=*source++;
651
0
            oredChars|=*target++=*source++;
652
0
            oredChars|=*target++=*source++;
653
0
            oredChars|=*target++=*source++;
654
0
            oredChars|=*target++=*source++;
655
0
            oredChars|=*target++=*source++;
656
0
            oredChars|=*target++=*source++;
657
0
            oredChars|=*target++=*source++;
658
0
            oredChars|=*target++=*source++;
659
0
            oredChars|=*target++=*source++;
660
0
            oredChars|=*target++=*source++;
661
0
            oredChars|=*target++=*source++;
662
0
            oredChars|=*target++=*source++;
663
0
            oredChars|=*target++=*source++;
664
0
            oredChars|=*target++=*source++;
665
0
            oredChars|=*target++=*source++;
666
667
            /* were all 16 entries really valid? */
668
0
            if(oredChars>0x7f) {
669
                /* no, return to the first of these 16 */
670
0
                source-=16;
671
0
                target-=16;
672
0
                break;
673
0
            }
674
0
        } while(--count>0);
675
0
        count=loops-count;
676
0
        targetCapacity-=16*count;
677
0
    }
678
679
    /* conversion loop */
680
0
    c=0;
681
0
    while(targetCapacity>0 && (c=*source)<=0x7f) {
682
0
        ++source;
683
0
        *target++=c;
684
0
        --targetCapacity;
685
0
    }
686
687
0
    if(c>0x7f) {
688
        /* non-ASCII character, handle in standard converter */
689
0
        *pErrorCode=U_USING_DEFAULT_WARNING;
690
0
    } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
691
        /* target is full */
692
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
693
0
    }
694
695
    /* write back the updated pointers */
696
0
    pToUArgs->source=(const char *)source;
697
0
    pFromUArgs->target=(char *)target;
698
0
}
699
700
static void U_CALLCONV
701
_ASCIIGetUnicodeSet(const UConverter *cnv,
702
                    const USetAdder *sa,
703
                    UConverterUnicodeSet which,
704
0
                    UErrorCode *pErrorCode) {
705
0
    (void)cnv;
706
0
    (void)which;
707
0
    (void)pErrorCode;
708
0
    sa->addRange(sa->set, 0, 0x7f);
709
0
}
710
U_CDECL_END
711
712
/*
 * Implementation table for the US-ASCII converter (type UCNV_US_ASCII).
 * The initializer is positional: the meaning of each slot is fixed by the
 * declaration of UConverterImpl, which is not part of this file
 * (presumably in ucnv_cnv.h -- TODO confirm the slot names there).
 * Slots initialized with NULL are left unset for this converter.
 */
static const UConverterImpl _ASCIIImpl={
713
    UCNV_US_ASCII,
714
715
    NULL,
716
    NULL,
717
718
    NULL,
719
    NULL,
720
    NULL,
721
722
    /*
     * Each conversion function is listed twice, presumably once for the
     * plain slot and once for the with-offsets slot (TODO confirm); both
     * functions test pArgs->offsets for NULL themselves.
     */
    _ASCIIToUnicodeWithOffsets,
723
    _ASCIIToUnicodeWithOffsets,
724
    /*
     * The from-Unicode direction shares the Latin-1 function, which limits
     * itself to 0x7f whenever the converter's shared data is not _Latin1Data.
     */
    _Latin1FromUnicodeWithOffsets,
725
    _Latin1FromUnicodeWithOffsets,
726
    _ASCIIGetNextUChar,
727
728
    NULL,
729
    NULL,
730
    NULL,
731
    NULL,
732
    _ASCIIGetUnicodeSet,
733
734
    NULL,
735
    /* direct UTF-8 to US-ASCII validate-and-copy, see ucnv_ASCIIFromUTF8() */
    ucnv_ASCIIFromUTF8
736
};
737
738
/*
 * Static description of the US-ASCII converter.
 * The initializer is positional; the field names come from the declaration
 * of UConverterStaticData, which is not part of this file (TODO confirm the
 * notes below against that declaration).
 */
static const UConverterStaticData _ASCIIStaticData={
739
    /* size of this structure */
    sizeof(UConverterStaticData),
740
    /* converter name */
    "US-ASCII",
741
    /* presumably: codepage number 367, platform, converter type, min and max bytes per character */
    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
742
    /* presumably: substitution byte(s) 0x1a and their length, followed by two flags */
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
743
    0,
744
    0,
745
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
746
};
747
748
/*
 * Shared data object of the US-ASCII converter, combining its static data
 * and its implementation table.
 */
const UConverterSharedData _ASCIIData=
749
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
750
751
#endif