Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/source/common/ucnvlat1.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/* 
4
**********************************************************************
5
*   Copyright (C) 2000-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*   file name:  ucnvlat1.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2000feb07
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_CONVERSION
20
21
#include "unicode/ucnv.h"
22
#include "unicode/uset.h"
23
#include "unicode/utf8.h"
24
#include "ucnv_bld.h"
25
#include "ucnv_cnv.h"
26
#include "ustr_imp.h"
27
28
/*
 * Control optimizations according to the platform.
 * When this is nonzero, _Latin1FromUnicodeWithOffsets() compiles in its
 * block-of-16 fast path (see the matching #if inside that function).
 */
29
#define LATIN1_UNROLL_FROM_UNICODE 1
30
31
/* ISO 8859-1 --------------------------------------------------------------- */
32
33
/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
34
U_CDECL_BEGIN
35
static void U_CALLCONV
36
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
37
51.6k
                            UErrorCode *pErrorCode) {
38
51.6k
    const uint8_t *source;
39
51.6k
    UChar *target;
40
51.6k
    int32_t targetCapacity, length;
41
51.6k
    int32_t *offsets;
42
43
51.6k
    int32_t sourceIndex;
44
45
    /* set up the local pointers */
46
51.6k
    source=(const uint8_t *)pArgs->source;
47
51.6k
    target=pArgs->target;
48
51.6k
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
49
51.6k
    offsets=pArgs->offsets;
50
51
51.6k
    sourceIndex=0;
52
53
    /*
54
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
55
     * for the minimum of the sourceLength and targetCapacity
56
     */
57
51.6k
    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
58
51.6k
    if(length<=targetCapacity) {
59
4.62k
        targetCapacity=length;
60
47.0k
    } else {
61
        /* target will be full */
62
47.0k
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
63
47.0k
        length=targetCapacity;
64
47.0k
    }
65
66
51.6k
    if(targetCapacity>=8) {
67
        /* This loop is unrolled for speed and improved pipelining. */
68
50.1k
        int32_t count, loops;
69
70
50.1k
        loops=count=targetCapacity>>3;
71
50.1k
        length=targetCapacity&=0x7;
72
6.05M
        do {
73
6.05M
            target[0]=source[0];
74
6.05M
            target[1]=source[1];
75
6.05M
            target[2]=source[2];
76
6.05M
            target[3]=source[3];
77
6.05M
            target[4]=source[4];
78
6.05M
            target[5]=source[5];
79
6.05M
            target[6]=source[6];
80
6.05M
            target[7]=source[7];
81
6.05M
            target+=8;
82
6.05M
            source+=8;
83
6.05M
        } while(--count>0);
84
85
50.1k
        if(offsets!=NULL) {
86
0
            do {
87
0
                offsets[0]=sourceIndex++;
88
0
                offsets[1]=sourceIndex++;
89
0
                offsets[2]=sourceIndex++;
90
0
                offsets[3]=sourceIndex++;
91
0
                offsets[4]=sourceIndex++;
92
0
                offsets[5]=sourceIndex++;
93
0
                offsets[6]=sourceIndex++;
94
0
                offsets[7]=sourceIndex++;
95
0
                offsets+=8;
96
0
            } while(--loops>0);
97
0
        }
98
50.1k
    }
99
100
    /* conversion loop */
101
66.4k
    while(targetCapacity>0) {
102
14.7k
        *target++=*source++;
103
14.7k
        --targetCapacity;
104
14.7k
    }
105
106
    /* write back the updated pointers */
107
51.6k
    pArgs->source=(const char *)source;
108
51.6k
    pArgs->target=target;
109
110
    /* set offsets */
111
51.6k
    if(offsets!=NULL) {
112
0
        while(length>0) {
113
0
            *offsets++=sourceIndex++;
114
0
            --length;
115
0
        }
116
0
        pArgs->offsets=offsets;
117
0
    }
118
51.6k
}
119
120
/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
121
/**
 * Returns the next ISO 8859-1 byte as a code point and advances
 * pArgs->source by one byte.
 *
 * @param pArgs      conversion arguments; only source and sourceLimit are used.
 * @param pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR when there is no input left.
 * @return the byte value, or 0xffff when the input is empty.
 */
static UChar32 U_CALLCONV
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
                    UErrorCode *pErrorCode) {
    const uint8_t *next=(const uint8_t *)pArgs->source;

    if(next>=(const uint8_t *)pArgs->sourceLimit) {
        /* no output because of empty input */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    const uint8_t byte=*next;
    pArgs->source=(const char *)(next+1);
    return byte;
}
134
135
/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
136
static void U_CALLCONV
137
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
138
0
                              UErrorCode *pErrorCode) {
139
0
    UConverter *cnv;
140
0
    const UChar *source, *sourceLimit;
141
0
    uint8_t *target, *oldTarget;
142
0
    int32_t targetCapacity, length;
143
0
    int32_t *offsets;
144
145
0
    UChar32 cp;
146
0
    UChar c, max;
147
148
0
    int32_t sourceIndex;
149
150
    /* set up the local pointers */
151
0
    cnv=pArgs->converter;
152
0
    source=pArgs->source;
153
0
    sourceLimit=pArgs->sourceLimit;
154
0
    target=oldTarget=(uint8_t *)pArgs->target;
155
0
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
156
0
    offsets=pArgs->offsets;
157
158
0
    if(cnv->sharedData==&_Latin1Data) {
159
0
        max=0xff; /* Latin-1 */
160
0
    } else {
161
0
        max=0x7f; /* US-ASCII */
162
0
    }
163
164
    /* get the converter state from UConverter */
165
0
    cp=cnv->fromUChar32;
166
167
    /* sourceIndex=-1 if the current character began in the previous buffer */
168
0
    sourceIndex= cp==0 ? 0 : -1;
169
170
    /*
171
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
172
     * for the minimum of the sourceLength and targetCapacity
173
     */
174
0
    length=(int32_t)(sourceLimit-source);
175
0
    if(length<targetCapacity) {
176
0
        targetCapacity=length;
177
0
    }
178
179
    /* conversion loop */
180
0
    if(cp!=0 && targetCapacity>0) {
181
0
        goto getTrail;
182
0
    }
183
184
0
#if LATIN1_UNROLL_FROM_UNICODE
185
    /* unroll the loop with the most common case */
186
0
    if(targetCapacity>=16) {
187
0
        int32_t count, loops;
188
0
        UChar u, oredChars;
189
190
0
        loops=count=targetCapacity>>4;
191
0
        do {
192
0
            oredChars=u=*source++;
193
0
            *target++=(uint8_t)u;
194
0
            oredChars|=u=*source++;
195
0
            *target++=(uint8_t)u;
196
0
            oredChars|=u=*source++;
197
0
            *target++=(uint8_t)u;
198
0
            oredChars|=u=*source++;
199
0
            *target++=(uint8_t)u;
200
0
            oredChars|=u=*source++;
201
0
            *target++=(uint8_t)u;
202
0
            oredChars|=u=*source++;
203
0
            *target++=(uint8_t)u;
204
0
            oredChars|=u=*source++;
205
0
            *target++=(uint8_t)u;
206
0
            oredChars|=u=*source++;
207
0
            *target++=(uint8_t)u;
208
0
            oredChars|=u=*source++;
209
0
            *target++=(uint8_t)u;
210
0
            oredChars|=u=*source++;
211
0
            *target++=(uint8_t)u;
212
0
            oredChars|=u=*source++;
213
0
            *target++=(uint8_t)u;
214
0
            oredChars|=u=*source++;
215
0
            *target++=(uint8_t)u;
216
0
            oredChars|=u=*source++;
217
0
            *target++=(uint8_t)u;
218
0
            oredChars|=u=*source++;
219
0
            *target++=(uint8_t)u;
220
0
            oredChars|=u=*source++;
221
0
            *target++=(uint8_t)u;
222
0
            oredChars|=u=*source++;
223
0
            *target++=(uint8_t)u;
224
225
            /* were all 16 entries really valid? */
226
0
            if(oredChars>max) {
227
                /* no, return to the first of these 16 */
228
0
                source-=16;
229
0
                target-=16;
230
0
                break;
231
0
            }
232
0
        } while(--count>0);
233
0
        count=loops-count;
234
0
        targetCapacity-=16*count;
235
236
0
        if(offsets!=NULL) {
237
0
            oldTarget+=16*count;
238
0
            while(count>0) {
239
0
                *offsets++=sourceIndex++;
240
0
                *offsets++=sourceIndex++;
241
0
                *offsets++=sourceIndex++;
242
0
                *offsets++=sourceIndex++;
243
0
                *offsets++=sourceIndex++;
244
0
                *offsets++=sourceIndex++;
245
0
                *offsets++=sourceIndex++;
246
0
                *offsets++=sourceIndex++;
247
0
                *offsets++=sourceIndex++;
248
0
                *offsets++=sourceIndex++;
249
0
                *offsets++=sourceIndex++;
250
0
                *offsets++=sourceIndex++;
251
0
                *offsets++=sourceIndex++;
252
0
                *offsets++=sourceIndex++;
253
0
                *offsets++=sourceIndex++;
254
0
                *offsets++=sourceIndex++;
255
0
                --count;
256
0
            }
257
0
        }
258
0
    }
259
0
#endif
260
261
    /* conversion loop */
262
0
    c=0;
263
0
    while(targetCapacity>0 && (c=*source++)<=max) {
264
        /* convert the Unicode code point */
265
0
        *target++=(uint8_t)c;
266
0
        --targetCapacity;
267
0
    }
268
269
0
    if(c>max) {
270
0
        cp=c;
271
0
        if(!U_IS_SURROGATE(cp)) {
272
            /* callback(unassigned) */
273
0
        } else if(U_IS_SURROGATE_LEAD(cp)) {
274
0
getTrail:
275
0
            if(source<sourceLimit) {
276
                /* test the following code unit */
277
0
                UChar trail=*source;
278
0
                if(U16_IS_TRAIL(trail)) {
279
0
                    ++source;
280
0
                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
281
                    /* this codepage does not map supplementary code points */
282
                    /* callback(unassigned) */
283
0
                } else {
284
                    /* this is an unmatched lead code unit (1st surrogate) */
285
                    /* callback(illegal) */
286
0
                }
287
0
            } else {
288
                /* no more input */
289
0
                cnv->fromUChar32=cp;
290
0
                goto noMoreInput;
291
0
            }
292
0
        } else {
293
            /* this is an unmatched trail code unit (2nd surrogate) */
294
            /* callback(illegal) */
295
0
        }
296
297
0
        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
298
0
        cnv->fromUChar32=cp;
299
0
    }
300
0
noMoreInput:
301
302
    /* set offsets since the start */
303
0
    if(offsets!=NULL) {
304
0
        size_t count=target-oldTarget;
305
0
        while(count>0) {
306
0
            *offsets++=sourceIndex++;
307
0
            --count;
308
0
        }
309
0
    }
310
311
0
    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
312
        /* target is full */
313
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
314
0
    }
315
316
    /* write back the updated pointers */
317
0
    pArgs->source=source;
318
0
    pArgs->target=(char *)target;
319
0
    pArgs->offsets=offsets;
320
0
}
321
322
/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
323
static void U_CALLCONV
324
ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
325
                    UConverterToUnicodeArgs *pToUArgs,
326
0
                    UErrorCode *pErrorCode) {
327
0
    UConverter *utf8;
328
0
    const uint8_t *source, *sourceLimit;
329
0
    uint8_t *target;
330
0
    int32_t targetCapacity;
331
332
0
    UChar32 c;
333
0
    uint8_t b, t1;
334
335
    /* set up the local pointers */
336
0
    utf8=pToUArgs->converter;
337
0
    source=(uint8_t *)pToUArgs->source;
338
0
    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
339
0
    target=(uint8_t *)pFromUArgs->target;
340
0
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
341
342
    /* get the converter state from the UTF-8 UConverter */
343
0
    c=(UChar32)utf8->toUnicodeStatus;
344
0
    if(c!=0 && source<sourceLimit) {
345
0
        if(targetCapacity==0) {
346
0
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
347
0
            return;
348
0
        } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
349
0
            ++source;
350
0
            *target++=(uint8_t)(((c&3)<<6)|t1);
351
0
            --targetCapacity;
352
353
0
            utf8->toUnicodeStatus=0;
354
0
            utf8->toULength=0;
355
0
        } else {
356
            /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
357
0
            *pErrorCode=U_USING_DEFAULT_WARNING;
358
0
            return;
359
0
        }
360
0
    }
361
362
    /*
363
     * Make sure that the last byte sequence before sourceLimit is complete
364
     * or runs into a lead byte.
365
     * In the conversion loop compare source with sourceLimit only once
366
     * per multi-byte character.
367
     * For Latin-1, adjust sourceLimit only for 1 trail byte because
368
     * the conversion loop handles at most 2-byte sequences.
369
     */
370
0
    if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
371
0
        --sourceLimit;
372
0
    }
373
374
    /* conversion loop */
375
0
    while(source<sourceLimit) {
376
0
        if(targetCapacity>0) {
377
0
            b=*source++;
378
0
            if(U8_IS_SINGLE(b)) {
379
                /* convert ASCII */
380
0
                *target++=(uint8_t)b;
381
0
                --targetCapacity;
382
0
            } else if( /* handle U+0080..U+00FF inline */
383
0
                       b>=0xc2 && b<=0xc3 &&
384
0
                       (t1=(uint8_t)(*source-0x80)) <= 0x3f
385
0
            ) {
386
0
                ++source;
387
0
                *target++=(uint8_t)(((b&3)<<6)|t1);
388
0
                --targetCapacity;
389
0
            } else {
390
                /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
391
0
                pToUArgs->source=(char *)(source-1);
392
0
                pFromUArgs->target=(char *)target;
393
0
                *pErrorCode=U_USING_DEFAULT_WARNING;
394
0
                return;
395
0
            }
396
0
        } else {
397
            /* target is full */
398
0
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
399
0
            break;
400
0
        }
401
0
    }
402
403
    /*
404
     * The sourceLimit may have been adjusted before the conversion loop
405
     * to stop before a truncated sequence.
406
     * If so, then collect the truncated sequence now.
407
     * For Latin-1, there is at most exactly one lead byte because of the
408
     * smaller sourceLimit adjustment logic.
409
     */
410
0
    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
411
0
        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
412
0
        utf8->toULength=1;
413
0
        utf8->mode=U8_COUNT_BYTES(b);
414
0
    }
415
416
    /* write back the updated pointers */
417
0
    pToUArgs->source=(char *)source;
418
0
    pFromUArgs->target=(char *)target;
419
0
}
420
421
static void U_CALLCONV
422
_Latin1GetUnicodeSet(const UConverter *cnv,
423
                     const USetAdder *sa,
424
                     UConverterUnicodeSet which,
425
0
                     UErrorCode *pErrorCode) {
426
0
    (void)cnv;
427
0
    (void)which;
428
0
    (void)pErrorCode;
429
0
    sa->addRange(sa->set, 0, 0xff);
430
0
}
431
U_CDECL_END
432
433
434
/*
 * Function table for the ISO 8859-1 converter.
 * NOTE(review): the initializer is positional; the slot names hinted at below
 * are assumptions -- confirm against the UConverterImpl declaration.
 */
static const UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

    /* two unused slots */
    NULL, NULL,

    /* three unused slots */
    NULL, NULL, NULL,

    /* to-Unicode: the same function fills two consecutive slots */
    _Latin1ToUnicodeWithOffsets,
    _Latin1ToUnicodeWithOffsets,
    /* from-Unicode: likewise */
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1GetNextUChar,

    /* four unused slots */
    NULL, NULL, NULL, NULL,
    _Latin1GetUnicodeSet,

    /* presumably the to-UTF-8 slot (unused), then the from-UTF-8 function */
    NULL,
    ucnv_Latin1FromUTF8
};
459
460
/*
 * Static description of the ISO 8859-1 converter.
 * NOTE(review): positional initializer; the field meanings noted below are
 * assumptions -- confirm against the UConverterStaticData declaration.
 */
static const UConverterStaticData _Latin1StaticData={
    sizeof(UConverterStaticData),
    "ISO-8859-1",
    819,                    /* presumably the codepage number */
    UCNV_IBM,
    UCNV_LATIN_1,
    1, 1,                   /* presumably min/max bytes per character */
    { 0x1a, 0, 0, 0 },      /* presumably the substitution byte(s) */
    1,
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
469
470
/*
 * Shared data object for ISO 8859-1: ties the static description to the
 * function table. _Latin1FromUnicodeWithOffsets() compares a converter's
 * sharedData against the address of this object to select the 0xff limit.
 */
const UConverterSharedData _Latin1Data=UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(
        &_Latin1StaticData,
        &_Latin1Impl);
472
473
/* US-ASCII ----------------------------------------------------------------- */
474
475
U_CDECL_BEGIN
476
/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
477
static void U_CALLCONV
478
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
479
0
                           UErrorCode *pErrorCode) {
480
0
    const uint8_t *source, *sourceLimit;
481
0
    UChar *target, *oldTarget;
482
0
    int32_t targetCapacity, length;
483
0
    int32_t *offsets;
484
485
0
    int32_t sourceIndex;
486
487
0
    uint8_t c;
488
489
    /* set up the local pointers */
490
0
    source=(const uint8_t *)pArgs->source;
491
0
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
492
0
    target=oldTarget=pArgs->target;
493
0
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
494
0
    offsets=pArgs->offsets;
495
496
    /* sourceIndex=-1 if the current character began in the previous buffer */
497
0
    sourceIndex=0;
498
499
    /*
500
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
501
     * for the minimum of the sourceLength and targetCapacity
502
     */
503
0
    length=(int32_t)(sourceLimit-source);
504
0
    if(length<targetCapacity) {
505
0
        targetCapacity=length;
506
0
    }
507
508
0
    if(targetCapacity>=8) {
509
        /* This loop is unrolled for speed and improved pipelining. */
510
0
        int32_t count, loops;
511
0
        UChar oredChars;
512
513
0
        loops=count=targetCapacity>>3;
514
0
        do {
515
0
            oredChars=target[0]=source[0];
516
0
            oredChars|=target[1]=source[1];
517
0
            oredChars|=target[2]=source[2];
518
0
            oredChars|=target[3]=source[3];
519
0
            oredChars|=target[4]=source[4];
520
0
            oredChars|=target[5]=source[5];
521
0
            oredChars|=target[6]=source[6];
522
0
            oredChars|=target[7]=source[7];
523
524
            /* were all 16 entries really valid? */
525
0
            if(oredChars>0x7f) {
526
                /* no, return to the first of these 16 */
527
0
                break;
528
0
            }
529
0
            source+=8;
530
0
            target+=8;
531
0
        } while(--count>0);
532
0
        count=loops-count;
533
0
        targetCapacity-=count*8;
534
535
0
        if(offsets!=NULL) {
536
0
            oldTarget+=count*8;
537
0
            while(count>0) {
538
0
                offsets[0]=sourceIndex++;
539
0
                offsets[1]=sourceIndex++;
540
0
                offsets[2]=sourceIndex++;
541
0
                offsets[3]=sourceIndex++;
542
0
                offsets[4]=sourceIndex++;
543
0
                offsets[5]=sourceIndex++;
544
0
                offsets[6]=sourceIndex++;
545
0
                offsets[7]=sourceIndex++;
546
0
                offsets+=8;
547
0
                --count;
548
0
            }
549
0
        }
550
0
    }
551
552
    /* conversion loop */
553
0
    c=0;
554
0
    while(targetCapacity>0 && (c=*source++)<=0x7f) {
555
0
        *target++=c;
556
0
        --targetCapacity;
557
0
    }
558
559
0
    if(c>0x7f) {
560
        /* callback(illegal); copy the current bytes to toUBytes[] */
561
0
        UConverter *cnv=pArgs->converter;
562
0
        cnv->toUBytes[0]=c;
563
0
        cnv->toULength=1;
564
0
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
565
0
    } else if(source<sourceLimit && target>=pArgs->targetLimit) {
566
        /* target is full */
567
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
568
0
    }
569
570
    /* set offsets since the start */
571
0
    if(offsets!=NULL) {
572
0
        size_t count=target-oldTarget;
573
0
        while(count>0) {
574
0
            *offsets++=sourceIndex++;
575
0
            --count;
576
0
        }
577
0
    }
578
579
    /* write back the updated pointers */
580
0
    pArgs->source=(const char *)source;
581
0
    pArgs->target=target;
582
0
    pArgs->offsets=offsets;
583
0
}
584
585
/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
586
/**
 * Returns the next US-ASCII byte as a code point and advances pArgs->source
 * by one byte.
 *
 * @param pArgs      conversion arguments; source is advanced even when the
 *                   byte turns out to be illegal.
 * @param pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR on empty input, or to
 *                   U_ILLEGAL_CHAR_FOUND for a byte above 0x7f (the byte is
 *                   then stored in the converter's toUBytes[0], toULength=1).
 * @return the byte value, or 0xffff in either error case.
 */
static UChar32 U_CALLCONV
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
                   UErrorCode *pErrorCode) {
    const uint8_t *next=(const uint8_t *)pArgs->source;

    if(next>=(const uint8_t *)pArgs->sourceLimit) {
        /* no output because of empty input */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    const uint8_t byte=*next++;
    pArgs->source=(const char *)next;
    if(byte<=0x7f) {
        return byte;
    }

    /* not ASCII: record the byte in the converter and report it */
    UConverter *cnv=pArgs->converter;
    cnv->toUBytes[0]=byte;
    cnv->toULength=1;
    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    return 0xffff;
}
611
612
/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
613
static void U_CALLCONV
614
ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
615
                   UConverterToUnicodeArgs *pToUArgs,
616
0
                   UErrorCode *pErrorCode) {
617
0
    const uint8_t *source, *sourceLimit;
618
0
    uint8_t *target;
619
0
    int32_t targetCapacity, length;
620
621
0
    uint8_t c;
622
623
0
    if(pToUArgs->converter->toUnicodeStatus!=0) {
624
        /* no handling of partial UTF-8 characters here, fall back to pivoting */
625
0
        *pErrorCode=U_USING_DEFAULT_WARNING;
626
0
        return;
627
0
    }
628
629
    /* set up the local pointers */
630
0
    source=(const uint8_t *)pToUArgs->source;
631
0
    sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
632
0
    target=(uint8_t *)pFromUArgs->target;
633
0
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
634
635
    /*
636
     * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
637
     * for the minimum of the sourceLength and targetCapacity
638
     */
639
0
    length=(int32_t)(sourceLimit-source);
640
0
    if(length<targetCapacity) {
641
0
        targetCapacity=length;
642
0
    }
643
644
    /* unroll the loop with the most common case */
645
0
    if(targetCapacity>=16) {
646
0
        int32_t count, loops;
647
0
        uint8_t oredChars;
648
649
0
        loops=count=targetCapacity>>4;
650
0
        do {
651
0
            oredChars=*target++=*source++;
652
0
            oredChars|=*target++=*source++;
653
0
            oredChars|=*target++=*source++;
654
0
            oredChars|=*target++=*source++;
655
0
            oredChars|=*target++=*source++;
656
0
            oredChars|=*target++=*source++;
657
0
            oredChars|=*target++=*source++;
658
0
            oredChars|=*target++=*source++;
659
0
            oredChars|=*target++=*source++;
660
0
            oredChars|=*target++=*source++;
661
0
            oredChars|=*target++=*source++;
662
0
            oredChars|=*target++=*source++;
663
0
            oredChars|=*target++=*source++;
664
0
            oredChars|=*target++=*source++;
665
0
            oredChars|=*target++=*source++;
666
0
            oredChars|=*target++=*source++;
667
668
            /* were all 16 entries really valid? */
669
0
            if(oredChars>0x7f) {
670
                /* no, return to the first of these 16 */
671
0
                source-=16;
672
0
                target-=16;
673
0
                break;
674
0
            }
675
0
        } while(--count>0);
676
0
        count=loops-count;
677
0
        targetCapacity-=16*count;
678
0
    }
679
680
    /* conversion loop */
681
0
    c=0;
682
0
    while(targetCapacity>0 && (c=*source)<=0x7f) {
683
0
        ++source;
684
0
        *target++=c;
685
0
        --targetCapacity;
686
0
    }
687
688
0
    if(c>0x7f) {
689
        /* non-ASCII character, handle in standard converter */
690
0
        *pErrorCode=U_USING_DEFAULT_WARNING;
691
0
    } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
692
        /* target is full */
693
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
694
0
    }
695
696
    /* write back the updated pointers */
697
0
    pToUArgs->source=(const char *)source;
698
0
    pFromUArgs->target=(char *)target;
699
0
}
700
701
static void U_CALLCONV
702
_ASCIIGetUnicodeSet(const UConverter *cnv,
703
                    const USetAdder *sa,
704
                    UConverterUnicodeSet which,
705
0
                    UErrorCode *pErrorCode) {
706
0
    (void)cnv;
707
0
    (void)which;
708
0
    (void)pErrorCode;
709
0
    sa->addRange(sa->set, 0, 0x7f);
710
0
}
711
U_CDECL_END
712
713
/*
 * Function table for the US-ASCII converter.
 * The from-Unicode slots reuse _Latin1FromUnicodeWithOffsets(), which applies
 * the 0x7f limit whenever the converter's sharedData is not &_Latin1Data.
 * NOTE(review): the initializer is positional; the slot names hinted at below
 * are assumptions -- confirm against the UConverterImpl declaration.
 */
static const UConverterImpl _ASCIIImpl={
    UCNV_US_ASCII,

    /* two unused slots */
    NULL, NULL,

    /* three unused slots */
    NULL, NULL, NULL,

    /* to-Unicode: the same function fills two consecutive slots */
    _ASCIIToUnicodeWithOffsets,
    _ASCIIToUnicodeWithOffsets,
    /* from-Unicode: shared with ISO 8859-1 */
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _ASCIIGetNextUChar,

    /* four unused slots */
    NULL, NULL, NULL, NULL,
    _ASCIIGetUnicodeSet,

    /* presumably the to-UTF-8 slot (unused), then the from-UTF-8 function */
    NULL,
    ucnv_ASCIIFromUTF8
};
738
739
/*
 * Static description of the US-ASCII converter.
 * NOTE(review): positional initializer; the field meanings noted below are
 * assumptions -- confirm against the UConverterStaticData declaration.
 */
static const UConverterStaticData _ASCIIStaticData={
    sizeof(UConverterStaticData),
    "US-ASCII",
    367,                    /* presumably the codepage number */
    UCNV_IBM,
    UCNV_US_ASCII,
    1, 1,                   /* presumably min/max bytes per character */
    { 0x1a, 0, 0, 0 },      /* presumably the substitution byte(s) */
    1,
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
748
749
/*
 * Shared data object for US-ASCII: ties the static description to the
 * function table.
 */
const UConverterSharedData _ASCIIData=UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(
        &_ASCIIStaticData,
        &_ASCIIImpl);
751
752
#endif