Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/uinvchar.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1999-2010, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uinvchar.c
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:2
14
*
15
*   created on: 2004sep14
16
*   created by: Markus W. Scherer
17
*
18
*   Functions for handling invariant characters, moved here from putil.c
19
*   for better modularization.
20
*/
21
22
#include "unicode/utypes.h"
23
#include "unicode/ustring.h"
24
#include "udataswp.h"
25
#include "cstring.h"
26
#include "cmemory.h"
27
#include "uassert.h"
28
#include "uinvchar.h"
29
30
/* invariant-character handling --------------------------------------------- */
31
32
/*
33
 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
34
 * appropriately for most EBCDIC codepages.
35
 *
36
 * They currently also map most other ASCII graphic characters,
37
 * appropriately for codepages 37 and 1047.
38
 * Exceptions: The characters for []^ have different codes in 37 & 1047.
39
 * Both versions are mapped to ASCII.
40
 *
41
 *    ASCII 37 1047
42
 * [     5B BA   AD
43
 * ]     5D BB   BD
44
 * ^     5E B0   5F
45
 *
46
 * There are no mappings for variant characters from Unicode to EBCDIC.
47
 *
48
 * Currently, C0 control codes are also included in these maps.
49
 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
50
 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
51
 * but there is no mapping for ASCII LF back to EBCDIC.
52
 *
53
 *    ASCII EBCDIC S/390-OE
54
 * LF    0A     25       15
55
 * NEL   85     15       25
56
 *
57
 * The maps below explicitly exclude the variant
58
 * control and graphical characters that are in ASCII-based
59
 * codepages at 0x80 and above.
60
 * "No mapping" is expressed by mapping to a 00 byte.
61
 *
62
 * These tables do not establish a converter or a codepage.
63
 */
64
65
/*
 * EBCDIC-to-ASCII byte map: the index is the EBCDIC code, the value is the
 * ASCII code. A 0x00 value at any index other than 0 means "no mapping".
 * Each row below holds 16 consecutive entries.
 */
static const uint8_t asciiFromEbcdic[256]={
66
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
67
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
68
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
69
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
70
71
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
72
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
73
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
74
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
75
76
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
78
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
79
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
80
81
    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
82
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
83
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
84
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
85
};
86
87
/*
 * ASCII-to-EBCDIC byte map: the index is the ASCII code, the value is the
 * EBCDIC code. A 0x00 value at any index other than 0 means "no mapping";
 * every entry from index 0x80 upward is 0.
 * Each row below holds 16 consecutive entries.
 */
static const uint8_t ebcdicFromAscii[256]={
88
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
89
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
90
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
91
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
92
93
    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
94
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
95
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
96
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
97
98
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102
103
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
107
};
108
109
/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase:
 * the index is the EBCDIC code, the value is the ASCII code with A-Z folded
 * to a-z. A 0x00 value at any index other than 0 means "no mapping".
 *
 * Only the uppercase-letter cells (rows 0xc0, 0xd0 and 0xe0) differ from
 * asciiFromEbcdic[]. In particular EBCDIC 0xe0 stays mapped to ASCII 0x5c
 * (backslash), which is not a letter and must not receive the 0x20 offset.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    /* 0x00..0x3f */
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 0x40..0x7f */
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 0x80..0xbf */
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* 0xc0..0xff: uppercase letters are folded to lowercase here */
    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
131
132
/*
133
 * Bit sets indicating which characters of the ASCII repertoire
134
 * (by ASCII/Unicode code) are "invariant".
135
 * See utypes.h for more details.
136
 *
137
 * As invariant are considered the characters of the ASCII repertoire except
138
 * for the following:
139
 * 21  '!' <exclamation mark>
140
 * 23  '#' <number sign>
141
 * 24  '$' <dollar sign>
142
 *
143
 * 40  '@' <commercial at>
144
 *
145
 * 5b  '[' <left bracket>
146
 * 5c  '\' <backslash>
147
 * 5d  ']' <right bracket>
148
 * 5e  '^' <circumflex>
149
 *
150
 * 60  '`' <grave accent>
151
 *
152
 * 7b  '{' <left brace>
153
 * 7c  '|' <vertical line>
154
 * 7d  '}' <right brace>
155
 * 7e  '~' <tilde>
156
 */
157
/*
 * 128-entry bit set over codes 00..7f: bit (c & 0x1f) of word (c >> 5)
 * is set when code c is an invariant character.
 */
static const uint32_t invariantChars[4] = {
    0xfffffbffu, /* 00..1f: all except 0a */
    0xffffffe5u, /* 20..3f: all except 21 23 24 */
    0x87fffffeu, /* 40..5f: all except 40 5b..5e */
    0x87fffffeu  /* 60..7f: all except 60 7b..7e */
};

/*
 * Invariance test for unsigned types (or values known to be non-negative).
 * Values above 0x7f are rejected before the bit set is consulted.
 * Note: the argument is evaluated more than once.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && ((invariantChars[(c)>>5]>>((c)&0x1f))&(uint32_t)1)!=0)

/* Invariance test for signed types: negative values are rejected first. */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
172
173
/*
 * CHAR_TO_UCHAR maps a platform-charset byte to its ASCII/Unicode code;
 * UCHAR_TO_CHAR maps in the opposite direction.
 * On ASCII-family platforms the value passes through unchanged; on
 * EBCDIC-family platforms it is looked up in asciiFromEbcdic[] and
 * ebcdicFromAscii[] respectively.
 * Arguments and expansions are parenthesized so that the macros keep their
 * meaning when passed or embedded in compound expressions.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(c) (asciiFromEbcdic[(c)])
#define UCHAR_TO_CHAR(c) (ebcdicFromAscii[(c)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
182
183
184
/*
 * Converts `length` platform-charset bytes from cs into UChars written to us.
 * Nothing is copied when length is zero or negative.
 *
 * The entire ASCII repertoire may be mapped _to_ Unicode. On EBCDIC systems
 * this works for characters with codes from codepages 37 and 1047 or
 * compatible.
 */
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length) {
    for(int32_t i=0; i<length; ++i) {
        const uint8_t byte=(uint8_t)cs[i];
        const UChar unit=(UChar)CHAR_TO_UCHAR(byte);
        /* a zero result is only legitimate for a zero input */
        U_ASSERT((unit!=0 || byte==0)); /* only invariant chars converted? */
        us[i]=unit;
    }
}
202
203
/*
 * Converts `length` UChars from us into platform-charset bytes written to cs.
 * A variant (non-invariant) code unit triggers an assertion and is stored
 * as 0. Nothing is copied when length is zero or negative.
 */
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
    for(int32_t i=0; i<length; ++i) {
        UChar unit=us[i];
        if(!UCHAR_IS_INVARIANT(unit)) {
            U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
            unit=0;
        }
        cs[i]=(char)UCHAR_TO_CHAR(unit);
    }
}
217
218
/*
 * Reports whether every character of a platform-charset string is invariant.
 *
 * A negative length means the string is NUL-terminated; otherwise exactly
 * `length` bytes are examined and embedded NUL bytes count as invariant.
 * Returns TRUE when no variant character is found, FALSE at the first one.
 *
 * There are no assertions here because this function is legitimately
 * called for strings with variant characters.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char *s, int32_t length) {
    const UBool nulTerminated=(UBool)(length<0);

    /* in counted mode the budget is consumed before each byte is read */
    while(nulTerminated || (length--)>0) {
        uint8_t byte=(uint8_t)*s++;
        if(byte==0) {
            if(nulTerminated) {
                break; /* end of string */
            }
            continue; /* NUL is invariant */
        }
        /* byte!=0 from here on; the EBCDIC branch uses 0 to mean "no mapping" */

#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        if(!UCHAR_IS_INVARIANT(byte)) {
            return FALSE; /* found a variant char */
        }
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        byte=CHAR_TO_UCHAR(byte);
        if(byte==0 || !UCHAR_IS_INVARIANT(byte)) {
            return FALSE; /* found a variant char */
        }
#else
#   error U_CHARSET_FAMILY is not valid
#endif
    }
    return TRUE;
}
261
262
/*
 * Reports whether every code unit of a UChar string is an invariant
 * character.
 *
 * A negative length means the string is NUL-terminated; otherwise exactly
 * `length` code units are examined.
 * Returns TRUE when no variant character is found, FALSE at the first one.
 *
 * There are no assertions here because this function is legitimately
 * called for strings with variant characters.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length) {
    const UBool nulTerminated=(UBool)(length<0);

    /* in counted mode the budget is consumed before each unit is read */
    while(nulTerminated || (length--)>0) {
        const UChar unit=*s++;
        if(nulTerminated && unit==0) {
            break; /* end of string */
        }
        if(!UCHAR_IS_INVARIANT(unit)) {
            return FALSE; /* found a variant char */
        }
    }
    return TRUE;
}
292
293
/* UDataSwapFn implementations used in udataswp.c ------- */
294
295
/* convert ASCII to EBCDIC and verify that all characters are invariant */
296
U_CAPI int32_t U_EXPORT2
297
uprv_ebcdicFromAscii(const UDataSwapper *ds,
298
                     const void *inData, int32_t length, void *outData,
299
0
                     UErrorCode *pErrorCode) {
300
0
    const uint8_t *s;
301
0
    uint8_t *t;
302
0
    uint8_t c;
303
304
0
    int32_t count;
305
306
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
307
0
        return 0;
308
0
    }
309
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
310
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
311
0
        return 0;
312
0
    }
313
314
    /* setup and swapping */
315
0
    s=(const uint8_t *)inData;
316
0
    t=(uint8_t *)outData;
317
0
    count=length;
318
0
    while(count>0) {
319
0
        c=*s++;
320
0
        if(!UCHAR_IS_INVARIANT(c)) {
321
0
            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
322
0
                             length, length-count);
323
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
324
0
            return 0;
325
0
        }
326
0
        *t++=ebcdicFromAscii[c];
327
0
        --count;
328
0
    }
329
330
0
    return length;
331
0
}
332
333
/* this function only checks and copies ASCII strings without conversion */
334
U_CFUNC int32_t
335
uprv_copyAscii(const UDataSwapper *ds,
336
               const void *inData, int32_t length, void *outData,
337
0
               UErrorCode *pErrorCode) {
338
0
    const uint8_t *s;
339
0
    uint8_t c;
340
341
0
    int32_t count;
342
343
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
344
0
        return 0;
345
0
    }
346
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
347
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
348
0
        return 0;
349
0
    }
350
351
    /* setup and checking */
352
0
    s=(const uint8_t *)inData;
353
0
    count=length;
354
0
    while(count>0) {
355
0
        c=*s++;
356
0
        if(!UCHAR_IS_INVARIANT(c)) {
357
0
            udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
358
0
                             length, length-count);
359
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
360
0
            return 0;
361
0
        }
362
0
        --count;
363
0
    }
364
365
0
    if(length>0 && inData!=outData) {
366
0
        uprv_memcpy(outData, inData, length);
367
0
    }
368
369
0
    return length;
370
0
}
371
372
/* convert EBCDIC to ASCII and verify that all characters are invariant */
373
U_CFUNC int32_t
374
uprv_asciiFromEbcdic(const UDataSwapper *ds,
375
                     const void *inData, int32_t length, void *outData,
376
0
                     UErrorCode *pErrorCode) {
377
0
    const uint8_t *s;
378
0
    uint8_t *t;
379
0
    uint8_t c;
380
381
0
    int32_t count;
382
383
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
384
0
        return 0;
385
0
    }
386
0
    if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
387
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388
0
        return 0;
389
0
    }
390
391
    /* setup and swapping */
392
0
    s=(const uint8_t *)inData;
393
0
    t=(uint8_t *)outData;
394
0
    count=length;
395
0
    while(count>0) {
396
0
        c=*s++;
397
0
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
398
0
            udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
399
0
                             length, length-count);
400
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
401
0
            return 0;
402
0
        }
403
0
        *t++=c;
404
0
        --count;
405
0
    }
406
407
0
    return length;
408
0
}
409
410
/* this function only checks and copies EBCDIC strings without conversion */
411
U_CFUNC int32_t
412
uprv_copyEbcdic(const UDataSwapper *ds,
413
                const void *inData, int32_t length, void *outData,
414
0
                UErrorCode *pErrorCode) {
415
0
    const uint8_t *s;
416
0
    uint8_t c;
417
418
0
    int32_t count;
419
420
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
421
0
        return 0;
422
0
    }
423
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
424
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
425
0
        return 0;
426
0
    }
427
428
    /* setup and checking */
429
0
    s=(const uint8_t *)inData;
430
0
    count=length;
431
0
    while(count>0) {
432
0
        c=*s++;
433
0
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
434
0
            udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
435
0
                             length, length-count);
436
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
437
0
            return 0;
438
0
        }
439
0
        --count;
440
0
    }
441
442
0
    if(length>0 && inData!=outData) {
443
0
        uprv_memcpy(outData, inData, length);
444
0
    }
445
446
0
    return length;
447
0
}
448
449
/*
 * Reports whether c equals one of the byte values listed in
 * ebcdicAtSigns[] below. A zero byte never matches.
 */
U_CFUNC UBool
uprv_isEbcdicAtSign(char c) {
    /* zero-terminated so that it can be searched like a C string */
    static const uint8_t ebcdicAtSigns[] = {
        0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };

    /* skip the search for 0: strchr() would match the terminator */
    const char *hit = (c == 0) ? nullptr : uprv_strchr((const char *)ebcdicAtSigns, c);
    return hit != nullptr;
}
455
456
/* compare invariant strings; variant characters compare less than others and unlike each other */
457
U_CFUNC int32_t
458
uprv_compareInvAscii(const UDataSwapper *ds,
459
                     const char *outString, int32_t outLength,
460
0
                     const UChar *localString, int32_t localLength) {
461
0
    (void)ds;
462
0
    int32_t minLength;
463
0
    UChar32 c1, c2;
464
0
    uint8_t c;
465
466
0
    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
467
0
        return 0;
468
0
    }
469
470
0
    if(outLength<0) {
471
0
        outLength=(int32_t)uprv_strlen(outString);
472
0
    }
473
0
    if(localLength<0) {
474
0
        localLength=u_strlen(localString);
475
0
    }
476
477
0
    minLength= outLength<localLength ? outLength : localLength;
478
479
0
    while(minLength>0) {
480
0
        c=(uint8_t)*outString++;
481
0
        if(UCHAR_IS_INVARIANT(c)) {
482
0
            c1=c;
483
0
        } else {
484
0
            c1=-1;
485
0
        }
486
487
0
        c2=*localString++;
488
0
        if(!UCHAR_IS_INVARIANT(c2)) {
489
0
            c2=-2;
490
0
        }
491
492
0
        if((c1-=c2)!=0) {
493
0
            return c1;
494
0
        }
495
496
0
        --minLength;
497
0
    }
498
499
    /* strings start with same prefix, compare lengths */
500
0
    return outLength-localLength;
501
0
}
502
503
/*
 * Compares an EBCDIC byte string (outString) with a UChar string
 * (localString) in ASCII order. Each outString byte is mapped through
 * asciiFromEbcdic[] first; NUL stays 0. A byte without mapping or mapping
 * to a variant character counts as -1, a variant unit in localString counts
 * as -2.
 *
 * A length of -1 means the string is NUL-terminated. Returns 0 for NULL
 * strings or lengths below -1. Otherwise returns the difference at the
 * first mismatch, or the difference of the lengths when one string is a
 * prefix of the other. ds is unused.
 */
U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper *ds,
                      const char *outString, int32_t outLength,
                      const UChar *localString, int32_t localLength) {
    (void)ds;

    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
        return 0;
    }

    if(outLength<0) {
        outLength=(int32_t)uprv_strlen(outString);
    }
    if(localLength<0) {
        localLength=u_strlen(localString);
    }

    const int32_t commonLength= outLength<localLength ? outLength : localLength;

    for(int32_t i=0; i<commonLength; ++i) {
        const uint8_t byte=(uint8_t)outString[i];
        UChar32 c1=0;
        if(byte!=0) {
            /* a 0 from the table means "no mapping" */
            c1=asciiFromEbcdic[byte];
            if(c1==0 || !UCHAR_IS_INVARIANT(c1)) {
                c1=-1;
            }
        }

        const UChar unit=localString[i];
        const UChar32 c2= UCHAR_IS_INVARIANT(unit) ? (UChar32)unit : -2;

        const UChar32 diff=c1-c2;
        if(diff!=0) {
            return diff;
        }
    }

    /* strings start with same prefix, compare lengths */
    return outLength-localLength;
}
550
551
/*
 * Compares two NUL-terminated EBCDIC strings in the order of their ASCII
 * equivalents. Returns 0 when the strings are byte-for-byte equal.
 * Otherwise the first differing bytes are mapped through asciiFromEbcdic[]
 * and the difference of the mapped values is returned; a non-NUL byte that
 * has no mapping or maps to a variant character is replaced by the negative
 * of its original byte value before subtracting.
 */
U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    /* skip the common prefix; identical bytes need no mapping */
    while(*s1==*s2) {
        if(*s1==0) {
            return 0;
        }
        ++s1;
        ++s2;
    }

    const uint8_t byte1=(uint8_t)*s1;
    const uint8_t byte2=(uint8_t)*s2;

    int32_t c1=byte1;
    if(c1!=0) {
        c1=asciiFromEbcdic[c1];
        if(c1==0 || !UCHAR_IS_INVARIANT(c1)) {
            c1=-(int32_t)byte1;
        }
    }

    int32_t c2=byte2;
    if(c2!=0) {
        c2=asciiFromEbcdic[c2];
        if(c2==0 || !UCHAR_IS_INVARIANT(c2)) {
            c2=-(int32_t)byte2;
        }
    }

    return c1-c2;
}
571
572
/*
 * Looks up the EBCDIC byte c in asciiFromEbcdic[] and returns the table
 * entry as a char.
 */
U_CAPI char U_EXPORT2
uprv_ebcdicToAscii(char c) {
    /* index as unsigned so that bytes >= 0x80 select the upper table half */
    const uint8_t ebcdicCode=(uint8_t)c;
    return (char)asciiFromEbcdic[ebcdicCode];
}
576
577
/*
 * Looks up the EBCDIC byte c in lowercaseAsciiFromEbcdic[] and returns the
 * table entry as a char.
 */
U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
    /* index as unsigned so that bytes >= 0x80 select the upper table half */
    const uint8_t ebcdicCode=(uint8_t)c;
    return (char)lowercaseAsciiFromEbcdic[ebcdicCode];
}
581
582
/*
 * strncpy()-like copy that converts each source byte from EBCDIC to ASCII
 * via asciiFromEbcdic[].
 *
 * Copies bytes from src until a NUL byte is found or n bytes were written,
 * then fills the remainder of the n bytes with zeros. If n is -1 the source
 * length plus one is used, so that the terminating NUL is copied. As with
 * strncpy(), dst is not NUL-terminated when src has n or more non-NUL
 * bytes. Returns the original dst.
 */
U_CAPI uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
  }
  /* copy non-null; n is tested first so that src[n] is never read */
  while(n>0 && *src) {
    *(dst++) = asciiFromEbcdic[*(src++)];
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}
602
603
/*
 * strncpy()-like copy that converts each source byte from ASCII to EBCDIC
 * via ebcdicFromAscii[]. A byte without a mapping is replaced by the EBCDIC
 * code of the question mark (ASCII 0x3f).
 *
 * Copies bytes from src until a NUL byte is found or n bytes were written,
 * then fills the remainder of the n bytes with zeros. If n is -1 the source
 * length plus one is used, so that the terminating NUL is copied. As with
 * strncpy(), dst is not NUL-terminated when src has n or more non-NUL
 * bytes. Returns the original dst.
 */
U_CAPI uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
  }
  /* copy non-null; n is tested first so that src[n] is never read */
  while(n>0 && *src) {
    /* keep the table byte unsigned; EBCDIC codes go up to 0xf9 */
    uint8_t ch = ebcdicFromAscii[*(src++)];
    if(ch == 0) {
      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
    }
    *(dst++) = ch;
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}
627