Coverage Report

Created: 2023-06-07 07:17

/src/icu/source/common/uinvchar.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1999-2010, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uinvchar.c
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:2
14
*
15
*   created on: 2004sep14
16
*   created by: Markus W. Scherer
17
*
18
*   Functions for handling invariant characters, moved here from putil.c
19
*   for better modularization.
20
*/
21
22
#include "unicode/utypes.h"
23
#include "unicode/ustring.h"
24
#include "udataswp.h"
25
#include "cstring.h"
26
#include "cmemory.h"
27
#include "uassert.h"
28
#include "uinvchar.h"
29
30
/* invariant-character handling --------------------------------------------- */
31
32
/*
33
 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
34
 * appropriately for most EBCDIC codepages.
35
 *
36
 * They currently also map most other ASCII graphic characters,
37
 * appropriately for codepages 37 and 1047.
38
 * Exceptions: The characters for []^ have different codes in 37 & 1047.
39
 * Both versions are mapped to ASCII.
40
 *
41
 *    ASCII 37 1047
42
 * [     5B BA   AD
43
 * ]     5D BB   BD
44
 * ^     5E B0   5F
45
 *
46
 * There are no mappings for variant characters from Unicode to EBCDIC.
47
 *
48
 * Currently, C0 control codes are also included in these maps.
49
 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
50
 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
51
 * but there is no mapping for ASCII LF back to EBCDIC.
52
 *
53
 *    ASCII EBCDIC S/390-OE
54
 * LF    0A     25       15
55
 * NEL   85     15       25
56
 *
57
 * The maps below explicitly exclude the variant
58
 * control and graphical characters that are in ASCII-based
59
 * codepages at 0x80 and above.
60
 * "No mapping" is expressed by mapping to a 00 byte.
61
 *
62
 * These tables do not establish a converter or a codepage.
63
 */
64
65
/*
 * Lookup table indexed by an EBCDIC byte value (0x00..0xff).
 * Each entry is the ASCII code for that byte; 0x00 marks "no mapping"
 * (index 0 itself also maps to 0x00).
 * Both EBCDIC 0x15 and 0x25 yield ASCII LF (0x0a), and the codepage-37 and
 * codepage-1047 positions for [ ] ^ all yield the same ASCII characters.
 */
static const uint8_t asciiFromEbcdic[256]={
66
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
67
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
68
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
69
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
70
71
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
72
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
73
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
74
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
75
76
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
78
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
79
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
80
81
    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
82
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
83
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
84
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
85
};
86
87
/*
 * Lookup table indexed by an ASCII code (0x00..0xff).
 * Each entry is the EBCDIC byte for that code; 0x00 marks "no mapping"
 * (index 0 itself also maps to 0x00).
 * ASCII LF (index 0x0a) has no entry, and every index from 0x80 upward is 0.
 */
static const uint8_t ebcdicFromAscii[256]={
88
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
89
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
90
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
91
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
92
93
    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
94
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
95
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
96
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
97
98
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102
103
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
107
};
108
109
/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase:
 * the EBCDIC uppercase rows (0xc1..0xc9, 0xd1..0xd9, 0xe2..0xe9) hold
 * 0x61..0x7a instead of 0x41..0x5a. Indexed by EBCDIC byte; 0x00 = no mapping.
 *
 * NOTE(review): entry 0xe0 is 0x7c (vertical line) here but 0x5c (backslash)
 * in asciiFromEbcdic[], although backslash is not a letter - confirm whether
 * this difference is intentional before relying on it.
 */
110
static const uint8_t lowercaseAsciiFromEbcdic[256]={
111
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
112
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
113
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
114
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
115
116
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
117
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
118
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
119
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
120
121
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
122
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
123
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
124
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
125
126
    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
127
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
128
    0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
129
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
130
};
131
132
/*
133
 * Bit sets indicating which characters of the ASCII repertoire
134
 * (by ASCII/Unicode code) are "invariant".
135
 * See utypes.h for more details.
136
 *
137
 * As invariant are considered the characters of the ASCII repertoire except
138
 * for the following:
 * 0a  <line feed> (its bit is cleared in invariantChars[0] below)
139
 * 21  '!' <exclamation mark>
140
 * 23  '#' <number sign>
141
 * 24  '$' <dollar sign>
142
 *
143
 * 40  '@' <commercial at>
144
 *
145
 * 5b  '[' <left bracket>
146
 * 5c  '\' <backslash>
147
 * 5d  ']' <right bracket>
148
 * 5e  '^' <circumflex>
149
 *
150
 * 60  '`' <grave accent>
151
 *
152
 * 7b  '{' <left brace>
153
 * 7c  '|' <vertical line>
154
 * 7d  '}' <right brace>
155
 * 7e  '~' <tilde>
156
 */
157
/*
 * Membership set over code points 00..7f, packed into four 32-bit words.
 * Word (c>>5) covers 32 consecutive code points and bit (c&0x1f) inside it
 * is 1 when code point c counts as invariant.
 */
static const uint32_t invariantChars[4]={
    /* 00..1f: everything except 0a */
    0xFFFFFBFFu,
    /* 20..3f: everything except 21 23 24 */
    0xFFFFFFE5u,
    /* 40..5f: everything except 40 and 5b..5e */
    0x87FFFFFEu,
    /* 60..7f: everything except 60 and 7b..7e */
    0x87FFFFFEu
};
163
164
/*
165
 * test unsigned types (or values known to be non-negative) for invariant characters,
166
 * tests ASCII-family character values
167
 */
168
0
/*
 * True when c is at most 0x7f and bit (c&0x1f) of invariantChars[c>>5] is set.
 * The argument is expanded three times, so pass only side-effect-free
 * expressions. A negative c passes the <=0x7f comparison and is then used to
 * compute the array index, which is why callers must supply non-negative values.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
169
170
/*
 * test signed types for invariant characters, adds test for positive values:
 * the 0<=(c) comparison is evaluated first, so UCHAR_IS_INVARIANT() only ever
 * sees non-negative values. The argument is expanded more than once.
 */
171
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
172
173
/*
 * CHAR_TO_UCHAR(x): platform char code -> ASCII/Unicode code.
 * UCHAR_TO_CHAR(x): ASCII/Unicode code -> platform char code.
 *
 * On ASCII-family platforms both are the identity; on EBCDIC-family platforms
 * they index asciiFromEbcdic[] / ebcdicFromAscii[], so the argument must be a
 * valid table index (0..255) and a result of 0 means "no mapping".
 * Arguments and expansions are parenthesized so that the macros can be used
 * inside larger expressions without precedence surprises.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[(u)])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[(u)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
182
183
184
/*
 * Converts `length` platform-charset bytes from cs into UChars stored in us.
 * Every byte is passed through CHAR_TO_UCHAR(); nothing is read or written
 * when length<=0, and no terminator is appended.
 */
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length) {
    /*
     * The whole ASCII repertoire may be mapped _to_ Unicode here.
     * On EBCDIC systems this covers characters whose codes come from
     * codepages 37 and 1047 or compatible ones.
     */
    for(int32_t i=0; i<length; ++i) {
        const uint8_t byte=(uint8_t)cs[i];
        const UChar unit=(UChar)CHAR_TO_UCHAR(byte);

        /* a zero result is only expected for a zero input byte */
        U_ASSERT((unit!=0 || byte==0));
        us[i]=unit;
    }
}
202
203
/*
 * Converts `length` UChars from us into platform-charset chars stored in cs.
 * A code unit that is not invariant trips U_ASSERT and is replaced by 0
 * before conversion. Nothing happens when length<=0; no terminator is appended.
 */
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
    for(int32_t i=0; i<length; ++i) {
        UChar unit=us[i];

        if(!UCHAR_IS_INVARIANT(unit)) {
            /* Variant characters were used. These are not portable in ICU. */
            U_ASSERT(FALSE);
            unit=0;
        }
        cs[i]=(char)UCHAR_TO_CHAR(unit);
    }
}
217
218
/*
 * Reports whether a platform-charset string consists only of invariant
 * characters.
 *
 * length<0:  s is scanned up to (not including) its NUL terminator.
 * length>=0: exactly `length` bytes are scanned; embedded NUL bytes are
 *            accepted as invariant.
 *
 * Returns TRUE when no variant character is found, FALSE at the first one.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char *s, int32_t length) {
    for(;;) {
        uint8_t c;

        if(length<0) {
            /* NUL-terminated: the terminator ends the scan */
            c=(uint8_t)*s++;
            if(c==0) {
                return TRUE;
            }
        } else if(length==0) {
            /* counted: all bytes consumed */
            return TRUE;
        } else {
            --length;
            c=(uint8_t)*s++;
            if(c==0) {
                continue; /* NUL is invariant */
            }
        }
        /*
         * c is non-zero from here on; the EBCDIC branch relies on that to
         * interpret a zero table result as "no mapping".
         *
         * No assertions: callers legitimately pass strings that contain
         * variant characters.
         */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        if(!UCHAR_IS_INVARIANT(c)) {
            return FALSE; /* found a variant char */
        }
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        c=CHAR_TO_UCHAR(c);
        if(c==0 || !UCHAR_IS_INVARIANT(c)) {
            return FALSE; /* found a variant char */
        }
#else
#   error U_CHARSET_FAMILY is not valid
#endif
    }
}
261
262
/*
 * Reports whether a UChar string consists only of invariant characters.
 *
 * length<0:  s is scanned up to (not including) its NUL terminator.
 * length>=0: exactly `length` code units are scanned.
 *
 * Returns TRUE when no variant character is found, FALSE at the first one.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length) {
    for(;;) {
        UChar unit;

        if(length<0) {
            /* NUL-terminated: the terminator ends the scan */
            unit=*s++;
            if(unit==0) {
                return TRUE;
            }
        } else if(length==0) {
            /* counted: all code units consumed */
            return TRUE;
        } else {
            --length;
            unit=*s++;
        }

        /*
         * No assertions: callers legitimately pass strings that contain
         * variant characters.
         */
        if(!UCHAR_IS_INVARIANT(unit)) {
            return FALSE; /* found a variant char */
        }
    }
}
292
293
/* UDataSwapFn implementations used in udataswp.c ------- */
294
295
/* convert ASCII to EBCDIC and verify that all characters are invariant */
296
U_CAPI int32_t U_EXPORT2
297
uprv_ebcdicFromAscii(const UDataSwapper *ds,
298
                     const void *inData, int32_t length, void *outData,
299
0
                     UErrorCode *pErrorCode) {
300
0
    const uint8_t *s;
301
0
    uint8_t *t;
302
0
    uint8_t c;
303
304
0
    int32_t count;
305
306
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
307
0
        return 0;
308
0
    }
309
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
310
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
311
0
        return 0;
312
0
    }
313
314
    /* setup and swapping */
315
0
    s=(const uint8_t *)inData;
316
0
    t=(uint8_t *)outData;
317
0
    count=length;
318
0
    while(count>0) {
319
0
        c=*s++;
320
0
        if(!UCHAR_IS_INVARIANT(c)) {
321
0
            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
322
0
                             length, length-count);
323
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
324
0
            return 0;
325
0
        }
326
0
        *t++=ebcdicFromAscii[c];
327
0
        --count;
328
0
    }
329
330
0
    return length;
331
0
}
332
333
/* this function only checks and copies ASCII strings without conversion */
334
U_CFUNC int32_t
335
uprv_copyAscii(const UDataSwapper *ds,
336
               const void *inData, int32_t length, void *outData,
337
0
               UErrorCode *pErrorCode) {
338
0
    const uint8_t *s;
339
0
    uint8_t c;
340
341
0
    int32_t count;
342
343
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
344
0
        return 0;
345
0
    }
346
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
347
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
348
0
        return 0;
349
0
    }
350
351
    /* setup and checking */
352
0
    s=(const uint8_t *)inData;
353
0
    count=length;
354
0
    while(count>0) {
355
0
        c=*s++;
356
0
        if(!UCHAR_IS_INVARIANT(c)) {
357
0
            udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
358
0
                             length, length-count);
359
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
360
0
            return 0;
361
0
        }
362
0
        --count;
363
0
    }
364
365
0
    if(length>0 && inData!=outData) {
366
0
        uprv_memcpy(outData, inData, length);
367
0
    }
368
369
0
    return length;
370
0
}
371
372
/* convert EBCDIC to ASCII and verify that all characters are invariant */
373
U_CFUNC int32_t
374
uprv_asciiFromEbcdic(const UDataSwapper *ds,
375
                     const void *inData, int32_t length, void *outData,
376
0
                     UErrorCode *pErrorCode) {
377
0
    const uint8_t *s;
378
0
    uint8_t *t;
379
0
    uint8_t c;
380
381
0
    int32_t count;
382
383
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
384
0
        return 0;
385
0
    }
386
0
    if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
387
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388
0
        return 0;
389
0
    }
390
391
    /* setup and swapping */
392
0
    s=(const uint8_t *)inData;
393
0
    t=(uint8_t *)outData;
394
0
    count=length;
395
0
    while(count>0) {
396
0
        c=*s++;
397
0
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
398
0
            udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
399
0
                             length, length-count);
400
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
401
0
            return 0;
402
0
        }
403
0
        *t++=c;
404
0
        --count;
405
0
    }
406
407
0
    return length;
408
0
}
409
410
/* this function only checks and copies EBCDIC strings without conversion */
411
U_CFUNC int32_t
412
uprv_copyEbcdic(const UDataSwapper *ds,
413
                const void *inData, int32_t length, void *outData,
414
0
                UErrorCode *pErrorCode) {
415
0
    const uint8_t *s;
416
0
    uint8_t c;
417
418
0
    int32_t count;
419
420
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
421
0
        return 0;
422
0
    }
423
0
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
424
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
425
0
        return 0;
426
0
    }
427
428
    /* setup and checking */
429
0
    s=(const uint8_t *)inData;
430
0
    count=length;
431
0
    while(count>0) {
432
0
        c=*s++;
433
0
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
434
0
            udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
435
0
                             length, length-count);
436
0
            *pErrorCode=U_INVALID_CHAR_FOUND;
437
0
            return 0;
438
0
        }
439
0
        --count;
440
0
    }
441
442
0
    if(length>0 && inData!=outData) {
443
0
        uprv_memcpy(outData, inData, length);
444
0
    }
445
446
0
    return length;
447
0
}
448
449
/* compare invariant strings; variant characters compare less than others and unlike each other */
450
U_CFUNC int32_t
451
uprv_compareInvAscii(const UDataSwapper *ds,
452
                     const char *outString, int32_t outLength,
453
0
                     const UChar *localString, int32_t localLength) {
454
0
    (void)ds;
455
0
    int32_t minLength;
456
0
    UChar32 c1, c2;
457
0
    uint8_t c;
458
459
0
    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
460
0
        return 0;
461
0
    }
462
463
0
    if(outLength<0) {
464
0
        outLength=(int32_t)uprv_strlen(outString);
465
0
    }
466
0
    if(localLength<0) {
467
0
        localLength=u_strlen(localString);
468
0
    }
469
470
0
    minLength= outLength<localLength ? outLength : localLength;
471
472
0
    while(minLength>0) {
473
0
        c=(uint8_t)*outString++;
474
0
        if(UCHAR_IS_INVARIANT(c)) {
475
0
            c1=c;
476
0
        } else {
477
0
            c1=-1;
478
0
        }
479
480
0
        c2=*localString++;
481
0
        if(!UCHAR_IS_INVARIANT(c2)) {
482
0
            c2=-2;
483
0
        }
484
485
0
        if((c1-=c2)!=0) {
486
0
            return c1;
487
0
        }
488
489
0
        --minLength;
490
0
    }
491
492
    /* strings start with same prefix, compare lengths */
493
0
    return outLength-localLength;
494
0
}
495
496
/*
 * Compares an EBCDIC char string with a UChar string, invariant characters only.
 * Each outString byte is mapped through asciiFromEbcdic[] first; a 0 byte stays
 * 0, an unmappable or variant byte counts as -1. A variant code unit in
 * localString counts as -2.
 *
 * A length of -1 means NUL-terminated. Returns 0 when a pointer is NULL or a
 * length is below -1; otherwise the difference at the first mismatch, or
 * outLength-localLength when one string is a prefix of the other.
 * ds is unused.
 */
U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper *ds,
                      const char *outString, int32_t outLength,
                      const UChar *localString, int32_t localLength) {
    (void)ds;

    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
        return 0;
    }

    if(outLength<0) {
        outLength=(int32_t)uprv_strlen(outString);
    }
    if(localLength<0) {
        localLength=u_strlen(localString);
    }

    const int32_t shared= outLength<localLength ? outLength : localLength;

    for(int32_t i=0; i<shared; ++i) {
        const uint8_t byte=(uint8_t)outString[i];
        UChar32 left;

        if(byte==0) {
            left=0;
        } else {
            left=asciiFromEbcdic[byte];
            if(left==0 || !UCHAR_IS_INVARIANT(left)) {
                left=-1; /* no mapping, or maps to a variant character */
            }
        }

        UChar32 right=localString[i];
        if(!UCHAR_IS_INVARIANT(right)) {
            right=-2;
        }

        const UChar32 diff=left-right;
        if(diff!=0) {
            return diff;
        }
    }

    /* strings start with same prefix, compare lengths */
    return outLength-localLength;
}
543
544
/*
 * Compares two NUL-terminated EBCDIC strings by the ASCII values of their
 * characters. Bytes are only mapped once a mismatch is found: each differing
 * byte becomes its asciiFromEbcdic[] value, or the negated raw byte when it has
 * no mapping or maps to a variant character; a 0 byte stays 0.
 *
 * Returns 0 when the strings are equal, otherwise the difference of the two
 * mapped values at the first mismatch.
 */
U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    for(;;) {
        const uint8_t raw1=(uint8_t)*s1++;
        const uint8_t raw2=(uint8_t)*s2++;

        if(raw1==raw2) {
            if(raw1==0) {
                return 0; /* both strings ended together */
            }
            continue;
        }

        /* first mismatch: derive the comparison key for each side */
        int32_t key1=raw1;
        if(raw1!=0) {
            key1=asciiFromEbcdic[raw1];
            if(key1==0 || !UCHAR_IS_INVARIANT(key1)) {
                key1=-(int32_t)raw1;
            }
        }

        int32_t key2=raw2;
        if(raw2!=0) {
            key2=asciiFromEbcdic[raw2];
            if(key2==0 || !UCHAR_IS_INVARIANT(key2)) {
                key2=-(int32_t)raw2;
            }
        }

        return key1-key2;
    }
}
564
565
/*
 * Returns the lowercaseAsciiFromEbcdic[] entry for the EBCDIC char c.
 * The char is reinterpreted as an unsigned byte so that values of 0x80 and
 * above index the table correctly.
 */
U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
    const uint8_t index=(uint8_t)c;
    const uint8_t mapped=lowercaseAsciiFromEbcdic[index];

    return (char)mapped;
}
569
570
/*
 * strncpy-like copy that maps each source byte through asciiFromEbcdic[].
 *
 * Copies at most n bytes from src up to (not including) its NUL terminator,
 * then fills the rest of the n bytes of dst with 0. With n==-1 the length is
 * strlen(src)+1, so the result is NUL-terminated. As with strncpy, dst is not
 * terminated when src holds n or more non-NUL bytes. Source bytes without a
 * table mapping are written as 0.
 *
 * Returns the original dst pointer.
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = (int32_t)(uprv_strlen((const char*)src)+1); /* copy NUL */
  }
  /* copy non-null; n is tested first so that src[n] is never read */
  while(n>0 && *src) {
    *(dst++) = asciiFromEbcdic[*(src++)];
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}
590
591
/*
 * strncpy-like copy that maps each source byte through ebcdicFromAscii[].
 *
 * Copies at most n bytes from src up to (not including) its NUL terminator,
 * then fills the rest of the n bytes of dst with 0. With n==-1 the length is
 * strlen(src)+1, so the result is NUL-terminated. As with strncpy, dst is not
 * terminated when src holds n or more non-NUL bytes. Source bytes without a
 * table mapping are replaced by the table entry for '?' (index 0x3f).
 *
 * Returns the original dst pointer.
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = (int32_t)(uprv_strlen((const char*)src)+1); /* copy NUL */
  }
  /* copy non-null; n is tested first so that src[n] is never read */
  while(n>0 && *src) {
    /* unsigned byte: table values go up to 0xf9 and must not pass through plain char */
    uint8_t ch = ebcdicFromAscii[*(src++)];
    if(ch == 0) {
      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
    }
    *(dst++) = ch;
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}
615