Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/uinvchar.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1999-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uinvchar.h
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:2
14
*
15
*   created on: 2004sep14
16
*   created by: Markus W. Scherer
17
*
18
*   Definitions for handling invariant characters, moved here from putil.c
19
*   for better modularization.
20
*/
21
22
#ifndef __UINVCHAR_H__
23
#define __UINVCHAR_H__
24
25
#include "unicode/utypes.h"
26
#ifdef __cplusplus
27
#include "unicode/unistr.h"
28
#endif
29
30
/**
31
 * Check if a char string only contains invariant characters.
32
 * See utypes.h for details.
33
 *
34
 * @param s Input string pointer.
35
 * @param length Length of the string, can be -1 if NUL-terminated.
36
 * @return true if s contains only invariant characters.
37
 *
38
 * @internal (ICU 2.8)
39
 */
40
U_CAPI UBool U_EXPORT2
41
uprv_isInvariantString(const char *s, int32_t length);
42
43
/**
44
 * Check if a Unicode string only contains invariant characters.
45
 * See utypes.h for details.
46
 *
47
 * @param s Input string pointer.
48
 * @param length Length of the string, can be -1 if NUL-terminated.
49
 * @return true if s contains only invariant characters.
50
 *
51
 * @internal (ICU 2.8)
52
 */
53
U_CAPI UBool U_EXPORT2
54
uprv_isInvariantUString(const UChar *s, int32_t length);
55
56
/**
57
 * \def U_UPPER_ORDINAL
58
 * Get the zero-based ordinal number of an uppercase invariant character ('A' maps to 0). Does not validate that x is an uppercase letter, and the EBCDIC form may evaluate x more than once, so pass an expression without side effects.
59
 * @internal
60
 */
61
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
62
#   define U_UPPER_ORDINAL(x) ((x)-'A')
63
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
64
#   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
65
                              (((x) < 'S') ? ((x)-'J'+9) : \
66
                               ((x)-'S'+18)))
67
#else
68
#   error Unknown charset family!
69
#endif
70
71
#ifdef __cplusplus
72
73
U_NAMESPACE_BEGIN
74
75
/**
76
 * Like U_UPPER_ORDINAL(x) but with validation: a non-letter produces an out-of-range result that the caller can test for.
77
 * Returns 0..25 for A..Z else a value outside 0..25.
78
 */
79
0
inline int32_t uprv_upperOrdinal(int32_t c) {
80
0
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
81
0
    return c - 'A';  // negative for c below 'A', greater than 25 for c above 'Z'
82
0
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
83
0
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
84
0
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
85
0
    if (c <= 'I') { return c - 'A'; }  // A-I --> 0..8 (negative for c below 'A')
86
0
    if (c < 'J') { return -1; }  // c lies in the gap between I and J
87
0
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
88
0
    if (c < 'S') { return -1; }  // c lies in the gap between R and S
89
0
    return c - 'S' + 18;  // S-Z --> 18..25 (greater than 25 for c above 'Z')
90
0
#else
91
0
#   error Unknown charset family!
92
0
#endif
93
0
}
94
95
// Like U_UPPER_ORDINAL(x) but for lowercase and with validation: a non-letter produces an out-of-range result that the caller can test for.
96
// Returns 0..25 for a..z else a value outside 0..25.
97
0
inline int32_t uprv_lowerOrdinal(int32_t c) {
98
0
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
99
0
    return c - 'a';  // negative for c below 'a', greater than 25 for c above 'z'
100
0
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
101
0
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
102
0
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
103
0
    if (c <= 'i') { return c - 'a'; }  // a-i --> 0..8 (negative for c below 'a')
104
0
    if (c < 'j') { return -1; }  // c lies in the gap between i and j
105
0
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
106
0
    if (c < 's') { return -1; }  // c lies in the gap between r and s
107
0
    return c - 's' + 18;  // s-z --> 18..25 (greater than 25 for c above 'z')
108
0
#else
109
0
#   error Unknown charset family!
110
0
#endif
111
0
}
112
113
U_NAMESPACE_END
114
115
#endif
116
117
/**
118
 * Returns true if c == '@' is possible.
119
 * The @ sign is a variant character: the @ sign used on one
120
 * EBCDIC machine may be compiled to a different code than on other EBCDIC-based machines.
121
 * @internal
122
 */
123
U_CFUNC UBool
124
uprv_isEbcdicAtSign(char c);
125
126
/**
127
 * \def uprv_isAtSign
128
 * Expands to an expression that is true if c == '@' is possible.
129
 * For ASCII, compares c with exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(c), because the @ sign is variant there.
130
 * @internal
131
 */
132
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
133
#   define uprv_isAtSign(c) ((c)=='@')
134
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
135
#   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
136
#else
137
#   error Unknown charset family!
138
#endif
139
140
/**
141
 * Compare two EBCDIC invariant-character strings in ASCII order.
142
 * @internal
143
 */
144
U_CAPI int32_t U_EXPORT2
145
uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
146
147
/**
148
 * \def uprv_compareInvCharsAsAscii
149
 * Compare two invariant-character strings in ASCII order. For ASCII, expands to uprv_strcmp(s1, s2); for EBCDIC, to uprv_compareInvEbcdicAsAscii(s1, s2). NOTE(review): uprv_strcmp is not declared in the visible part of this header; presumably the including file must provide it -- confirm.
150
 * @internal
151
 */
152
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
153
0
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
154
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
155
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
156
#else
157
#   error Unknown charset family!
158
#endif
159
160
/**
161
 * Converts an EBCDIC invariant character to ASCII.
162
 * @internal
163
 */
164
U_CAPI char U_EXPORT2
165
uprv_ebcdicToAscii(char c);
166
167
/**
168
 * \def uprv_invCharToAscii
169
 * Converts an invariant character to ASCII. For ASCII, this is the identity and expands to (c); for EBCDIC, it calls uprv_ebcdicToAscii(c).
170
 * @internal
171
 */
172
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
173
#   define uprv_invCharToAscii(c) (c)
174
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
175
#   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
176
#else
177
#   error Unknown charset family!
178
#endif
179
180
/**
181
 * Converts an EBCDIC invariant character to lowercase ASCII.
182
 * @internal
183
 */
184
U_CAPI char U_EXPORT2
185
uprv_ebcdicToLowercaseAscii(char c);
186
187
/**
188
 * \def uprv_invCharToLowercaseAscii
189
 * Converts an invariant character to lowercase ASCII. Defined without a parameter list, as an alias for uprv_asciitolower (ASCII) or uprv_ebcdicToLowercaseAscii (EBCDIC). NOTE(review): uprv_asciitolower is not declared in the visible part of this header; presumably the including file must provide it -- confirm.
190
 * @internal
191
 */
192
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
193
0
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
194
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
195
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
196
#else
197
#   error Unknown charset family!
198
#endif
199
200
/**
201
 * Copy EBCDIC to ASCII
202
 * @internal
203
 * @see uprv_strncpy
204
 */
205
U_CAPI uint8_t* U_EXPORT2
206
uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
207
208
209
/**
210
 * Copy ASCII to EBCDIC
211
 * @internal
212
 * @see uprv_strncpy
213
 */
214
U_CAPI uint8_t* U_EXPORT2
215
uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
216
217
218
219
#endif