/src/icu/source/common/uinvchar.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * |
6 | | * Copyright (C) 1999-2015, International Business Machines |
7 | | * Corporation and others. All Rights Reserved. |
8 | | * |
9 | | ******************************************************************************* |
10 | | * file name: uinvchar.h |
11 | | * encoding: UTF-8 |
12 | | * tab size: 8 (not used) |
13 | | * indentation:2 |
14 | | * |
15 | | * created on: 2004sep14 |
16 | | * created by: Markus W. Scherer |
17 | | * |
18 | | * Definitions for handling invariant characters, moved here from putil.c |
19 | | * for better modularization. |
20 | | */ |
21 | | |
22 | | #ifndef __UINVCHAR_H__ |
23 | | #define __UINVCHAR_H__ |
24 | | |
25 | | #include "unicode/utypes.h" |
26 | | #ifdef __cplusplus |
27 | | #include "unicode/unistr.h" |
28 | | #endif |
29 | | |
30 | | /** |
31 | | * Check if a char string only contains invariant characters. |
32 | | * See utypes.h for details. |
33 | | * |
34 | | * @param s Input string pointer. |
35 | | * @param length Length of the string, can be -1 if NUL-terminated. |
36 | | * @return true if s contains only invariant characters. |
37 | | * |
38 | | * @internal (ICU 2.8) |
39 | | */ |
40 | | U_CAPI UBool U_EXPORT2 |
41 | | uprv_isInvariantString(const char *s, int32_t length); |
42 | | |
43 | | /** |
44 | | * Check if a Unicode string only contains invariant characters. |
45 | | * See utypes.h for details. |
46 | | * |
47 | | * @param s Input string pointer. |
48 | | * @param length Length of the string, can be -1 if NUL-terminated. |
49 | | * @return true if s contains only invariant characters. |
50 | | * |
51 | | * @internal (ICU 2.8) |
52 | | */ |
53 | | U_CAPI UBool U_EXPORT2 |
54 | | uprv_isInvariantUString(const UChar *s, int32_t length); |
55 | | |
56 | | /**
57 | | * \def U_UPPER_ORDINAL
58 | | * Get the zero-based ordinal number (0 for 'A' through 25 for 'Z') of an uppercase invariant character. Performs no validation: the result is only meaningful when x is an uppercase letter. The EBCDIC variant evaluates x more than once, so x must not have side effects.
59 | | * @internal
60 | | */
61 | | #if U_CHARSET_FAMILY==U_ASCII_FAMILY
62 | | # define U_UPPER_ORDINAL(x) ((x)-'A')
63 | | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
64 | | # define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
65 | | (((x) < 'S') ? ((x)-'J'+9) : \
66 | | ((x)-'S'+18)))
67 | | #else
68 | | # error Unknown charset family!
69 | | #endif
70 | | |
71 | | #ifdef __cplusplus |
72 | | |
73 | | U_NAMESPACE_BEGIN |
74 | | |
/**
 * Like U_UPPER_ORDINAL(x) but with validation.
 *
 * @param c Character code in the platform charset (ASCII or EBCDIC family).
 * @return 0..25 for 'A'..'Z'; otherwise a value outside 0..25:
 *         -1 for every c below 'A' (and, on EBCDIC, for codes in the gaps
 *         between the three letter ranges), a value above 25 for c above 'Z'.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
    // Reject everything below 'A' before subtracting: for c close to INT32_MIN
    // the expression c - 'A' would overflow, which is undefined behavior for
    // signed integers. -1 is the same "invalid" value the EBCDIC gaps use.
    if (c < 'A') { return -1; }
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'I') { return c - 'A'; }  // A-I --> 0..8
    if (c < 'J') { return -1; }        // gap between I and J
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
    if (c < 'S') { return -1; }        // gap between R and S
    return c - 'S' + 18;  // S-Z --> 18..25; anything above Z yields > 25
#else
#   error Unknown charset family!
#endif
}
94 | | |
/**
 * Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
 *
 * @param c Character code in the platform charset (ASCII or EBCDIC family).
 * @return 0..25 for 'a'..'z'; otherwise a value outside 0..25:
 *         -1 for every c below 'a' (and, on EBCDIC, for codes in the gaps
 *         between the three letter ranges), a value above 25 for c above 'z'.
 */
inline int32_t uprv_lowerOrdinal(int32_t c) {
    // Reject everything below 'a' before subtracting: for c close to INT32_MIN
    // the expression c - 'a' would overflow, which is undefined behavior for
    // signed integers. -1 is the same "invalid" value the EBCDIC gaps use.
    if (c < 'a') { return -1; }
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'i') { return c - 'a'; }  // a-i --> 0..8
    if (c < 'j') { return -1; }        // gap between i and j
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    if (c < 's') { return -1; }        // gap between r and s
    return c - 's' + 18;  // s-z --> 18..25; anything above z yields > 25
#else
#   error Unknown charset family!
#endif
}
112 | | |
113 | | U_NAMESPACE_END |
114 | | |
115 | | #endif |
116 | | |
117 | | /** |
118 | | * Returns true if c == '@' is possible. |
119 | | * The @ sign is variant, and the @ sign used on one |
120 | | * EBCDIC machine won't be compiled the same way on other EBCDIC based machines. |
121 | | * @internal |
122 | | */ |
123 | | U_CFUNC UBool |
124 | | uprv_isEbcdicAtSign(char c); |
125 | | |
126 | | /**
127 | | * \def uprv_isAtSign
128 | | * Returns true if c == '@' is possible (the @ sign is a variant character on EBCDIC platforms).
129 | | * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(). Either way the argument is evaluated exactly once.
130 | | * @internal
131 | | */
132 | | #if U_CHARSET_FAMILY==U_ASCII_FAMILY
133 | | # define uprv_isAtSign(c) ((c)=='@')
134 | | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
135 | | # define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
136 | | #else
137 | | # error Unknown charset family!
138 | | #endif
139 | | |
140 | | /** |
141 | | * Compare two EBCDIC invariant-character strings in ASCII order. |
142 | | * @internal |
143 | | */ |
144 | | U_CAPI int32_t U_EXPORT2 |
145 | | uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); |
146 | | |
147 | | /**
148 | | * \def uprv_compareInvCharsAsAscii
149 | | * Compare two invariant-character strings in ASCII order. For ASCII, expands to uprv_strcmp(s1, s2); for EBCDIC, calls uprv_compareInvEbcdicAsAscii(s1, s2). NOTE(review): uprv_strcmp is not declared in the visible part of this header; presumably the including file must provide it - confirm.
150 | | * @internal
151 | | */
152 | | #if U_CHARSET_FAMILY==U_ASCII_FAMILY
153 | 0 | # define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
154 | | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
155 | | # define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
156 | | #else
157 | | # error Unknown charset family!
158 | | #endif
159 | | |
160 | | /** |
161 | | * Converts an EBCDIC invariant character to ASCII. |
162 | | * @internal |
163 | | */ |
164 | | U_CAPI char U_EXPORT2 |
165 | | uprv_ebcdicToAscii(char c); |
166 | | |
167 | | /**
168 | | * \def uprv_invCharToAscii
169 | | * Converts an invariant character to ASCII. For ASCII, expands to the argument itself (no conversion); for EBCDIC, calls uprv_ebcdicToAscii(c).
170 | | * @internal
171 | | */
172 | | #if U_CHARSET_FAMILY==U_ASCII_FAMILY
173 | | # define uprv_invCharToAscii(c) (c)
174 | | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
175 | | # define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
176 | | #else
177 | | # error Unknown charset family!
178 | | #endif
179 | | |
180 | | /** |
181 | | * Converts an EBCDIC invariant character to lowercase ASCII. |
182 | | * @internal |
183 | | */ |
184 | | U_CAPI char U_EXPORT2 |
185 | | uprv_ebcdicToLowercaseAscii(char c); |
186 | | |
187 | | /**
188 | | * \def uprv_invCharToLowercaseAscii
189 | | * Converts an invariant character to lowercase ASCII. Object-like macro: it is an alias for the function name uprv_asciitolower (ASCII) or uprv_ebcdicToLowercaseAscii (EBCDIC), so it is invoked as uprv_invCharToLowercaseAscii(c). NOTE(review): uprv_asciitolower is not declared in the visible part of this header; presumably the including file must provide it - confirm.
190 | | * @internal
191 | | */
192 | | #if U_CHARSET_FAMILY==U_ASCII_FAMILY
193 | 0 | # define uprv_invCharToLowercaseAscii uprv_asciitolower
194 | | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
195 | | # define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
196 | | #else
197 | | # error Unknown charset family!
198 | | #endif
199 | | |
200 | | /** |
201 | | * Copy EBCDIC to ASCII |
202 | | * @internal |
203 | | * @see uprv_strncpy |
204 | | */ |
205 | | U_CAPI uint8_t* U_EXPORT2 |
206 | | uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); |
207 | | |
208 | | |
209 | | /** |
210 | | * Copy ASCII to EBCDIC |
211 | | * @internal |
212 | | * @see uprv_strncpy |
213 | | */ |
214 | | U_CAPI uint8_t* U_EXPORT2 |
215 | | uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); |
216 | | |
217 | | |
218 | | |
219 | | #endif |