/src/icu/source/common/unicode/idna.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2010-2012, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * file name: idna.h |
9 | | * encoding: UTF-8 |
10 | | * tab size: 8 (not used) |
11 | | * indentation:4 |
12 | | * |
13 | | * created on: 2010mar05 |
14 | | * created by: Markus W. Scherer |
15 | | */ |
16 | | |
17 | | #ifndef __IDNA_H__ |
18 | | #define __IDNA_H__ |
19 | | |
20 | | /** |
21 | | * \file |
22 | | * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) |
23 | | */ |
24 | | |
25 | | #include "unicode/utypes.h" |
26 | | |
27 | | #if !UCONFIG_NO_IDNA |
28 | | |
29 | | #include "unicode/bytestream.h" |
30 | | #include "unicode/stringpiece.h" |
31 | | #include "unicode/uidna.h" |
32 | | #include "unicode/unistr.h" |
33 | | |
34 | | U_NAMESPACE_BEGIN |
35 | | |
36 | | class IDNAInfo; |
37 | | |
38 | | /** |
39 | | * Abstract base class for IDNA processing. |
40 | | * See http://www.unicode.org/reports/tr46/ |
41 | | * and http://www.ietf.org/rfc/rfc3490.txt |
42 | | * |
43 | | * The IDNA class is not intended for public subclassing. |
44 | | * |
45 | | * This C++ API currently only implements UTS #46. |
46 | | * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) |
47 | | * and IDNA2003 (functions that do not use a service object). |
48 | | * @stable ICU 4.6 |
49 | | */ |
50 | | class U_COMMON_API IDNA : public UObject { |
51 | | public: |
52 | | /** |
53 | | * Destructor. |
54 | | * @stable ICU 4.6 |
55 | | */ |
56 | | ~IDNA(); |
57 | | |
58 | | /** |
59 | | * Returns an IDNA instance which implements UTS #46. |
60 | | * Returns an unmodifiable instance, owned by the caller. |
61 | | * Cache it for multiple operations, and delete it when done. |
62 | | * The instance is thread-safe, that is, it can be used concurrently. |
63 | | * |
64 | | * UTS #46 defines Unicode IDNA Compatibility Processing, |
65 | | * updated to the latest version of Unicode and compatible with both |
66 | | * IDNA2003 and IDNA2008. |
67 | | * |
68 | | * The worker functions use transitional processing, including deviation mappings, |
69 | | * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE |
70 | | * is used in which case the deviation characters are passed through without change. |
71 | | * |
72 | | * Disallowed characters are mapped to U+FFFD. |
73 | | * |
74 | | * For available options see the uidna.h header. |
75 | | * Operations with the UTS #46 instance do not support the |
76 | | * UIDNA_ALLOW_UNASSIGNED option. |
77 | | * |
78 | | * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). |
79 | | * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than |
80 | | * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. |
81 | | * |
82 | | * @param options Bit set to modify the processing and error checking. |
83 | | * See option bit set values in uidna.h. |
84 | | * @param errorCode Standard ICU error code. Its input value must |
85 | | * pass the U_SUCCESS() test, or else the function returns |
86 | | * immediately. Check for U_FAILURE() on output or use with |
87 | | * function chaining. (See User Guide for details.) |
88 | | * @return the UTS #46 IDNA instance, if successful |
89 | | * @stable ICU 4.6 |
90 | | */ |
91 | | static IDNA * |
92 | | createUTS46Instance(uint32_t options, UErrorCode &errorCode); |
93 | | |
94 | | /** |
95 | | * Converts a single domain name label into its ASCII form for DNS lookup. |
96 | | * If any processing step fails, then info.hasErrors() will be TRUE and |
97 | | * the result might not be an ASCII string. |
98 | | * The label might be modified according to the types of errors. |
99 | | * Labels with severe errors will be left in (or turned into) their Unicode form. |
100 | | * |
101 | | * The UErrorCode indicates an error only in exceptional cases, |
102 | | * such as a U_MEMORY_ALLOCATION_ERROR. |
103 | | * |
104 | | * @param label Input domain name label |
105 | | * @param dest Destination string object |
106 | | * @param info Output container of IDNA processing details. |
107 | | * @param errorCode Standard ICU error code. Its input value must |
108 | | * pass the U_SUCCESS() test, or else the function returns |
109 | | * immediately. Check for U_FAILURE() on output or use with |
110 | | * function chaining. (See User Guide for details.) |
111 | | * @return dest |
112 | | * @stable ICU 4.6 |
113 | | */ |
114 | | virtual UnicodeString & |
115 | | labelToASCII(const UnicodeString &label, UnicodeString &dest, |
116 | | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
117 | | |
118 | | /** |
119 | | * Converts a single domain name label into its Unicode form for human-readable display. |
120 | | * If any processing step fails, then info.hasErrors() will be TRUE. |
121 | | * The label might be modified according to the types of errors. |
122 | | * |
123 | | * The UErrorCode indicates an error only in exceptional cases, |
124 | | * such as a U_MEMORY_ALLOCATION_ERROR. |
125 | | * |
126 | | * @param label Input domain name label |
127 | | * @param dest Destination string object |
128 | | * @param info Output container of IDNA processing details. |
129 | | * @param errorCode Standard ICU error code. Its input value must |
130 | | * pass the U_SUCCESS() test, or else the function returns |
131 | | * immediately. Check for U_FAILURE() on output or use with |
132 | | * function chaining. (See User Guide for details.) |
133 | | * @return dest |
134 | | * @stable ICU 4.6 |
135 | | */ |
136 | | virtual UnicodeString & |
137 | | labelToUnicode(const UnicodeString &label, UnicodeString &dest, |
138 | | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
139 | | |
140 | | /** |
141 | | * Converts a whole domain name into its ASCII form for DNS lookup. |
142 | | * If any processing step fails, then info.hasErrors() will be TRUE and |
143 | | * the result might not be an ASCII string. |
144 | | * The domain name might be modified according to the types of errors. |
145 | | * Labels with severe errors will be left in (or turned into) their Unicode form. |
146 | | * |
147 | | * The UErrorCode indicates an error only in exceptional cases, |
148 | | * such as a U_MEMORY_ALLOCATION_ERROR. |
149 | | * |
150 | | * @param name Input domain name |
151 | | * @param dest Destination string object |
152 | | * @param info Output container of IDNA processing details. |
153 | | * @param errorCode Standard ICU error code. Its input value must |
154 | | * pass the U_SUCCESS() test, or else the function returns |
155 | | * immediately. Check for U_FAILURE() on output or use with |
156 | | * function chaining. (See User Guide for details.) |
157 | | * @return dest |
158 | | * @stable ICU 4.6 |
159 | | */ |
160 | | virtual UnicodeString & |
161 | | nameToASCII(const UnicodeString &name, UnicodeString &dest, |
162 | | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
163 | | |
164 | | /** |
165 | | * Converts a whole domain name into its Unicode form for human-readable display. |
166 | | * If any processing step fails, then info.hasErrors() will be TRUE. |
167 | | * The domain name might be modified according to the types of errors. |
168 | | * |
169 | | * The UErrorCode indicates an error only in exceptional cases, |
170 | | * such as a U_MEMORY_ALLOCATION_ERROR. |
171 | | * |
172 | | * @param name Input domain name |
173 | | * @param dest Destination string object |
174 | | * @param info Output container of IDNA processing details. |
175 | | * @param errorCode Standard ICU error code. Its input value must |
176 | | * pass the U_SUCCESS() test, or else the function returns |
177 | | * immediately. Check for U_FAILURE() on output or use with |
178 | | * function chaining. (See User Guide for details.) |
179 | | * @return dest |
180 | | * @stable ICU 4.6 |
181 | | */ |
182 | | virtual UnicodeString & |
183 | | nameToUnicode(const UnicodeString &name, UnicodeString &dest, |
184 | | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
185 | | |
186 | | // UTF-8 versions of the processing methods ---------------------------- *** |
187 | | |
188 | | /** |
189 | | * Converts a single domain name label into its ASCII form for DNS lookup. |
190 | | * UTF-8 version of labelToASCII(), same behavior. |
191 | | * |
192 | | * @param label Input domain name label |
193 | | * @param dest Destination byte sink; Flush()ed if successful |
194 | | * @param info Output container of IDNA processing details. |
195 | | * @param errorCode Standard ICU error code. Its input value must |
196 | | * pass the U_SUCCESS() test, or else the function returns |
197 | | * immediately. Check for U_FAILURE() on output or use with |
198 | | * function chaining. (See User Guide for details.) |
199 | | * @note No value is returned (the function is void); the output is written to dest. |
200 | | * @stable ICU 4.6 |
201 | | */ |
202 | | virtual void |
203 | | labelToASCII_UTF8(StringPiece label, ByteSink &dest, |
204 | | IDNAInfo &info, UErrorCode &errorCode) const; |
205 | | |
206 | | /** |
207 | | * Converts a single domain name label into its Unicode form for human-readable display. |
208 | | * UTF-8 version of labelToUnicode(), same behavior. |
209 | | * |
210 | | * @param label Input domain name label |
211 | | * @param dest Destination byte sink; Flush()ed if successful |
212 | | * @param info Output container of IDNA processing details. |
213 | | * @param errorCode Standard ICU error code. Its input value must |
214 | | * pass the U_SUCCESS() test, or else the function returns |
215 | | * immediately. Check for U_FAILURE() on output or use with |
216 | | * function chaining. (See User Guide for details.) |
217 | | * @note No value is returned (the function is void); the output is written to dest. |
218 | | * @stable ICU 4.6 |
219 | | */ |
220 | | virtual void |
221 | | labelToUnicodeUTF8(StringPiece label, ByteSink &dest, |
222 | | IDNAInfo &info, UErrorCode &errorCode) const; |
223 | | |
224 | | /** |
225 | | * Converts a whole domain name into its ASCII form for DNS lookup. |
226 | | * UTF-8 version of nameToASCII(), same behavior. |
227 | | * |
228 | | * @param name Input domain name |
229 | | * @param dest Destination byte sink; Flush()ed if successful |
230 | | * @param info Output container of IDNA processing details. |
231 | | * @param errorCode Standard ICU error code. Its input value must |
232 | | * pass the U_SUCCESS() test, or else the function returns |
233 | | * immediately. Check for U_FAILURE() on output or use with |
234 | | * function chaining. (See User Guide for details.) |
235 | | * @note No value is returned (the function is void); the output is written to dest. |
236 | | * @stable ICU 4.6 |
237 | | */ |
238 | | virtual void |
239 | | nameToASCII_UTF8(StringPiece name, ByteSink &dest, |
240 | | IDNAInfo &info, UErrorCode &errorCode) const; |
241 | | |
242 | | /** |
243 | | * Converts a whole domain name into its Unicode form for human-readable display. |
244 | | * UTF-8 version of nameToUnicode(), same behavior. |
245 | | * |
246 | | * @param name Input domain name |
247 | | * @param dest Destination byte sink; Flush()ed if successful |
248 | | * @param info Output container of IDNA processing details. |
249 | | * @param errorCode Standard ICU error code. Its input value must |
250 | | * pass the U_SUCCESS() test, or else the function returns |
251 | | * immediately. Check for U_FAILURE() on output or use with |
252 | | * function chaining. (See User Guide for details.) |
253 | | * @note No value is returned (the function is void); the output is written to dest. |
254 | | * @stable ICU 4.6 |
255 | | */ |
256 | | virtual void |
257 | | nameToUnicodeUTF8(StringPiece name, ByteSink &dest, |
258 | | IDNAInfo &info, UErrorCode &errorCode) const; |
259 | | }; |
260 | | |
261 | | class UTS46; |
262 | | |
263 | | /** |
264 | | * Output container for IDNA processing errors. |
265 | | * The IDNAInfo class is not suitable for subclassing. |
266 | | * @stable ICU 4.6 |
267 | | */ |
268 | | class U_COMMON_API IDNAInfo : public UMemory { |
269 | | public: |
270 | | /** |
271 | | * Constructor for stack allocation; the new object reports no errors. |
272 | | * @stable ICU 4.6 |
273 | | */ |
274 | 6.52M | IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} |
275 | | /** |
276 | | * Were there IDNA processing errors? |
277 | | * @return TRUE if there were processing errors, that is, if getErrors() is not 0 |
278 | | * @stable ICU 4.6 |
279 | | */ |
280 | 0 | UBool hasErrors() const { return errors!=0; } |
281 | | /** |
282 | | * Returns a bit set indicating IDNA processing errors. |
283 | | * See UIDNA_ERROR_... constants in uidna.h. |
284 | | * @return bit set of processing errors |
285 | | * @stable ICU 4.6 |
286 | | */ |
287 | 6.52M | uint32_t getErrors() const { return errors; } |
288 | | /** |
289 | | * Returns TRUE if transitional and nontransitional processing produce different results. |
290 | | * This is the case when the input label or domain name contains |
291 | | * one or more deviation characters outside a Punycode label (see UTS #46). |
292 | | * <ul> |
293 | | * <li>With nontransitional processing, such characters are |
294 | | * copied to the destination string. |
295 | | * <li>With transitional processing, such characters are |
296 | | * mapped (sharp s/sigma) or removed (joiner/nonjoiner). |
297 | | * </ul> |
298 | | * @return TRUE if transitional and nontransitional processing produce different results |
299 | | * @stable ICU 4.6 |
300 | | */ |
301 | 6.52M | UBool isTransitionalDifferent() const { return isTransDiff; } |
302 | | |
303 | | private: |
304 | | friend class UTS46; |
305 | | |
306 | | IDNAInfo(const IDNAInfo &other); // no copying: private, and no body in this header |
307 | | IDNAInfo &operator=(const IDNAInfo &other); // no copying: private, and no body in this header |
308 | | /* Restores every field to the value the constructor assigns. */ |
309 | 6.52M | void reset() { |
310 | 6.52M | errors=labelErrors=0; |
311 | 6.52M | isTransDiff=FALSE; |
312 | 6.52M | isBiDi=FALSE; |
313 | 6.52M | isOkBiDi=TRUE; |
314 | 6.52M | } |
315 | | /* errors feeds hasErrors()/getErrors(); isTransDiff feeds isTransitionalDifferent(); the other fields have no accessor in this header. */ |
316 | | uint32_t errors, labelErrors; |
317 | | UBool isTransDiff; |
318 | | UBool isBiDi; |
319 | | UBool isOkBiDi; |
320 | | }; |
321 | | |
322 | | U_NAMESPACE_END |
323 | | |
324 | | #endif // UCONFIG_NO_IDNA |
325 | | #endif // __IDNA_H__ |