Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/unicode/idna.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2012, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  idna.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010mar05
14
*   created by: Markus W. Scherer
15
*/
16
17
#ifndef __IDNA_H__
18
#define __IDNA_H__
19
20
/**
21
 * \file
22
 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23
 */
24
25
#include "unicode/utypes.h"
26
27
#if !UCONFIG_NO_IDNA
28
29
#include "unicode/bytestream.h"
30
#include "unicode/stringpiece.h"
31
#include "unicode/uidna.h"
32
#include "unicode/unistr.h"
33
34
U_NAMESPACE_BEGIN
35
36
class IDNAInfo;
37
38
/**
39
 * Abstract base class for IDNA processing.
40
 * See http://www.unicode.org/reports/tr46/
41
 * and http://www.ietf.org/rfc/rfc3490.txt
42
 *
43
 * The IDNA class is not intended for public subclassing.
44
 *
45
 * This C++ API currently only implements UTS #46.
46
 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
47
 * and IDNA2003 (functions that do not use a service object).
48
 * @stable ICU 4.6
49
 */
50
class U_COMMON_API IDNA : public UObject {
public:
    /**
     * Destructor.
     * @stable ICU 4.6
     */
    ~IDNA();

    /**
     * Returns an IDNA instance which implements UTS #46.
     * Returns an unmodifiable instance, owned by the caller.
     * Cache it for multiple operations, and delete it when done.
     * The instance is thread-safe, that is, it can be used concurrently.
     *
     * UTS #46 defines Unicode IDNA Compatibility Processing,
     * updated to the latest version of Unicode and compatible with both
     * IDNA2003 and IDNA2008.
     *
     * The worker functions use transitional processing, including deviation mappings,
     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
     * is used in which case the deviation characters are passed through without change.
     *
     * Disallowed characters are mapped to U+FFFD.
     *
     * For available options see the uidna.h header.
     * Operations with the UTS #46 instance do not support the
     * UIDNA_ALLOW_UNASSIGNED option.
     *
     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     *
     * @param options Bit set to modify the processing and error checking.
     *                See option bit set values in uidna.h.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the UTS #46 IDNA instance, if successful
     * @stable ICU 4.6
     */
    static IDNA *
    createUTS46Instance(uint32_t options, UErrorCode &errorCode);

    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * If any processing step fails, then info.hasErrors() will be TRUE and
     * the result might not be an ASCII string.
     * The label might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.6
     */
    virtual UnicodeString &
    labelToASCII(const UnicodeString &label, UnicodeString &dest,
                 IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * If any processing step fails, then info.hasErrors() will be TRUE.
     * The label might be modified according to the types of errors.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.6
     */
    virtual UnicodeString &
    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
                   IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * If any processing step fails, then info.hasErrors() will be TRUE and
     * the result might not be an ASCII string.
     * The domain name might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.6
     */
    virtual UnicodeString &
    nameToASCII(const UnicodeString &name, UnicodeString &dest,
                IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * If any processing step fails, then info.hasErrors() will be TRUE.
     * The domain name might be modified according to the types of errors.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.6
     */
    virtual UnicodeString &
    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
                  IDNAInfo &info, UErrorCode &errorCode) const = 0;

    // UTF-8 versions of the processing methods ---------------------------- ***
    //
    // Unlike the UnicodeString versions above, these are declared void:
    // the output goes to the ByteSink argument and nothing is returned.

    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * UTF-8 version of labelToASCII(), same behavior.
     * The output is written to the dest byte sink; there is no return value.
     *
     * @param label Input domain name label
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 4.6
     */
    virtual void
    labelToASCII_UTF8(StringPiece label, ByteSink &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * UTF-8 version of labelToUnicode(), same behavior.
     * The output is written to the dest byte sink; there is no return value.
     *
     * @param label Input domain name label
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 4.6
     */
    virtual void
    labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
                       IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * UTF-8 version of nameToASCII(), same behavior.
     * The output is written to the dest byte sink; there is no return value.
     *
     * @param name Input domain name
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 4.6
     */
    virtual void
    nameToASCII_UTF8(StringPiece name, ByteSink &dest,
                     IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * UTF-8 version of nameToUnicode(), same behavior.
     * The output is written to the dest byte sink; there is no return value.
     *
     * @param name Input domain name
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 4.6
     */
    virtual void
    nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const;
};
260
261
class UTS46;
262
263
/**
264
 * Output container for IDNA processing errors.
265
 * The IDNAInfo class is not suitable for subclassing.
266
 * @stable ICU 4.6
267
 */
268
class U_COMMON_API IDNAInfo : public UMemory {
public:
    /**
     * Default constructor, usable for stack allocation.
     * The new object has an empty error bit set, the
     * transitional-difference and BiDi flags cleared, and the
     * "BiDi is OK" flag set -- the same state that reset() establishes.
     * @stable ICU 4.6
     */
    IDNAInfo()
            : errors(0),
              labelErrors(0),
              isTransDiff(FALSE),
              isBiDi(FALSE),
              isOkBiDi(TRUE) {
    }

    /**
     * Tells whether IDNA processing recorded any errors.
     * @return TRUE if the error bit set is not empty
     * @stable ICU 4.6
     */
    UBool hasErrors() const {
        return 0!=errors;
    }

    /**
     * Returns the IDNA processing errors as a bit set.
     * See UIDNA_ERROR_... constants in uidna.h.
     * @return bit set of processing errors
     * @stable ICU 4.6
     */
    uint32_t getErrors() const {
        return errors;
    }

    /**
     * Tells whether transitional and nontransitional processing produce
     * different results.
     * This is the case when the input label or domain name contains
     * one or more deviation characters outside a Punycode label (see UTS #46).
     * <ul>
     * <li>With nontransitional processing, such characters are
     * copied to the destination string.
     * <li>With transitional processing, such characters are
     * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
     * </ul>
     * @return TRUE if transitional and nontransitional processing produce different results
     * @stable ICU 4.6
     */
    UBool isTransitionalDifferent() const {
        return isTransDiff;
    }

private:
    // UTS46 is granted access to reset() and to the fields below.
    friend class UTS46;

    // Copying is not supported: both members are declared private here
    // and are not defined in this header.
    IDNAInfo(const IDNAInfo &other);
    IDNAInfo &operator=(const IDNAInfo &other);

    // Puts every field back to the value assigned by the default constructor.
    void reset() {
        labelErrors=0;
        errors=0;
        isOkBiDi=TRUE;
        isBiDi=FALSE;
        isTransDiff=FALSE;
    }

    // Error bit set reported by getErrors() and tested by hasErrors().
    uint32_t errors;
    // Second error bit set; this header only initializes and resets it.
    // NOTE(review): presumably the errors of the label currently being
    // processed by UTS46 -- confirm against the implementation.
    uint32_t labelErrors;
    // Value reported by isTransitionalDifferent().
    UBool isTransDiff;
    // BiDi state flags; this header only initializes and resets them.
    // NOTE(review): exact meaning is defined by the UTS46 implementation.
    UBool isBiDi;
    UBool isOkBiDi;
};
321
322
U_NAMESPACE_END
323
324
#endif  // UCONFIG_NO_IDNA
325
#endif  // __IDNA_H__