/src/icu/source/common/uts46.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2010-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * file name: uts46.cpp |
9 | | * encoding: UTF-8 |
10 | | * tab size: 8 (not used) |
11 | | * indentation:4 |
12 | | * |
13 | | * created on: 2010mar09 |
14 | | * created by: Markus W. Scherer |
15 | | */ |
16 | | |
17 | | #include "unicode/utypes.h" |
18 | | |
19 | | #if !UCONFIG_NO_IDNA |
20 | | |
21 | | #include "unicode/idna.h" |
22 | | #include "unicode/normalizer2.h" |
23 | | #include "unicode/uscript.h" |
24 | | #include "unicode/ustring.h" |
25 | | #include "unicode/utf16.h" |
26 | | #include "cmemory.h" |
27 | | #include "cstring.h" |
28 | | #include "punycode.h" |
29 | | #include "ubidi_props.h" |
30 | | #include "ustr_imp.h" |
31 | | |
32 | | // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: |
33 | | // |
34 | | // The domain name length limit is 255 octets in an internal DNS representation |
35 | | // where the last ("root") label is the empty label |
36 | | // represented by length byte 0 alone. |
37 | | // In a conventional string, this translates to 253 characters, or 254 |
38 | | // if there is a trailing dot for the root label. |
39 | | |
40 | | U_NAMESPACE_BEGIN |
41 | | |
42 | | // Severe errors which usually result in a U+FFFD replacement character in the result string. |
43 | | const uint32_t severeErrors= |
44 | | UIDNA_ERROR_LEADING_COMBINING_MARK| |
45 | | UIDNA_ERROR_DISALLOWED| |
46 | | UIDNA_ERROR_PUNYCODE| |
47 | | UIDNA_ERROR_LABEL_HAS_DOT| |
48 | | UIDNA_ERROR_INVALID_ACE_LABEL; |
49 | | |
50 | | static inline UBool |
51 | 1.15k | isASCIIString(const UnicodeString &dest) { |
52 | 1.15k | const UChar *s=dest.getBuffer(); |
53 | 1.15k | const UChar *limit=s+dest.length(); |
54 | 159k | while(s<limit) { |
55 | 159k | if(*s++>0x7f) { |
56 | 545 | return FALSE; |
57 | 545 | } |
58 | 159k | } |
59 | 608 | return TRUE; |
60 | 1.15k | } |
61 | | |
62 | | static UBool |
63 | | isASCIIOkBiDi(const UChar *s, int32_t length); |
64 | | |
65 | | static UBool |
66 | | isASCIIOkBiDi(const char *s, int32_t length); |
67 | | |
68 | | // IDNA class default implementations -------------------------------------- *** |
69 | | |
70 | 4.45k | IDNA::~IDNA() {} |
71 | | |
72 | | void |
73 | | IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest, |
74 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
75 | 0 | if(U_SUCCESS(errorCode)) { |
76 | 0 | UnicodeString destString; |
77 | 0 | labelToASCII(UnicodeString::fromUTF8(label), destString, |
78 | 0 | info, errorCode).toUTF8(dest); |
79 | 0 | } |
80 | 0 | } |
81 | | |
82 | | void |
83 | | IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, |
84 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
85 | 0 | if(U_SUCCESS(errorCode)) { |
86 | 0 | UnicodeString destString; |
87 | 0 | labelToUnicode(UnicodeString::fromUTF8(label), destString, |
88 | 0 | info, errorCode).toUTF8(dest); |
89 | 0 | } |
90 | 0 | } |
91 | | |
92 | | void |
93 | | IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest, |
94 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
95 | 0 | if(U_SUCCESS(errorCode)) { |
96 | 0 | UnicodeString destString; |
97 | 0 | nameToASCII(UnicodeString::fromUTF8(name), destString, |
98 | 0 | info, errorCode).toUTF8(dest); |
99 | 0 | } |
100 | 0 | } |
101 | | |
102 | | void |
103 | | IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, |
104 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
105 | 0 | if(U_SUCCESS(errorCode)) { |
106 | 0 | UnicodeString destString; |
107 | 0 | nameToUnicode(UnicodeString::fromUTF8(name), destString, |
108 | 0 | info, errorCode).toUTF8(dest); |
109 | 0 | } |
110 | 0 | } |
111 | | |
112 | | // UTS46 class declaration ------------------------------------------------- *** |
113 | | |
114 | | class UTS46 : public IDNA { |
115 | | public: |
116 | | UTS46(uint32_t options, UErrorCode &errorCode); |
117 | | virtual ~UTS46(); |
118 | | |
119 | | virtual UnicodeString & |
120 | | labelToASCII(const UnicodeString &label, UnicodeString &dest, |
121 | | IDNAInfo &info, UErrorCode &errorCode) const; |
122 | | |
123 | | virtual UnicodeString & |
124 | | labelToUnicode(const UnicodeString &label, UnicodeString &dest, |
125 | | IDNAInfo &info, UErrorCode &errorCode) const; |
126 | | |
127 | | virtual UnicodeString & |
128 | | nameToASCII(const UnicodeString &name, UnicodeString &dest, |
129 | | IDNAInfo &info, UErrorCode &errorCode) const; |
130 | | |
131 | | virtual UnicodeString & |
132 | | nameToUnicode(const UnicodeString &name, UnicodeString &dest, |
133 | | IDNAInfo &info, UErrorCode &errorCode) const; |
134 | | |
135 | | virtual void |
136 | | labelToASCII_UTF8(StringPiece label, ByteSink &dest, |
137 | | IDNAInfo &info, UErrorCode &errorCode) const; |
138 | | |
139 | | virtual void |
140 | | labelToUnicodeUTF8(StringPiece label, ByteSink &dest, |
141 | | IDNAInfo &info, UErrorCode &errorCode) const; |
142 | | |
143 | | virtual void |
144 | | nameToASCII_UTF8(StringPiece name, ByteSink &dest, |
145 | | IDNAInfo &info, UErrorCode &errorCode) const; |
146 | | |
147 | | virtual void |
148 | | nameToUnicodeUTF8(StringPiece name, ByteSink &dest, |
149 | | IDNAInfo &info, UErrorCode &errorCode) const; |
150 | | |
151 | | private: |
152 | | UnicodeString & |
153 | | process(const UnicodeString &src, |
154 | | UBool isLabel, UBool toASCII, |
155 | | UnicodeString &dest, |
156 | | IDNAInfo &info, UErrorCode &errorCode) const; |
157 | | |
158 | | void |
159 | | processUTF8(StringPiece src, |
160 | | UBool isLabel, UBool toASCII, |
161 | | ByteSink &dest, |
162 | | IDNAInfo &info, UErrorCode &errorCode) const; |
163 | | |
164 | | UnicodeString & |
165 | | processUnicode(const UnicodeString &src, |
166 | | int32_t labelStart, int32_t mappingStart, |
167 | | UBool isLabel, UBool toASCII, |
168 | | UnicodeString &dest, |
169 | | IDNAInfo &info, UErrorCode &errorCode) const; |
170 | | |
171 | | // returns the new dest.length() |
172 | | int32_t |
173 | | mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, |
174 | | UErrorCode &errorCode) const; |
175 | | |
176 | | // returns the new label length |
177 | | int32_t |
178 | | processLabel(UnicodeString &dest, |
179 | | int32_t labelStart, int32_t labelLength, |
180 | | UBool toASCII, |
181 | | IDNAInfo &info, UErrorCode &errorCode) const; |
182 | | int32_t |
183 | | markBadACELabel(UnicodeString &dest, |
184 | | int32_t labelStart, int32_t labelLength, |
185 | | UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; |
186 | | |
187 | | void |
188 | | checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; |
189 | | |
190 | | UBool |
191 | | isLabelOkContextJ(const UChar *label, int32_t labelLength) const; |
192 | | |
193 | | void |
194 | | checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; |
195 | | |
196 | | const Normalizer2 &uts46Norm2; // uts46.nrm |
197 | | uint32_t options; |
198 | | }; |
199 | | |
200 | | IDNA * |
201 | 4.45k | IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { |
202 | 4.45k | if(U_SUCCESS(errorCode)) { |
203 | 4.45k | IDNA *idna=new UTS46(options, errorCode); |
204 | 4.45k | if(idna==NULL) { |
205 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
206 | 4.45k | } else if(U_FAILURE(errorCode)) { |
207 | 0 | delete idna; |
208 | 0 | idna=NULL; |
209 | 0 | } |
210 | 4.45k | return idna; |
211 | 4.45k | } else { |
212 | 0 | return NULL; |
213 | 0 | } |
214 | 4.45k | } |
215 | | |
216 | | // UTS46 implementation ---------------------------------------------------- *** |
217 | | |
218 | | UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) |
219 | | : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), |
220 | 4.45k | options(opt) {} |
221 | | |
222 | | UTS46::~UTS46() {} |
223 | | |
224 | | UnicodeString & |
225 | | UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, |
226 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
227 | 0 | return process(label, TRUE, TRUE, dest, info, errorCode); |
228 | 0 | } |
229 | | |
230 | | UnicodeString & |
231 | | UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, |
232 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
233 | 0 | return process(label, TRUE, FALSE, dest, info, errorCode); |
234 | 0 | } |
235 | | |
236 | | UnicodeString & |
237 | | UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, |
238 | 6.95M | IDNAInfo &info, UErrorCode &errorCode) const { |
239 | 6.95M | process(name, FALSE, TRUE, dest, info, errorCode); |
240 | 6.95M | if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && |
241 | 6.95M | isASCIIString(dest) && |
242 | 6.95M | (dest.length()>254 || dest[253]!=0x2e) |
243 | 6.95M | ) { |
244 | 411 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; |
245 | 411 | } |
246 | 6.95M | return dest; |
247 | 6.95M | } |
248 | | |
249 | | UnicodeString & |
250 | | UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, |
251 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
252 | 0 | return process(name, FALSE, FALSE, dest, info, errorCode); |
253 | 0 | } |
254 | | |
255 | | void |
256 | | UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest, |
257 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
258 | 0 | processUTF8(label, TRUE, TRUE, dest, info, errorCode); |
259 | 0 | } |
260 | | |
261 | | void |
262 | | UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, |
263 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
264 | 0 | processUTF8(label, TRUE, FALSE, dest, info, errorCode); |
265 | 0 | } |
266 | | |
267 | | void |
268 | | UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest, |
269 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
270 | 0 | processUTF8(name, FALSE, TRUE, dest, info, errorCode); |
271 | 0 | } |
272 | | |
273 | | void |
274 | | UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, |
275 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
276 | 0 | processUTF8(name, FALSE, FALSE, dest, info, errorCode); |
277 | 0 | } |
278 | | |
// UTS #46 data for the 128 ASCII characters, indexed by code point.
//
// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
// and passes through all other ASCII characters.
// If UIDNA_USE_STD3_RULES is set, then the non-LDH characters are disallowed
// using this table. The ASCII fastpaths use this table as well.
//
// Values:
//   -1  disallowed
//    0  valid
//    1  mapped (uppercase letter, to be lowercased)
static const int8_t asciiData[128]={
    /* 00..0F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 10..1F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 20..2F: only 002D..002E HYPHEN-MINUS..FULL STOP are valid */
                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
    /* 30..3F: 0030..0039 DIGIT ZERO..DIGIT NINE are valid */
                  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
    /* 40..4F: 0041..004F LATIN CAPITAL LETTER A..O are mapped */
                 -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 50..5F: 0050..005A LATIN CAPITAL LETTER P..Z are mapped */
                  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
    /* 60..6F: 0061..006F LATIN SMALL LETTER A..O are valid */
                 -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 70..7F: 0070..007A LATIN SMALL LETTER P..Z are valid */
                  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
};
300 | | |
301 | | UnicodeString & |
302 | | UTS46::process(const UnicodeString &src, |
303 | | UBool isLabel, UBool toASCII, |
304 | | UnicodeString &dest, |
305 | 6.95M | IDNAInfo &info, UErrorCode &errorCode) const { |
306 | | // uts46Norm2.normalize() would do all of this error checking and setup, |
307 | | // but with the ASCII fastpath we do not always call it, and do not |
308 | | // call it first. |
309 | 6.95M | if(U_FAILURE(errorCode)) { |
310 | 0 | dest.setToBogus(); |
311 | 0 | return dest; |
312 | 0 | } |
313 | 6.95M | const UChar *srcArray=src.getBuffer(); |
314 | 6.95M | if(&dest==&src || srcArray==NULL) { |
315 | 0 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
316 | 0 | dest.setToBogus(); |
317 | 0 | return dest; |
318 | 0 | } |
319 | | // Arguments are fine, reset output values. |
320 | 6.95M | dest.remove(); |
321 | 6.95M | info.reset(); |
322 | 6.95M | int32_t srcLength=src.length(); |
323 | 6.95M | if(srcLength==0) { |
324 | 0 | info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
325 | 0 | return dest; |
326 | 0 | } |
327 | 6.95M | UChar *destArray=dest.getBuffer(srcLength); |
328 | 6.95M | if(destArray==NULL) { |
329 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
330 | 0 | return dest; |
331 | 0 | } |
332 | | // ASCII fastpath |
333 | 6.95M | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
334 | 6.95M | int32_t labelStart=0; |
335 | 6.95M | int32_t i; |
336 | 7.40M | for(i=0;; ++i) { |
337 | 7.40M | if(i==srcLength) { |
338 | 0 | if(toASCII) { |
339 | 0 | if((i-labelStart)>63) { |
340 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
341 | 0 | } |
342 | | // There is a trailing dot if labelStart==i. |
343 | 0 | if(!isLabel && i>=254 && (i>254 || labelStart<i)) { |
344 | 0 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; |
345 | 0 | } |
346 | 0 | } |
347 | 0 | info.errors|=info.labelErrors; |
348 | 0 | dest.releaseBuffer(i); |
349 | 0 | return dest; |
350 | 0 | } |
351 | 7.40M | UChar c=srcArray[i]; |
352 | 7.40M | if(c>0x7f) { |
353 | 6.94M | break; |
354 | 6.94M | } |
355 | 457k | int cData=asciiData[c]; |
356 | 457k | if(cData>0) { |
357 | 28.1k | destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. |
358 | 429k | } else if(cData<0 && disallowNonLDHDot) { |
359 | 2.05k | break; // Replacing with U+FFFD can be complicated for toASCII. |
360 | 427k | } else { |
361 | 427k | destArray[i]=c; |
362 | 427k | if(c==0x2d) { // hyphen |
363 | 11.7k | if(i==(labelStart+3) && srcArray[i-1]==0x2d) { |
364 | | // "??--..." is Punycode or forbidden. |
365 | 4.36k | ++i; // '-' was copied to dest already |
366 | 4.36k | break; |
367 | 4.36k | } |
368 | 7.40k | if(i==labelStart) { |
369 | | // label starts with "-" |
370 | 1.36k | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
371 | 1.36k | } |
372 | 7.40k | if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
373 | | // label ends with "-" |
374 | 451 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
375 | 451 | } |
376 | 415k | } else if(c==0x2e) { // dot |
377 | 380k | if(isLabel) { |
378 | | // Replacing with U+FFFD can be complicated for toASCII. |
379 | 0 | ++i; // '.' was copied to dest already |
380 | 0 | break; |
381 | 0 | } |
382 | 380k | if(i==labelStart) { |
383 | 362k | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
384 | 362k | } |
385 | 380k | if(toASCII && (i-labelStart)>63) { |
386 | 225 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
387 | 225 | } |
388 | 380k | info.errors|=info.labelErrors; |
389 | 380k | info.labelErrors=0; |
390 | 380k | labelStart=i+1; |
391 | 380k | } |
392 | 427k | } |
393 | 457k | } |
394 | 6.95M | info.errors|=info.labelErrors; |
395 | 6.95M | dest.releaseBuffer(i); |
396 | 6.95M | processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); |
397 | 6.95M | if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && |
398 | 6.95M | (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) |
399 | 6.95M | ) { |
400 | 0 | info.errors|=UIDNA_ERROR_BIDI; |
401 | 0 | } |
402 | 6.95M | return dest; |
403 | 6.95M | } |
404 | | |
405 | | void |
406 | | UTS46::processUTF8(StringPiece src, |
407 | | UBool isLabel, UBool toASCII, |
408 | | ByteSink &dest, |
409 | 0 | IDNAInfo &info, UErrorCode &errorCode) const { |
410 | 0 | if(U_FAILURE(errorCode)) { |
411 | 0 | return; |
412 | 0 | } |
413 | 0 | const char *srcArray=src.data(); |
414 | 0 | int32_t srcLength=src.length(); |
415 | 0 | if(srcArray==NULL && srcLength!=0) { |
416 | 0 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
417 | 0 | return; |
418 | 0 | } |
419 | | // Arguments are fine, reset output values. |
420 | 0 | info.reset(); |
421 | 0 | if(srcLength==0) { |
422 | 0 | info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
423 | 0 | dest.Flush(); |
424 | 0 | return; |
425 | 0 | } |
426 | 0 | UnicodeString destString; |
427 | 0 | int32_t labelStart=0; |
428 | 0 | if(srcLength<=256) { // length of stackArray[] |
429 | | // ASCII fastpath |
430 | 0 | char stackArray[256]; |
431 | 0 | int32_t destCapacity; |
432 | 0 | char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, |
433 | 0 | stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); |
434 | 0 | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
435 | 0 | int32_t i; |
436 | 0 | for(i=0;; ++i) { |
437 | 0 | if(i==srcLength) { |
438 | 0 | if(toASCII) { |
439 | 0 | if((i-labelStart)>63) { |
440 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
441 | 0 | } |
442 | | // There is a trailing dot if labelStart==i. |
443 | 0 | if(!isLabel && i>=254 && (i>254 || labelStart<i)) { |
444 | 0 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; |
445 | 0 | } |
446 | 0 | } |
447 | 0 | info.errors|=info.labelErrors; |
448 | 0 | dest.Append(destArray, i); |
449 | 0 | dest.Flush(); |
450 | 0 | return; |
451 | 0 | } |
452 | 0 | char c=srcArray[i]; |
453 | 0 | if((int8_t)c<0) { // (uint8_t)c>0x7f |
454 | 0 | break; |
455 | 0 | } |
456 | 0 | int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char. |
457 | 0 | if(cData>0) { |
458 | 0 | destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. |
459 | 0 | } else if(cData<0 && disallowNonLDHDot) { |
460 | 0 | break; // Replacing with U+FFFD can be complicated for toASCII. |
461 | 0 | } else { |
462 | 0 | destArray[i]=c; |
463 | 0 | if(c==0x2d) { // hyphen |
464 | 0 | if(i==(labelStart+3) && srcArray[i-1]==0x2d) { |
465 | | // "??--..." is Punycode or forbidden. |
466 | 0 | break; |
467 | 0 | } |
468 | 0 | if(i==labelStart) { |
469 | | // label starts with "-" |
470 | 0 | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
471 | 0 | } |
472 | 0 | if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
473 | | // label ends with "-" |
474 | 0 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
475 | 0 | } |
476 | 0 | } else if(c==0x2e) { // dot |
477 | 0 | if(isLabel) { |
478 | 0 | break; // Replacing with U+FFFD can be complicated for toASCII. |
479 | 0 | } |
480 | 0 | if(i==labelStart) { |
481 | 0 | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
482 | 0 | } |
483 | 0 | if(toASCII && (i-labelStart)>63) { |
484 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
485 | 0 | } |
486 | 0 | info.errors|=info.labelErrors; |
487 | 0 | info.labelErrors=0; |
488 | 0 | labelStart=i+1; |
489 | 0 | } |
490 | 0 | } |
491 | 0 | } |
492 | 0 | info.errors|=info.labelErrors; |
493 | | // Convert the processed ASCII prefix of the current label to UTF-16. |
494 | 0 | int32_t mappingStart=i-labelStart; |
495 | 0 | destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); |
496 | | // Output the previous ASCII labels and process the rest of src in UTF-16. |
497 | 0 | dest.Append(destArray, labelStart); |
498 | 0 | processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, |
499 | 0 | isLabel, toASCII, |
500 | 0 | destString, info, errorCode); |
501 | 0 | } else { |
502 | | // src is too long for the ASCII fastpath implementation. |
503 | 0 | processUnicode(UnicodeString::fromUTF8(src), 0, 0, |
504 | 0 | isLabel, toASCII, |
505 | 0 | destString, info, errorCode); |
506 | 0 | } |
507 | 0 | destString.toUTF8(dest); // calls dest.Flush() |
508 | 0 | if(toASCII && !isLabel) { |
509 | | // length==labelStart==254 means that there is a trailing dot (ok) and |
510 | | // destString is empty (do not index at 253-labelStart). |
511 | 0 | int32_t length=labelStart+destString.length(); |
512 | 0 | if( length>=254 && isASCIIString(destString) && |
513 | 0 | (length>254 || |
514 | 0 | (labelStart<254 && destString[253-labelStart]!=0x2e)) |
515 | 0 | ) { |
516 | 0 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; |
517 | 0 | } |
518 | 0 | } |
519 | 0 | if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && |
520 | 0 | (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) |
521 | 0 | ) { |
522 | 0 | info.errors|=UIDNA_ERROR_BIDI; |
523 | 0 | } |
524 | 0 | } |
525 | | |
526 | | UnicodeString & |
527 | | UTS46::processUnicode(const UnicodeString &src, |
528 | | int32_t labelStart, int32_t mappingStart, |
529 | | UBool isLabel, UBool toASCII, |
530 | | UnicodeString &dest, |
531 | 6.95M | IDNAInfo &info, UErrorCode &errorCode) const { |
532 | 6.95M | if(mappingStart==0) { |
533 | 6.57M | uts46Norm2.normalize(src, dest, errorCode); |
534 | 6.57M | } else { |
535 | 379k | uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); |
536 | 379k | } |
537 | 6.95M | if(U_FAILURE(errorCode)) { |
538 | 0 | return dest; |
539 | 0 | } |
540 | 6.95M | UBool doMapDevChars= |
541 | 6.95M | toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : |
542 | 6.95M | (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; |
543 | 6.95M | const UChar *destArray=dest.getBuffer(); |
544 | 6.95M | int32_t destLength=dest.length(); |
545 | 6.95M | int32_t labelLimit=labelStart; |
546 | 15.0M | while(labelLimit<destLength) { |
547 | 8.09M | UChar c=destArray[labelLimit]; |
548 | 8.09M | if(c==0x2e && !isLabel) { |
549 | 167k | int32_t labelLength=labelLimit-labelStart; |
550 | 167k | int32_t newLength=processLabel(dest, labelStart, labelLength, |
551 | 167k | toASCII, info, errorCode); |
552 | 167k | info.errors|=info.labelErrors; |
553 | 167k | info.labelErrors=0; |
554 | 167k | if(U_FAILURE(errorCode)) { |
555 | 197 | return dest; |
556 | 197 | } |
557 | 167k | destArray=dest.getBuffer(); |
558 | 167k | destLength+=newLength-labelLength; |
559 | 167k | labelLimit=labelStart+=newLength+1; |
560 | 7.92M | } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { |
561 | 432k | info.isTransDiff=TRUE; |
562 | 432k | if(doMapDevChars) { |
563 | 0 | destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); |
564 | 0 | if(U_FAILURE(errorCode)) { |
565 | 0 | return dest; |
566 | 0 | } |
567 | 0 | destArray=dest.getBuffer(); |
568 | | // Do not increment labelLimit in case c was removed. |
569 | | // All deviation characters have been mapped, no need to check for them again. |
570 | 0 | doMapDevChars=FALSE; |
571 | 432k | } else { |
572 | 432k | ++labelLimit; |
573 | 432k | } |
574 | 7.49M | } else { |
575 | 7.49M | ++labelLimit; |
576 | 7.49M | } |
577 | 8.09M | } |
578 | | // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok) |
579 | | // but not an empty label elsewhere nor a completely empty domain name. |
580 | | // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0. |
581 | 6.95M | if(0==labelStart || labelStart<labelLimit) { |
582 | 6.79M | processLabel(dest, labelStart, labelLimit-labelStart, |
583 | 6.79M | toASCII, info, errorCode); |
584 | 6.79M | info.errors|=info.labelErrors; |
585 | 6.79M | } |
586 | 6.95M | return dest; |
587 | 6.95M | } |
588 | | |
589 | | int32_t |
590 | | UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, |
591 | 0 | UErrorCode &errorCode) const { |
592 | 0 | if(U_FAILURE(errorCode)) { |
593 | 0 | return 0; |
594 | 0 | } |
595 | 0 | int32_t length=dest.length(); |
596 | 0 | UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); |
597 | 0 | if(s==NULL) { |
598 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
599 | 0 | return length; |
600 | 0 | } |
601 | 0 | int32_t capacity=dest.getCapacity(); |
602 | 0 | UBool didMapDevChars=FALSE; |
603 | 0 | int32_t readIndex=mappingStart, writeIndex=mappingStart; |
604 | 0 | do { |
605 | 0 | UChar c=s[readIndex++]; |
606 | 0 | switch(c) { |
607 | 0 | case 0xdf: |
608 | | // Map sharp s to ss. |
609 | 0 | didMapDevChars=TRUE; |
610 | 0 | s[writeIndex++]=0x73; // Replace sharp s with first s. |
611 | | // Insert second s and account for possible buffer reallocation. |
612 | 0 | if(writeIndex==readIndex) { |
613 | 0 | if(length==capacity) { |
614 | 0 | dest.releaseBuffer(length); |
615 | 0 | s=dest.getBuffer(length+1); |
616 | 0 | if(s==NULL) { |
617 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
618 | 0 | return length; |
619 | 0 | } |
620 | 0 | capacity=dest.getCapacity(); |
621 | 0 | } |
622 | 0 | u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex); |
623 | 0 | ++readIndex; |
624 | 0 | } |
625 | 0 | s[writeIndex++]=0x73; |
626 | 0 | ++length; |
627 | 0 | break; |
628 | 0 | case 0x3c2: // Map final sigma to nonfinal sigma. |
629 | 0 | didMapDevChars=TRUE; |
630 | 0 | s[writeIndex++]=0x3c3; |
631 | 0 | break; |
632 | 0 | case 0x200c: // Ignore/remove ZWNJ. |
633 | 0 | case 0x200d: // Ignore/remove ZWJ. |
634 | 0 | didMapDevChars=TRUE; |
635 | 0 | --length; |
636 | 0 | break; |
637 | 0 | default: |
638 | | // Only really necessary if writeIndex was different from readIndex. |
639 | 0 | s[writeIndex++]=c; |
640 | 0 | break; |
641 | 0 | } |
642 | 0 | } while(writeIndex<length); |
643 | 0 | dest.releaseBuffer(length); |
644 | 0 | if(didMapDevChars) { |
645 | | // Mapping deviation characters might have resulted in an un-NFC string. |
646 | | // We could use either the NFC or the UTS #46 normalizer. |
647 | | // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. |
648 | 0 | UnicodeString normalized; |
649 | 0 | uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); |
650 | 0 | if(U_SUCCESS(errorCode)) { |
651 | 0 | dest.replace(labelStart, 0x7fffffff, normalized); |
652 | 0 | if(dest.isBogus()) { |
653 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
654 | 0 | } |
655 | 0 | return dest.length(); |
656 | 0 | } |
657 | 0 | } |
658 | 0 | return length; |
659 | 0 | } |
660 | | |
661 | | // Some non-ASCII characters are equivalent to sequences with |
662 | | // non-LDH ASCII characters. To find them: |
663 | | // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) |
664 | | static inline UBool |
665 | 7.26M | isNonASCIIDisallowedSTD3Valid(UChar32 c) { |
666 | 7.26M | return c==0x2260 || c==0x226E || c==0x226F; |
667 | 7.26M | } |
668 | | |
669 | | // Replace the label in dest with the label string, if the label was modified. |
670 | | // If &label==&dest then the label was modified in-place and labelLength |
671 | | // is the new label length, different from label.length(). |
672 | | // If &label!=&dest then labelLength==label.length(). |
673 | | // Returns labelLength (= the new label length). |
674 | | static int32_t |
675 | | replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, |
676 | 6.96M | const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { |
677 | 6.96M | if(U_FAILURE(errorCode)) { |
678 | 0 | return 0; |
679 | 0 | } |
680 | 6.96M | if(&label!=&dest) { |
681 | 5.86M | dest.replace(destLabelStart, destLabelLength, label); |
682 | 5.86M | if(dest.isBogus()) { |
683 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
684 | 0 | return 0; |
685 | 0 | } |
686 | 5.86M | } |
687 | 6.96M | return labelLength; |
688 | 6.96M | } |
689 | | |
690 | | int32_t |
691 | | UTS46::processLabel(UnicodeString &dest, |
692 | | int32_t labelStart, int32_t labelLength, |
693 | | UBool toASCII, |
694 | 6.96M | IDNAInfo &info, UErrorCode &errorCode) const { |
695 | 6.96M | if(U_FAILURE(errorCode)) { |
696 | 0 | return 0; |
697 | 0 | } |
698 | 6.96M | UnicodeString fromPunycode; |
699 | 6.96M | UnicodeString *labelString; |
700 | 6.96M | const UChar *label=dest.getBuffer()+labelStart; |
701 | 6.96M | int32_t destLabelStart=labelStart; |
702 | 6.96M | int32_t destLabelLength=labelLength; |
703 | 6.96M | UBool wasPunycode; |
704 | 6.96M | if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { |
705 | | // Label starts with "xn--", try to un-Punycode it. |
706 | 7.56k | wasPunycode=TRUE; |
707 | 7.56k | UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit |
708 | 7.56k | if(unicodeBuffer==NULL) { |
709 | | // Should never occur if we used capacity==-1 which uses the internal buffer. |
710 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
711 | 0 | return labelLength; |
712 | 0 | } |
713 | 7.56k | UErrorCode punycodeErrorCode=U_ZERO_ERROR; |
714 | 7.56k | int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, |
715 | 7.56k | unicodeBuffer, fromPunycode.getCapacity(), |
716 | 7.56k | NULL, &punycodeErrorCode); |
717 | 7.56k | if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
718 | 998 | fromPunycode.releaseBuffer(0); |
719 | 998 | unicodeBuffer=fromPunycode.getBuffer(unicodeLength); |
720 | 998 | if(unicodeBuffer==NULL) { |
721 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
722 | 0 | return labelLength; |
723 | 0 | } |
724 | 998 | punycodeErrorCode=U_ZERO_ERROR; |
725 | 998 | unicodeLength=u_strFromPunycode(label+4, labelLength-4, |
726 | 998 | unicodeBuffer, fromPunycode.getCapacity(), |
727 | 998 | NULL, &punycodeErrorCode); |
728 | 998 | } |
729 | 7.56k | fromPunycode.releaseBuffer(unicodeLength); |
730 | 7.56k | if(U_FAILURE(punycodeErrorCode)) { |
731 | 3.21k | info.labelErrors|=UIDNA_ERROR_PUNYCODE; |
732 | 3.21k | return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); |
733 | 3.21k | } |
734 | | // Check for NFC, and for characters that are not |
735 | | // valid or deviation characters according to the normalizer. |
736 | | // If there is something wrong, then the string will change. |
737 | | // Note that the normalizer passes through non-LDH ASCII and deviation characters. |
738 | | // Deviation characters are ok in Punycode even in transitional processing. |
739 | | // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES |
740 | | // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. |
741 | 4.34k | UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); |
742 | 4.34k | if(U_FAILURE(errorCode)) { |
743 | 0 | return labelLength; |
744 | 0 | } |
745 | 4.34k | if(!isValid) { |
746 | 2.45k | info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
747 | 2.45k | return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); |
748 | 2.45k | } |
749 | 1.89k | labelString=&fromPunycode; |
750 | 1.89k | label=fromPunycode.getBuffer(); |
751 | 1.89k | labelStart=0; |
752 | 1.89k | labelLength=fromPunycode.length(); |
753 | 6.96M | } else { |
754 | 6.96M | wasPunycode=FALSE; |
755 | 6.96M | labelString=&dest; |
756 | 6.96M | } |
757 | | // Validity check |
758 | 6.96M | if(labelLength==0) { |
759 | 375k | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
760 | 375k | return replaceLabel(dest, destLabelStart, destLabelLength, |
761 | 375k | *labelString, labelLength, errorCode); |
762 | 375k | } |
763 | | // labelLength>0 |
764 | 6.58M | if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { |
765 | | // label starts with "??--" |
766 | 222 | info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; |
767 | 222 | } |
768 | 6.58M | if(label[0]==0x2d) { |
769 | | // label starts with "-" |
770 | 1.63k | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
771 | 1.63k | } |
772 | 6.58M | if(label[labelLength-1]==0x2d) { |
773 | | // label ends with "-" |
774 | 936 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
775 | 936 | } |
776 | | // If the label was not a Punycode label, then it was the result of |
777 | | // mapping, normalization and label segmentation. |
778 | | // If the label was in Punycode, then we mapped it again above |
779 | | // and checked its validity. |
780 | | // Now we handle the STD3 restriction to LDH characters (if set) |
781 | | // and we look for U+FFFD which indicates disallowed characters |
782 | | // in a non-Punycode label or U+FFFD itself in a Punycode label. |
783 | | // We also check for dots which can come from the input to a single-label function. |
784 | | // Ok to cast away const because we own the UnicodeString. |
785 | 6.58M | UChar *s=(UChar *)label; |
786 | 6.58M | const UChar *limit=label+labelLength; |
787 | 6.58M | UChar oredChars=0; |
788 | | // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. |
789 | 6.58M | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
790 | 7.85M | do { |
791 | 7.85M | UChar c=*s; |
792 | 7.85M | if(c<=0x7f) { |
793 | 593k | if(c==0x2e) { |
794 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; |
795 | 0 | *s=0xfffd; |
796 | 593k | } else if(disallowNonLDHDot && asciiData[c]<0) { |
797 | 164k | info.labelErrors|=UIDNA_ERROR_DISALLOWED; |
798 | 164k | *s=0xfffd; |
799 | 164k | } |
800 | 7.26M | } else { |
801 | 7.26M | oredChars|=c; |
802 | 7.26M | if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { |
803 | 564 | info.labelErrors|=UIDNA_ERROR_DISALLOWED; |
804 | 564 | *s=0xfffd; |
805 | 7.26M | } else if(c==0xfffd) { |
806 | 7.17k | info.labelErrors|=UIDNA_ERROR_DISALLOWED; |
807 | 7.17k | } |
808 | 7.26M | } |
809 | 7.85M | ++s; |
810 | 7.85M | } while(s<limit); |
811 | | // Check for a leading combining mark after other validity checks |
812 | | // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here. |
813 | 6.58M | UChar32 c; |
814 | 6.58M | int32_t cpLength=0; |
815 | | // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD. |
816 | 6.58M | U16_NEXT_UNSAFE(label, cpLength, c); |
817 | 6.58M | if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) { |
818 | 691k | info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK; |
819 | 691k | labelString->replace(labelStart, cpLength, (UChar)0xfffd); |
820 | 691k | label=labelString->getBuffer()+labelStart; |
821 | 691k | labelLength+=1-cpLength; |
822 | 691k | if(labelString==&dest) { |
823 | 690k | destLabelLength=labelLength; |
824 | 690k | } |
825 | 691k | } |
826 | 6.58M | if((info.labelErrors&severeErrors)==0) { |
827 | | // Do contextual checks only if we do not have U+FFFD from a severe error |
828 | | // because U+FFFD can make these checks fail. |
829 | 5.86M | if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { |
830 | 0 | checkLabelBiDi(label, labelLength, info); |
831 | 0 | } |
832 | 5.86M | if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && |
833 | 5.86M | !isLabelOkContextJ(label, labelLength) |
834 | 5.86M | ) { |
835 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTJ; |
836 | 0 | } |
837 | 5.86M | if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { |
838 | 0 | checkLabelContextO(label, labelLength, info); |
839 | 0 | } |
840 | 5.86M | if(toASCII) { |
841 | 5.86M | if(wasPunycode) { |
842 | | // Leave a Punycode label unchanged if it has no severe errors. |
843 | 866 | if(destLabelLength>63) { |
844 | 236 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
845 | 236 | } |
846 | 866 | return destLabelLength; |
847 | 5.86M | } else if(oredChars>=0x80) { |
848 | | // Contains non-ASCII characters. |
849 | 5.86M | UnicodeString punycode; |
850 | 5.86M | UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length |
851 | 5.86M | if(buffer==NULL) { |
852 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
853 | 0 | return destLabelLength; |
854 | 0 | } |
855 | 5.86M | buffer[0]=0x78; // Write "xn--". |
856 | 5.86M | buffer[1]=0x6e; |
857 | 5.86M | buffer[2]=0x2d; |
858 | 5.86M | buffer[3]=0x2d; |
859 | 5.86M | int32_t punycodeLength=u_strToPunycode(label, labelLength, |
860 | 5.86M | buffer+4, punycode.getCapacity()-4, |
861 | 5.86M | NULL, &errorCode); |
862 | 5.86M | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
863 | 872 | errorCode=U_ZERO_ERROR; |
864 | 872 | punycode.releaseBuffer(4); |
865 | 872 | buffer=punycode.getBuffer(4+punycodeLength); |
866 | 872 | if(buffer==NULL) { |
867 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
868 | 0 | return destLabelLength; |
869 | 0 | } |
870 | 872 | punycodeLength=u_strToPunycode(label, labelLength, |
871 | 872 | buffer+4, punycode.getCapacity()-4, |
872 | 872 | NULL, &errorCode); |
873 | 872 | } |
874 | 5.86M | punycodeLength+=4; |
875 | 5.86M | punycode.releaseBuffer(punycodeLength); |
876 | 5.86M | if(U_FAILURE(errorCode)) { |
877 | 207 | return destLabelLength; |
878 | 207 | } |
879 | 5.86M | if(punycodeLength>63) { |
880 | 1.56k | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
881 | 1.56k | } |
882 | 5.86M | return replaceLabel(dest, destLabelStart, destLabelLength, |
883 | 5.86M | punycode, punycodeLength, errorCode); |
884 | 5.86M | } else { |
885 | | // all-ASCII label |
886 | 5.20k | if(labelLength>63) { |
887 | 206 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
888 | 206 | } |
889 | 5.20k | } |
890 | 5.86M | } |
891 | 5.86M | } else { |
892 | | // If a Punycode label has severe errors, |
893 | | // then leave it but make sure it does not look valid. |
894 | 720k | if(wasPunycode) { |
895 | 695 | info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
896 | 695 | return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); |
897 | 695 | } |
898 | 720k | } |
899 | 724k | return replaceLabel(dest, destLabelStart, destLabelLength, |
900 | 724k | *labelString, labelLength, errorCode); |
901 | 6.58M | } |
902 | | |
903 | | // Make sure an ACE label does not look valid. |
904 | | // Append U+FFFD if the label has only LDH characters. |
905 | | // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. |
906 | | int32_t |
907 | | UTS46::markBadACELabel(UnicodeString &dest, |
908 | | int32_t labelStart, int32_t labelLength, |
909 | 6.36k | UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { |
910 | 6.36k | if(U_FAILURE(errorCode)) { |
911 | 0 | return 0; |
912 | 0 | } |
913 | 6.36k | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
914 | 6.36k | UBool isASCII=TRUE; |
915 | 6.36k | UBool onlyLDH=TRUE; |
916 | 6.36k | const UChar *label=dest.getBuffer()+labelStart; |
917 | | // Ok to cast away const because we own the UnicodeString. |
918 | 6.36k | UChar *s=(UChar *)label+4; // After the initial "xn--". |
919 | 6.36k | const UChar *limit=label+labelLength; |
920 | 54.8k | do { |
921 | 54.8k | UChar c=*s; |
922 | 54.8k | if(c<=0x7f) { |
923 | 43.3k | if(c==0x2e) { |
924 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; |
925 | 0 | *s=0xfffd; |
926 | 0 | isASCII=onlyLDH=FALSE; |
927 | 43.3k | } else if(asciiData[c]<0) { |
928 | 1.82k | onlyLDH=FALSE; |
929 | 1.82k | if(disallowNonLDHDot) { |
930 | 1.82k | *s=0xfffd; |
931 | 1.82k | isASCII=FALSE; |
932 | 1.82k | } |
933 | 1.82k | } |
934 | 43.3k | } else { |
935 | 11.4k | isASCII=onlyLDH=FALSE; |
936 | 11.4k | } |
937 | 54.8k | } while(++s<limit); |
938 | 6.36k | if(onlyLDH) { |
939 | 3.07k | dest.insert(labelStart+labelLength, (UChar)0xfffd); |
940 | 3.07k | if(dest.isBogus()) { |
941 | 0 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
942 | 0 | return 0; |
943 | 0 | } |
944 | 3.07k | ++labelLength; |
945 | 3.28k | } else { |
946 | 3.28k | if(toASCII && isASCII && labelLength>63) { |
947 | 0 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
948 | 0 | } |
949 | 3.28k | } |
950 | 6.36k | return labelLength; |
951 | 6.36k | } |
952 | | |
953 | | const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); |
954 | | const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); |
955 | | const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; |
956 | | |
957 | | const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); |
958 | | |
959 | | const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); |
960 | | const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; |
961 | | const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); |
962 | | |
963 | | const uint32_t ES_CS_ET_ON_BN_NSM_MASK= |
964 | | U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| |
965 | | U_MASK(U_COMMON_NUMBER_SEPARATOR)| |
966 | | U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| |
967 | | U_MASK(U_OTHER_NEUTRAL)| |
968 | | U_MASK(U_BOUNDARY_NEUTRAL)| |
969 | | U_MASK(U_DIR_NON_SPACING_MARK); |
970 | | const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; |
971 | | const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; |
972 | | |
973 | | // We scan the whole label and check both for whether it contains RTL characters |
974 | | // and whether it passes the BiDi Rule. |
975 | | // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find |
976 | | // that a domain name is a BiDi domain name (has an RTL label) only after |
977 | | // processing several earlier labels. |
978 | | void |
979 | 0 | UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { |
980 | | // IDNA2008 BiDi rule |
981 | | // Get the directionality of the first character. |
982 | 0 | UChar32 c; |
983 | 0 | int32_t i=0; |
984 | 0 | U16_NEXT_UNSAFE(label, i, c); |
985 | 0 | uint32_t firstMask=U_MASK(u_charDirection(c)); |
986 | | // 1. The first character must be a character with BIDI property L, R |
987 | | // or AL. If it has the R or AL property, it is an RTL label; if it |
988 | | // has the L property, it is an LTR label. |
989 | 0 | if((firstMask&~L_R_AL_MASK)!=0) { |
990 | 0 | info.isOkBiDi=FALSE; |
991 | 0 | } |
992 | | // Get the directionality of the last non-NSM character. |
993 | 0 | uint32_t lastMask; |
994 | 0 | for(;;) { |
995 | 0 | if(i>=labelLength) { |
996 | 0 | lastMask=firstMask; |
997 | 0 | break; |
998 | 0 | } |
999 | 0 | U16_PREV_UNSAFE(label, labelLength, c); |
1000 | 0 | UCharDirection dir=u_charDirection(c); |
1001 | 0 | if(dir!=U_DIR_NON_SPACING_MARK) { |
1002 | 0 | lastMask=U_MASK(dir); |
1003 | 0 | break; |
1004 | 0 | } |
1005 | 0 | } |
1006 | | // 3. In an RTL label, the end of the label must be a character with |
1007 | | // BIDI property R, AL, EN or AN, followed by zero or more |
1008 | | // characters with BIDI property NSM. |
1009 | | // 6. In an LTR label, the end of the label must be a character with |
1010 | | // BIDI property L or EN, followed by zero or more characters with |
1011 | | // BIDI property NSM. |
1012 | 0 | if( (firstMask&L_MASK)!=0 ? |
1013 | 0 | (lastMask&~L_EN_MASK)!=0 : |
1014 | 0 | (lastMask&~R_AL_EN_AN_MASK)!=0 |
1015 | 0 | ) { |
1016 | 0 | info.isOkBiDi=FALSE; |
1017 | 0 | } |
1018 | | // Get the directionalities of the intervening characters. |
1019 | 0 | uint32_t mask=0; |
1020 | 0 | while(i<labelLength) { |
1021 | 0 | U16_NEXT_UNSAFE(label, i, c); |
1022 | 0 | mask|=U_MASK(u_charDirection(c)); |
1023 | 0 | } |
1024 | 0 | if(firstMask&L_MASK) { |
1025 | | // 5. In an LTR label, only characters with the BIDI properties L, EN, |
1026 | | // ES, CS, ET, ON, BN and NSM are allowed. |
1027 | 0 | if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { |
1028 | 0 | info.isOkBiDi=FALSE; |
1029 | 0 | } |
1030 | 0 | } else { |
1031 | | // 2. In an RTL label, only characters with the BIDI properties R, AL, |
1032 | | // AN, EN, ES, CS, ET, ON, BN and NSM are allowed. |
1033 | 0 | if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { |
1034 | 0 | info.isOkBiDi=FALSE; |
1035 | 0 | } |
1036 | | // 4. In an RTL label, if an EN is present, no AN may be present, and |
1037 | | // vice versa. |
1038 | 0 | if((mask&EN_AN_MASK)==EN_AN_MASK) { |
1039 | 0 | info.isOkBiDi=FALSE; |
1040 | 0 | } |
1041 | 0 | } |
1042 | | // An RTL label is a label that contains at least one character of type |
1043 | | // R, AL or AN. [...] |
1044 | | // A "BIDI domain name" is a domain name that contains at least one RTL |
1045 | | // label. [...] |
1046 | | // The following rule, consisting of six conditions, applies to labels |
1047 | | // in BIDI domain names. |
1048 | 0 | if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) { |
1049 | 0 | info.isBiDi=TRUE; |
1050 | 0 | } |
1051 | 0 | } |
1052 | | |
1053 | | // Special code for the ASCII prefix of a BiDi domain name. |
1054 | | // The ASCII prefix is all-LTR. |
1055 | | |
1056 | | // IDNA2008 BiDi rule, parts relevant to ASCII labels: |
1057 | | // 1. The first character must be a character with BIDI property L [...] |
1058 | | // 5. In an LTR label, only characters with the BIDI properties L, EN, |
1059 | | // ES, CS, ET, ON, BN and NSM are allowed. |
1060 | | // 6. In an LTR label, the end of the label must be a character with |
1061 | | // BIDI property L or EN [...] |
1062 | | |
1063 | | // UTF-16 version, called for mapped ASCII prefix. |
1064 | | // Cannot contain uppercase A-Z. |
1065 | | // s[length-1] must be the trailing dot. |
1066 | | static UBool |
1067 | 0 | isASCIIOkBiDi(const UChar *s, int32_t length) { |
1068 | 0 | int32_t labelStart=0; |
1069 | 0 | for(int32_t i=0; i<length; ++i) { |
1070 | 0 | UChar c=s[i]; |
1071 | 0 | if(c==0x2e) { // dot |
1072 | 0 | if(i>labelStart) { |
1073 | 0 | c=s[i-1]; |
1074 | 0 | if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { |
1075 | | // Last character in the label is not an L or EN. |
1076 | 0 | return FALSE; |
1077 | 0 | } |
1078 | 0 | } |
1079 | 0 | labelStart=i+1; |
1080 | 0 | } else if(i==labelStart) { |
1081 | 0 | if(!(0x61<=c && c<=0x7a)) { |
1082 | | // First character in the label is not an L. |
1083 | 0 | return FALSE; |
1084 | 0 | } |
1085 | 0 | } else { |
1086 | 0 | if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { |
1087 | | // Intermediate character in the label is a B, S or WS. |
1088 | 0 | return FALSE; |
1089 | 0 | } |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | return TRUE; |
1093 | 0 | } |
1094 | | |
1095 | | // UTF-8 version, called for source ASCII prefix. |
1096 | | // Can contain uppercase A-Z. |
1097 | | // s[length-1] must be the trailing dot. |
1098 | | static UBool |
1099 | 0 | isASCIIOkBiDi(const char *s, int32_t length) { |
1100 | 0 | int32_t labelStart=0; |
1101 | 0 | for(int32_t i=0; i<length; ++i) { |
1102 | 0 | char c=s[i]; |
1103 | 0 | if(c==0x2e) { // dot |
1104 | 0 | if(i>labelStart) { |
1105 | 0 | c=s[i-1]; |
1106 | 0 | if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { |
1107 | | // Last character in the label is not an L or EN. |
1108 | 0 | return FALSE; |
1109 | 0 | } |
1110 | 0 | } |
1111 | 0 | labelStart=i+1; |
1112 | 0 | } else if(i==labelStart) { |
1113 | 0 | if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { |
1114 | | // First character in the label is not an L. |
1115 | 0 | return FALSE; |
1116 | 0 | } |
1117 | 0 | } else { |
1118 | 0 | if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { |
1119 | | // Intermediate character in the label is a B, S or WS. |
1120 | 0 | return FALSE; |
1121 | 0 | } |
1122 | 0 | } |
1123 | 0 | } |
1124 | 0 | return TRUE; |
1125 | 0 | } |
1126 | | |
1127 | | UBool |
1128 | 0 | UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { |
1129 | 0 | const UBiDiProps *bdp=ubidi_getSingleton(); |
1130 | | // [IDNA2008-Tables] |
1131 | | // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER |
1132 | 0 | for(int32_t i=0; i<labelLength; ++i) { |
1133 | 0 | if(label[i]==0x200c) { |
1134 | | // Appendix A.1. ZERO WIDTH NON-JOINER |
1135 | | // Rule Set: |
1136 | | // False; |
1137 | | // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; |
1138 | | // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C |
1139 | | // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; |
1140 | 0 | if(i==0) { |
1141 | 0 | return FALSE; |
1142 | 0 | } |
1143 | 0 | UChar32 c; |
1144 | 0 | int32_t j=i; |
1145 | 0 | U16_PREV_UNSAFE(label, j, c); |
1146 | 0 | if(uts46Norm2.getCombiningClass(c)==9) { |
1147 | 0 | continue; |
1148 | 0 | } |
1149 | | // check precontext (Joining_Type:{L,D})(Joining_Type:T)* |
1150 | 0 | for(;;) { |
1151 | 0 | UJoiningType type=ubidi_getJoiningType(bdp, c); |
1152 | 0 | if(type==U_JT_TRANSPARENT) { |
1153 | 0 | if(j==0) { |
1154 | 0 | return FALSE; |
1155 | 0 | } |
1156 | 0 | U16_PREV_UNSAFE(label, j, c); |
1157 | 0 | } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) { |
1158 | 0 | break; // precontext fulfilled |
1159 | 0 | } else { |
1160 | 0 | return FALSE; |
1161 | 0 | } |
1162 | 0 | } |
1163 | | // check postcontext (Joining_Type:T)*(Joining_Type:{R,D}) |
1164 | 0 | for(j=i+1;;) { |
1165 | 0 | if(j==labelLength) { |
1166 | 0 | return FALSE; |
1167 | 0 | } |
1168 | 0 | U16_NEXT_UNSAFE(label, j, c); |
1169 | 0 | UJoiningType type=ubidi_getJoiningType(bdp, c); |
1170 | 0 | if(type==U_JT_TRANSPARENT) { |
1171 | | // just skip this character |
1172 | 0 | } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { |
1173 | 0 | break; // postcontext fulfilled |
1174 | 0 | } else { |
1175 | 0 | return FALSE; |
1176 | 0 | } |
1177 | 0 | } |
1178 | 0 | } else if(label[i]==0x200d) { |
1179 | | // Appendix A.2. ZERO WIDTH JOINER (U+200D) |
1180 | | // Rule Set: |
1181 | | // False; |
1182 | | // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; |
1183 | 0 | if(i==0) { |
1184 | 0 | return FALSE; |
1185 | 0 | } |
1186 | 0 | UChar32 c; |
1187 | 0 | int32_t j=i; |
1188 | 0 | U16_PREV_UNSAFE(label, j, c); |
1189 | 0 | if(uts46Norm2.getCombiningClass(c)!=9) { |
1190 | 0 | return FALSE; |
1191 | 0 | } |
1192 | 0 | } |
1193 | 0 | } |
1194 | 0 | return TRUE; |
1195 | 0 | } |
1196 | | |
1197 | | void |
1198 | 0 | UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const { |
1199 | 0 | int32_t labelEnd=labelLength-1; // inclusive |
1200 | 0 | int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx |
1201 | 0 | for(int32_t i=0; i<=labelEnd; ++i) { |
1202 | 0 | UChar32 c=label[i]; |
1203 | 0 | if(c<0xb7) { |
1204 | | // ASCII fastpath |
1205 | 0 | } else if(c<=0x6f9) { |
1206 | 0 | if(c==0xb7) { |
1207 | | // Appendix A.3. MIDDLE DOT (U+00B7) |
1208 | | // Rule Set: |
1209 | | // False; |
1210 | | // If Before(cp) .eq. U+006C And |
1211 | | // After(cp) .eq. U+006C Then True; |
1212 | 0 | if(!(0<i && label[i-1]==0x6c && |
1213 | 0 | i<labelEnd && label[i+1]==0x6c)) { |
1214 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; |
1215 | 0 | } |
1216 | 0 | } else if(c==0x375) { |
1217 | | // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375) |
1218 | | // Rule Set: |
1219 | | // False; |
1220 | | // If Script(After(cp)) .eq. Greek Then True; |
1221 | 0 | UScriptCode script=USCRIPT_INVALID_CODE; |
1222 | 0 | if(i<labelEnd) { |
1223 | 0 | UErrorCode errorCode=U_ZERO_ERROR; |
1224 | 0 | int32_t j=i+1; |
1225 | 0 | U16_NEXT(label, j, labelLength, c); |
1226 | 0 | script=uscript_getScript(c, &errorCode); |
1227 | 0 | } |
1228 | 0 | if(script!=USCRIPT_GREEK) { |
1229 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; |
1230 | 0 | } |
1231 | 0 | } else if(c==0x5f3 || c==0x5f4) { |
1232 | | // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3) |
1233 | | // Rule Set: |
1234 | | // False; |
1235 | | // If Script(Before(cp)) .eq. Hebrew Then True; |
1236 | | // |
1237 | | // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4) |
1238 | | // Rule Set: |
1239 | | // False; |
1240 | | // If Script(Before(cp)) .eq. Hebrew Then True; |
1241 | 0 | UScriptCode script=USCRIPT_INVALID_CODE; |
1242 | 0 | if(0<i) { |
1243 | 0 | UErrorCode errorCode=U_ZERO_ERROR; |
1244 | 0 | int32_t j=i; |
1245 | 0 | U16_PREV(label, 0, j, c); |
1246 | 0 | script=uscript_getScript(c, &errorCode); |
1247 | 0 | } |
1248 | 0 | if(script!=USCRIPT_HEBREW) { |
1249 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; |
1250 | 0 | } |
1251 | 0 | } else if(0x660<=c /* && c<=0x6f9 */) { |
1252 | | // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669) |
1253 | | // Rule Set: |
1254 | | // True; |
1255 | | // For All Characters: |
1256 | | // If cp .in. 06F0..06F9 Then False; |
1257 | | // End For; |
1258 | | // |
1259 | | // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9) |
1260 | | // Rule Set: |
1261 | | // True; |
1262 | | // For All Characters: |
1263 | | // If cp .in. 0660..0669 Then False; |
1264 | | // End For; |
1265 | 0 | if(c<=0x669) { |
1266 | 0 | if(arabicDigits>0) { |
1267 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; |
1268 | 0 | } |
1269 | 0 | arabicDigits=-1; |
1270 | 0 | } else if(0x6f0<=c) { |
1271 | 0 | if(arabicDigits<0) { |
1272 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; |
1273 | 0 | } |
1274 | 0 | arabicDigits=1; |
1275 | 0 | } |
1276 | 0 | } |
1277 | 0 | } else if(c==0x30fb) { |
1278 | | // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) |
1279 | | // Rule Set: |
1280 | | // False; |
1281 | | // For All Characters: |
1282 | | // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; |
1283 | | // End For; |
1284 | 0 | UErrorCode errorCode=U_ZERO_ERROR; |
1285 | 0 | for(int j=0;;) { |
1286 | 0 | if(j>labelEnd) { |
1287 | 0 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; |
1288 | 0 | break; |
1289 | 0 | } |
1290 | 0 | U16_NEXT(label, j, labelLength, c); |
1291 | 0 | UScriptCode script=uscript_getScript(c, &errorCode); |
1292 | 0 | if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { |
1293 | 0 | break; |
1294 | 0 | } |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 | } |
1298 | 0 | } |
1299 | | |
1300 | | U_NAMESPACE_END |
1301 | | |
1302 | | // C API ------------------------------------------------------------------- *** |
1303 | | |
1304 | | U_NAMESPACE_USE |
1305 | | |
1306 | | U_CAPI UIDNA * U_EXPORT2 |
1307 | 4.45k | uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { |
1308 | 4.45k | return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode)); |
1309 | 4.45k | } |
1310 | | |
1311 | | U_CAPI void U_EXPORT2 |
1312 | 4.45k | uidna_close(UIDNA *idna) { |
1313 | 4.45k | delete reinterpret_cast<IDNA *>(idna); |
1314 | 4.45k | } |
1315 | | |
1316 | | static UBool |
1317 | | checkArgs(const void *label, int32_t length, |
1318 | | void *dest, int32_t capacity, |
1319 | 6.95M | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1320 | 6.95M | if(U_FAILURE(*pErrorCode)) { |
1321 | 0 | return FALSE; |
1322 | 0 | } |
1323 | | // sizeof(UIDNAInfo)=16 in the first API version. |
1324 | 6.95M | if(pInfo==NULL || pInfo->size<16) { |
1325 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1326 | 0 | return FALSE; |
1327 | 0 | } |
1328 | 6.95M | if( (label==NULL ? length!=0 : length<-1) || |
1329 | 6.95M | (dest==NULL ? capacity!=0 : capacity<0) || |
1330 | 6.95M | (dest==label && label!=NULL) |
1331 | 6.95M | ) { |
1332 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1333 | 0 | return FALSE; |
1334 | 0 | } |
1335 | | // Set all *pInfo bytes to 0 except for the size field itself. |
1336 | 6.95M | uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); |
1337 | 6.95M | return TRUE; |
1338 | 6.95M | } |
1339 | | |
1340 | | static void |
1341 | 6.95M | idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { |
1342 | 6.95M | pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); |
1343 | 6.95M | pInfo->errors=info.getErrors(); |
1344 | 6.95M | } |
1345 | | |
1346 | | U_CAPI int32_t U_EXPORT2 |
1347 | | uidna_labelToASCII(const UIDNA *idna, |
1348 | | const UChar *label, int32_t length, |
1349 | | UChar *dest, int32_t capacity, |
1350 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1351 | 0 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { |
1352 | 0 | return 0; |
1353 | 0 | } |
1354 | 0 | UnicodeString src((UBool)(length<0), label, length); |
1355 | 0 | UnicodeString destString(dest, 0, capacity); |
1356 | 0 | IDNAInfo info; |
1357 | 0 | reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode); |
1358 | 0 | idnaInfoToStruct(info, pInfo); |
1359 | 0 | return destString.extract(dest, capacity, *pErrorCode); |
1360 | 0 | } |
1361 | | |
1362 | | U_CAPI int32_t U_EXPORT2 |
1363 | | uidna_labelToUnicode(const UIDNA *idna, |
1364 | | const UChar *label, int32_t length, |
1365 | | UChar *dest, int32_t capacity, |
1366 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1367 | 0 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { |
1368 | 0 | return 0; |
1369 | 0 | } |
1370 | 0 | UnicodeString src((UBool)(length<0), label, length); |
1371 | 0 | UnicodeString destString(dest, 0, capacity); |
1372 | 0 | IDNAInfo info; |
1373 | 0 | reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode); |
1374 | 0 | idnaInfoToStruct(info, pInfo); |
1375 | 0 | return destString.extract(dest, capacity, *pErrorCode); |
1376 | 0 | } |
1377 | | |
1378 | | U_CAPI int32_t U_EXPORT2 |
1379 | | uidna_nameToASCII(const UIDNA *idna, |
1380 | | const UChar *name, int32_t length, |
1381 | | UChar *dest, int32_t capacity, |
1382 | 6.95M | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1383 | 6.95M | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { |
1384 | 0 | return 0; |
1385 | 0 | } |
1386 | 6.95M | UnicodeString src((UBool)(length<0), name, length); |
1387 | 6.95M | UnicodeString destString(dest, 0, capacity); |
1388 | 6.95M | IDNAInfo info; |
1389 | 6.95M | reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode); |
1390 | 6.95M | idnaInfoToStruct(info, pInfo); |
1391 | 6.95M | return destString.extract(dest, capacity, *pErrorCode); |
1392 | 6.95M | } |
1393 | | |
1394 | | U_CAPI int32_t U_EXPORT2 |
1395 | | uidna_nameToUnicode(const UIDNA *idna, |
1396 | | const UChar *name, int32_t length, |
1397 | | UChar *dest, int32_t capacity, |
1398 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1399 | 0 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { |
1400 | 0 | return 0; |
1401 | 0 | } |
1402 | 0 | UnicodeString src((UBool)(length<0), name, length); |
1403 | 0 | UnicodeString destString(dest, 0, capacity); |
1404 | 0 | IDNAInfo info; |
1405 | 0 | reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode); |
1406 | 0 | idnaInfoToStruct(info, pInfo); |
1407 | 0 | return destString.extract(dest, capacity, *pErrorCode); |
1408 | 0 | } |
1409 | | |
1410 | | U_CAPI int32_t U_EXPORT2 |
1411 | | uidna_labelToASCII_UTF8(const UIDNA *idna, |
1412 | | const char *label, int32_t length, |
1413 | | char *dest, int32_t capacity, |
1414 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1415 | 0 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { |
1416 | 0 | return 0; |
1417 | 0 | } |
1418 | 0 | StringPiece src(label, length<0 ? uprv_strlen(label) : length); |
1419 | 0 | CheckedArrayByteSink sink(dest, capacity); |
1420 | 0 | IDNAInfo info; |
1421 | 0 | reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); |
1422 | 0 | idnaInfoToStruct(info, pInfo); |
1423 | 0 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); |
1424 | 0 | } |
1425 | | |
1426 | | U_CAPI int32_t U_EXPORT2 |
1427 | | uidna_labelToUnicodeUTF8(const UIDNA *idna, |
1428 | | const char *label, int32_t length, |
1429 | | char *dest, int32_t capacity, |
1430 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1431 | 0 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { |
1432 | 0 | return 0; |
1433 | 0 | } |
1434 | 0 | StringPiece src(label, length<0 ? uprv_strlen(label) : length); |
1435 | 0 | CheckedArrayByteSink sink(dest, capacity); |
1436 | 0 | IDNAInfo info; |
1437 | 0 | reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); |
1438 | 0 | idnaInfoToStruct(info, pInfo); |
1439 | 0 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); |
1440 | 0 | } |
1441 | | |
1442 | | U_CAPI int32_t U_EXPORT2 |
1443 | | uidna_nameToASCII_UTF8(const UIDNA *idna, |
1444 | | const char *name, int32_t length, |
1445 | | char *dest, int32_t capacity, |
1446 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1447 | 0 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { |
1448 | 0 | return 0; |
1449 | 0 | } |
1450 | 0 | StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
1451 | 0 | CheckedArrayByteSink sink(dest, capacity); |
1452 | 0 | IDNAInfo info; |
1453 | 0 | reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); |
1454 | 0 | idnaInfoToStruct(info, pInfo); |
1455 | 0 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); |
1456 | 0 | } |
1457 | | |
1458 | | U_CAPI int32_t U_EXPORT2 |
1459 | | uidna_nameToUnicodeUTF8(const UIDNA *idna, |
1460 | | const char *name, int32_t length, |
1461 | | char *dest, int32_t capacity, |
1462 | 0 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { |
1463 | 0 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { |
1464 | 0 | return 0; |
1465 | 0 | } |
1466 | 0 | StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
1467 | 0 | CheckedArrayByteSink sink(dest, capacity); |
1468 | 0 | IDNAInfo info; |
1469 | 0 | reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); |
1470 | 0 | idnaInfoToStruct(info, pInfo); |
1471 | 0 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); |
1472 | 0 | } |
1473 | | |
1474 | | #endif // UCONFIG_NO_IDNA |