/src/icu/source/common/uts46.cpp
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // © 2016 and later: Unicode, Inc. and others. | 
| 2 |  | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 |  | /* | 
| 4 |  | ******************************************************************************* | 
| 5 |  | *   Copyright (C) 2010-2015, International Business Machines | 
| 6 |  | *   Corporation and others.  All Rights Reserved. | 
| 7 |  | ******************************************************************************* | 
| 8 |  | *   file name:  uts46.cpp | 
| 9 |  | *   encoding:   UTF-8 | 
| 10 |  | *   tab size:   8 (not used) | 
| 11 |  | *   indentation:4 | 
| 12 |  | * | 
| 13 |  | *   created on: 2010mar09 | 
| 14 |  | *   created by: Markus W. Scherer | 
| 15 |  | */ | 
| 16 |  |  | 
| 17 |  | #include "unicode/utypes.h" | 
| 18 |  |  | 
| 19 |  | #if !UCONFIG_NO_IDNA | 
| 20 |  |  | 
| 21 |  | #include "unicode/idna.h" | 
| 22 |  | #include "unicode/normalizer2.h" | 
| 23 |  | #include "unicode/uscript.h" | 
| 24 |  | #include "unicode/ustring.h" | 
| 25 |  | #include "unicode/utf16.h" | 
| 26 |  | #include "cmemory.h" | 
| 27 |  | #include "cstring.h" | 
| 28 |  | #include "punycode.h" | 
| 29 |  | #include "ubidi_props.h" | 
| 30 |  | #include "ustr_imp.h" | 
| 31 |  |  | 
| 32 |  | // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | 
| 33 |  | // | 
| 34 |  | // The domain name length limit is 255 octets in an internal DNS representation | 
| 35 |  | // where the last ("root") label is the empty label | 
| 36 |  | // represented by length byte 0 alone. | 
| 37 |  | // In a conventional string, this translates to 253 characters, or 254 | 
| 38 |  | // if there is a trailing dot for the root label. | 
| 39 |  |  | 
| 40 |  | U_NAMESPACE_BEGIN | 
| 41 |  |  | 
| 42 |  | // Severe errors which usually result in a U+FFFD replacement character in the result string. | 
| 43 |  | const uint32_t severeErrors= | 
| 44 |  |     UIDNA_ERROR_LEADING_COMBINING_MARK| | 
| 45 |  |     UIDNA_ERROR_DISALLOWED| | 
| 46 |  |     UIDNA_ERROR_PUNYCODE| | 
| 47 |  |     UIDNA_ERROR_LABEL_HAS_DOT| | 
| 48 |  |     UIDNA_ERROR_INVALID_ACE_LABEL; | 
| 49 |  |  | 
| 50 |  | static inline UBool | 
| 51 | 0 | isASCIIString(const UnicodeString &dest) { | 
| 52 | 0 |     const UChar *s=dest.getBuffer(); | 
| 53 | 0 |     const UChar *limit=s+dest.length(); | 
| 54 | 0 |     while(s<limit) { | 
| 55 | 0 |         if(*s++>0x7f) { | 
| 56 | 0 |             return FALSE; | 
| 57 | 0 |         } | 
| 58 | 0 |     } | 
| 59 | 0 |     return TRUE; | 
| 60 | 0 | } | 
| 61 |  |  | 
| 62 |  | static UBool | 
| 63 |  | isASCIIOkBiDi(const UChar *s, int32_t length); | 
| 64 |  |  | 
| 65 |  | static UBool | 
| 66 |  | isASCIIOkBiDi(const char *s, int32_t length); | 
| 67 |  |  | 
| 68 |  | // IDNA class default implementations -------------------------------------- *** | 
| 69 |  |  | 
| 70 | 0 | IDNA::~IDNA() {} | 
| 71 |  |  | 
| 72 |  | void | 
| 73 |  | IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest, | 
| 74 | 0 |                         IDNAInfo &info, UErrorCode &errorCode) const { | 
| 75 | 0 |     if(U_SUCCESS(errorCode)) { | 
| 76 | 0 |         UnicodeString destString; | 
| 77 | 0 |         labelToASCII(UnicodeString::fromUTF8(label), destString, | 
| 78 | 0 |                      info, errorCode).toUTF8(dest); | 
| 79 | 0 |     } | 
| 80 | 0 | } | 
| 81 |  |  | 
| 82 |  | void | 
| 83 |  | IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, | 
| 84 | 0 |                          IDNAInfo &info, UErrorCode &errorCode) const { | 
| 85 | 0 |     if(U_SUCCESS(errorCode)) { | 
| 86 | 0 |         UnicodeString destString; | 
| 87 | 0 |         labelToUnicode(UnicodeString::fromUTF8(label), destString, | 
| 88 | 0 |                        info, errorCode).toUTF8(dest); | 
| 89 | 0 |     } | 
| 90 | 0 | } | 
| 91 |  |  | 
| 92 |  | void | 
| 93 |  | IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest, | 
| 94 | 0 |                        IDNAInfo &info, UErrorCode &errorCode) const { | 
| 95 | 0 |     if(U_SUCCESS(errorCode)) { | 
| 96 | 0 |         UnicodeString destString; | 
| 97 | 0 |         nameToASCII(UnicodeString::fromUTF8(name), destString, | 
| 98 | 0 |                     info, errorCode).toUTF8(dest); | 
| 99 | 0 |     } | 
| 100 | 0 | } | 
| 101 |  |  | 
| 102 |  | void | 
| 103 |  | IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, | 
| 104 | 0 |                         IDNAInfo &info, UErrorCode &errorCode) const { | 
| 105 | 0 |     if(U_SUCCESS(errorCode)) { | 
| 106 | 0 |         UnicodeString destString; | 
| 107 | 0 |         nameToUnicode(UnicodeString::fromUTF8(name), destString, | 
| 108 | 0 |                       info, errorCode).toUTF8(dest); | 
| 109 | 0 |     } | 
| 110 | 0 | } | 
| 111 |  |  | 
| 112 |  | // UTS46 class declaration ------------------------------------------------- *** | 
| 113 |  |  | 
| 114 |  | class UTS46 : public IDNA { | 
| 115 |  | public: | 
| 116 |  |     UTS46(uint32_t options, UErrorCode &errorCode); | 
| 117 |  |     virtual ~UTS46(); | 
| 118 |  |  | 
| 119 |  |     virtual UnicodeString & | 
| 120 |  |     labelToASCII(const UnicodeString &label, UnicodeString &dest, | 
| 121 |  |                  IDNAInfo &info, UErrorCode &errorCode) const; | 
| 122 |  |  | 
| 123 |  |     virtual UnicodeString & | 
| 124 |  |     labelToUnicode(const UnicodeString &label, UnicodeString &dest, | 
| 125 |  |                    IDNAInfo &info, UErrorCode &errorCode) const; | 
| 126 |  |  | 
| 127 |  |     virtual UnicodeString & | 
| 128 |  |     nameToASCII(const UnicodeString &name, UnicodeString &dest, | 
| 129 |  |                 IDNAInfo &info, UErrorCode &errorCode) const; | 
| 130 |  |  | 
| 131 |  |     virtual UnicodeString & | 
| 132 |  |     nameToUnicode(const UnicodeString &name, UnicodeString &dest, | 
| 133 |  |                   IDNAInfo &info, UErrorCode &errorCode) const; | 
| 134 |  |  | 
| 135 |  |     virtual void | 
| 136 |  |     labelToASCII_UTF8(StringPiece label, ByteSink &dest, | 
| 137 |  |                       IDNAInfo &info, UErrorCode &errorCode) const; | 
| 138 |  |  | 
| 139 |  |     virtual void | 
| 140 |  |     labelToUnicodeUTF8(StringPiece label, ByteSink &dest, | 
| 141 |  |                        IDNAInfo &info, UErrorCode &errorCode) const; | 
| 142 |  |  | 
| 143 |  |     virtual void | 
| 144 |  |     nameToASCII_UTF8(StringPiece name, ByteSink &dest, | 
| 145 |  |                      IDNAInfo &info, UErrorCode &errorCode) const; | 
| 146 |  |  | 
| 147 |  |     virtual void | 
| 148 |  |     nameToUnicodeUTF8(StringPiece name, ByteSink &dest, | 
| 149 |  |                       IDNAInfo &info, UErrorCode &errorCode) const; | 
| 150 |  |  | 
| 151 |  | private: | 
| 152 |  |     UnicodeString & | 
| 153 |  |     process(const UnicodeString &src, | 
| 154 |  |             UBool isLabel, UBool toASCII, | 
| 155 |  |             UnicodeString &dest, | 
| 156 |  |             IDNAInfo &info, UErrorCode &errorCode) const; | 
| 157 |  |  | 
| 158 |  |     void | 
| 159 |  |     processUTF8(StringPiece src, | 
| 160 |  |                 UBool isLabel, UBool toASCII, | 
| 161 |  |                 ByteSink &dest, | 
| 162 |  |                 IDNAInfo &info, UErrorCode &errorCode) const; | 
| 163 |  |  | 
| 164 |  |     UnicodeString & | 
| 165 |  |     processUnicode(const UnicodeString &src, | 
| 166 |  |                    int32_t labelStart, int32_t mappingStart, | 
| 167 |  |                    UBool isLabel, UBool toASCII, | 
| 168 |  |                    UnicodeString &dest, | 
| 169 |  |                    IDNAInfo &info, UErrorCode &errorCode) const; | 
| 170 |  |  | 
| 171 |  |     // returns the new dest.length() | 
| 172 |  |     int32_t | 
| 173 |  |     mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, | 
| 174 |  |                 UErrorCode &errorCode) const; | 
| 175 |  |  | 
| 176 |  |     // returns the new label length | 
| 177 |  |     int32_t | 
| 178 |  |     processLabel(UnicodeString &dest, | 
| 179 |  |                  int32_t labelStart, int32_t labelLength, | 
| 180 |  |                  UBool toASCII, | 
| 181 |  |                  IDNAInfo &info, UErrorCode &errorCode) const; | 
| 182 |  |     int32_t | 
| 183 |  |     markBadACELabel(UnicodeString &dest, | 
| 184 |  |                     int32_t labelStart, int32_t labelLength, | 
| 185 |  |                     UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; | 
| 186 |  |  | 
| 187 |  |     void | 
| 188 |  |     checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | 
| 189 |  |  | 
| 190 |  |     UBool | 
| 191 |  |     isLabelOkContextJ(const UChar *label, int32_t labelLength) const; | 
| 192 |  |  | 
| 193 |  |     void | 
| 194 |  |     checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | 
| 195 |  |  | 
| 196 |  |     const Normalizer2 &uts46Norm2;  // uts46.nrm | 
| 197 |  |     uint32_t options; | 
| 198 |  | }; | 
| 199 |  |  | 
| 200 |  | IDNA * | 
| 201 | 0 | IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { | 
| 202 | 0 |     if(U_SUCCESS(errorCode)) { | 
| 203 | 0 |         IDNA *idna=new UTS46(options, errorCode); | 
| 204 | 0 |         if(idna==NULL) { | 
| 205 | 0 |             errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 206 | 0 |         } else if(U_FAILURE(errorCode)) { | 
| 207 | 0 |             delete idna; | 
| 208 | 0 |             idna=NULL; | 
| 209 | 0 |         } | 
| 210 | 0 |         return idna; | 
| 211 | 0 |     } else { | 
| 212 | 0 |         return NULL; | 
| 213 | 0 |     } | 
| 214 | 0 | } | 
| 215 |  |  | 
| 216 |  | // UTS46 implementation ---------------------------------------------------- *** | 
| 217 |  |  | 
| 218 |  | UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) | 
| 219 |  |         : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), | 
| 220 | 0 |           options(opt) {} | 
| 221 |  |  | 
| 222 |  | UTS46::~UTS46() {} | 
| 223 |  |  | 
| 224 |  | UnicodeString & | 
| 225 |  | UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, | 
| 226 | 0 |                     IDNAInfo &info, UErrorCode &errorCode) const { | 
| 227 | 0 |     return process(label, TRUE, TRUE, dest, info, errorCode); | 
| 228 | 0 | } | 
| 229 |  |  | 
| 230 |  | UnicodeString & | 
| 231 |  | UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, | 
| 232 | 0 |                       IDNAInfo &info, UErrorCode &errorCode) const { | 
| 233 | 0 |     return process(label, TRUE, FALSE, dest, info, errorCode); | 
| 234 | 0 | } | 
| 235 |  |  | 
| 236 |  | UnicodeString & | 
| 237 |  | UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, | 
| 238 | 0 |                    IDNAInfo &info, UErrorCode &errorCode) const { | 
| 239 | 0 |     process(name, FALSE, TRUE, dest, info, errorCode); | 
| 240 | 0 |     if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && | 
| 241 | 0 |         isASCIIString(dest) && | 
| 242 | 0 |         (dest.length()>254 || dest[253]!=0x2e) | 
| 243 | 0 |     ) { | 
| 244 | 0 |         info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | 
| 245 | 0 |     } | 
| 246 | 0 |     return dest; | 
| 247 | 0 | } | 
| 248 |  |  | 
| 249 |  | UnicodeString & | 
| 250 |  | UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, | 
| 251 | 0 |                      IDNAInfo &info, UErrorCode &errorCode) const { | 
| 252 | 0 |     return process(name, FALSE, FALSE, dest, info, errorCode); | 
| 253 | 0 | } | 
| 254 |  |  | 
| 255 |  | void | 
| 256 |  | UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest, | 
| 257 | 0 |                          IDNAInfo &info, UErrorCode &errorCode) const { | 
| 258 | 0 |     processUTF8(label, TRUE, TRUE, dest, info, errorCode); | 
| 259 | 0 | } | 
| 260 |  |  | 
| 261 |  | void | 
| 262 |  | UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, | 
| 263 | 0 |                           IDNAInfo &info, UErrorCode &errorCode) const { | 
| 264 | 0 |     processUTF8(label, TRUE, FALSE, dest, info, errorCode); | 
| 265 | 0 | } | 
| 266 |  |  | 
| 267 |  | void | 
| 268 |  | UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest, | 
| 269 | 0 |                         IDNAInfo &info, UErrorCode &errorCode) const { | 
| 270 | 0 |     processUTF8(name, FALSE, TRUE, dest, info, errorCode); | 
| 271 | 0 | } | 
| 272 |  |  | 
| 273 |  | void | 
| 274 |  | UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, | 
| 275 | 0 |                          IDNAInfo &info, UErrorCode &errorCode) const { | 
| 276 | 0 |     processUTF8(name, FALSE, FALSE, dest, info, errorCode); | 
| 277 | 0 | } | 
| 278 |  |  | 
// UTS #46 classification of the 128 ASCII characters, indexed by code point.
//   -1 = disallowed   0 = valid   1 = mapped (uppercase letter, to lowercase)
// The normalizer (using uts46.nrm) lowercases uppercase ASCII letters and
// passes all other ASCII characters through unchanged, so this table is what
// disallows non-LDH characters when UIDNA_USE_STD3_RULES is set.
// The ASCII fastpath reads this table as well.
static const int8_t asciiData[128]={
    /* 00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 20 */ -1, -1, -1, -1, -1, -1, -1, -1,
    // 002D..002E; valid  #  HYPHEN-MINUS..FULL STOP
    /* 28 */ -1, -1, -1, -1, -1,  0,  0, -1,
    // 0030..0039; valid  #  DIGIT ZERO..DIGIT NINE
    /* 30 */  0,  0,  0,  0,  0,  0,  0,  0,
    /* 38 */  0,  0, -1, -1, -1, -1, -1, -1,
    // 0041..005A; mapped  #  LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
    /* 40 */ -1,  1,  1,  1,  1,  1,  1,  1,
    /* 48 */  1,  1,  1,  1,  1,  1,  1,  1,
    /* 50 */  1,  1,  1,  1,  1,  1,  1,  1,
    /* 58 */  1,  1,  1, -1, -1, -1, -1, -1,
    // 0061..007A; valid  #  LATIN SMALL LETTER A..LATIN SMALL LETTER Z
    /* 60 */ -1,  0,  0,  0,  0,  0,  0,  0,
    /* 68 */  0,  0,  0,  0,  0,  0,  0,  0,
    /* 70 */  0,  0,  0,  0,  0,  0,  0,  0,
    /* 78 */  0,  0,  0, -1, -1, -1, -1, -1
};
| 300 |  |  | 
// Common UTF-16 implementation behind labelToASCII(), labelToUnicode(),
// nameToASCII() and nameToUnicode().
//
// Validates the arguments, clears dest and resets info, then copies src into
// dest through an ASCII fastpath that lowercases A-Z (via asciiData) and
// records hyphen, empty-label and length errors. If the whole input is
// handled by the fastpath the function returns without calling the
// normalizer; otherwise the remainder is handed to processUnicode(),
// followed by a final BiDi check.
//
// src       input string; must not be the same object as dest and must have
//           a non-NULL buffer, otherwise errorCode is set to
//           U_ILLEGAL_ARGUMENT_ERROR and dest is set to bogus.
// isLabel   if TRUE, a dot (0x2e) is not a label separator; the fastpath
//           stops at it and leaves it to processUnicode().
// toASCII   if TRUE, the label-length (>63) and name-length checks are applied.
// dest      output string (also the return value).
// info      receives the accumulated error bits.
// errorCode in/out ICU error code; on incoming failure dest is set to bogus.
| 301 |  | UnicodeString & | 
| 302 |  | UTS46::process(const UnicodeString &src, | 
| 303 |  |                UBool isLabel, UBool toASCII, | 
| 304 |  |                UnicodeString &dest, | 
| 305 | 0 |                IDNAInfo &info, UErrorCode &errorCode) const { | 
| 306 |  |     // uts46Norm2.normalize() would do all of this error checking and setup, | 
| 307 |  |     // but with the ASCII fastpath we do not always call it, and do not | 
| 308 |  |     // call it first. | 
| 309 | 0 |     if(U_FAILURE(errorCode)) { | 
| 310 | 0 |         dest.setToBogus(); | 
| 311 | 0 |         return dest; | 
| 312 | 0 |     } | 
| 313 | 0 |     const UChar *srcArray=src.getBuffer(); | 
| 314 | 0 |     if(&dest==&src || srcArray==NULL) { | 
| 315 | 0 |         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 316 | 0 |         dest.setToBogus(); | 
| 317 | 0 |         return dest; | 
| 318 | 0 |     } | 
| 319 |  |     // Arguments are fine, reset output values. | 
| 320 | 0 |     dest.remove(); | 
| 321 | 0 |     info.reset(); | 
| 322 | 0 |     int32_t srcLength=src.length(); | 
| 323 | 0 |     if(srcLength==0) { | 
| 324 | 0 |         info.errors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 325 | 0 |         return dest; | 
| 326 | 0 |     } | 
// Request room for srcLength code units; the fastpath writes at most one
// output unit per input unit, at the same index.
| 327 | 0 |     UChar *destArray=dest.getBuffer(srcLength); | 
| 328 | 0 |     if(destArray==NULL) { | 
| 329 | 0 |         errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 330 | 0 |         return dest; | 
| 331 | 0 |     } | 
| 332 |  |     // ASCII fastpath | 
| 333 | 0 |     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 334 | 0 |     int32_t labelStart=0; | 
| 335 | 0 |     int32_t i; | 
| 336 | 0 |     for(i=0;; ++i) { | 
// End of input reached inside the fastpath: finish the last label's checks
// and return without calling processUnicode().
| 337 | 0 |         if(i==srcLength) { | 
| 338 | 0 |             if(toASCII) { | 
| 339 | 0 |                 if((i-labelStart)>63) { | 
| 340 | 0 |                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 341 | 0 |                 } | 
| 342 |  |                 // There is a trailing dot if labelStart==i. | 
| 343 | 0 |                 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 
| 344 | 0 |                     info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | 
| 345 | 0 |                 } | 
| 346 | 0 |             } | 
| 347 | 0 |             info.errors|=info.labelErrors; | 
| 348 | 0 |             dest.releaseBuffer(i); | 
| 349 | 0 |             return dest; | 
| 350 | 0 |         } | 
| 351 | 0 |         UChar c=srcArray[i]; | 
// Non-ASCII code unit: leave the fastpath; c itself has not been copied.
| 352 | 0 |         if(c>0x7f) { | 
| 353 | 0 |             break; | 
| 354 | 0 |         } | 
| 355 | 0 |         int cData=asciiData[c]; | 
| 356 | 0 |         if(cData>0) { | 
| 357 | 0 |             destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter. | 
| 358 | 0 |         } else if(cData<0 && disallowNonLDHDot) { | 
| 359 | 0 |             break;  // Replacing with U+FFFD can be complicated for toASCII. | 
| 360 | 0 |         } else { | 
| 361 | 0 |             destArray[i]=c; | 
| 362 | 0 |             if(c==0x2d) {  // hyphen | 
| 363 | 0 |                 if(i==(labelStart+3) && srcArray[i-1]==0x2d) { | 
| 364 |  |                     // "??--..." is Punycode or forbidden. | 
| 365 | 0 |                     ++i;  // '-' was copied to dest already | 
| 366 | 0 |                     break; | 
| 367 | 0 |                 } | 
| 368 | 0 |                 if(i==labelStart) { | 
| 369 |  |                     // label starts with "-" | 
| 370 | 0 |                     info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 
| 371 | 0 |                 } | 
| 372 | 0 |                 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 
| 373 |  |                     // label ends with "-" | 
| 374 | 0 |                     info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 
| 375 | 0 |                 } | 
| 376 | 0 |             } else if(c==0x2e) {  // dot | 
| 377 | 0 |                 if(isLabel) { | 
| 378 |  |                     // Replacing with U+FFFD can be complicated for toASCII. | 
| 379 | 0 |                     ++i;  // '.' was copied to dest already | 
| 380 | 0 |                     break; | 
| 381 | 0 |                 } | 
| 382 | 0 |                 if(i==labelStart) { | 
| 383 | 0 |                     info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 384 | 0 |                 } | 
| 385 | 0 |                 if(toASCII && (i-labelStart)>63) { | 
| 386 | 0 |                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 387 | 0 |                 } | 
// A label is complete: fold its errors into info.errors and start the next
// label right after the dot.
| 388 | 0 |                 info.errors|=info.labelErrors; | 
| 389 | 0 |                 info.labelErrors=0; | 
| 390 | 0 |                 labelStart=i+1; | 
| 391 | 0 |             } | 
| 392 | 0 |         } | 
| 393 | 0 |     } | 
// The fastpath stopped early. dest keeps the first i code units; the rest of
// src (from index i) is processed by processUnicode(), which resumes label
// processing at labelStart.
| 394 | 0 |     info.errors|=info.labelErrors; | 
| 395 | 0 |     dest.releaseBuffer(i); | 
| 396 | 0 |     processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | 
// Final BiDi check, skipped when a severe error was recorded. The first
// labelStart code units of dest came from the fastpath, so they are checked
// separately with isASCIIOkBiDi().
| 397 | 0 |     if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && | 
| 398 | 0 |         (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) | 
| 399 | 0 |     ) { | 
| 400 | 0 |         info.errors|=UIDNA_ERROR_BIDI; | 
| 401 | 0 |     } | 
| 402 | 0 |     return dest; | 
| 403 | 0 | } | 
| 404 |  |  | 
// Common UTF-8 implementation behind labelToASCII_UTF8(), labelToUnicodeUTF8(),
// nameToASCII_UTF8() and nameToUnicodeUTF8().
//
// For input of at most 256 bytes (the size of stackArray) an ASCII fastpath
// mirrors the one in process(): bytes are copied, with A-Z lowercased, into a
// buffer obtained from dest.GetAppendBuffer(). If the whole input is handled
// there, it is appended to dest and the function returns. Otherwise the
// completed labels are appended to dest as bytes, the already-copied prefix
// of the current label is converted to UTF-16, and the rest of src (from
// labelStart) goes through processUnicode(). Longer input skips the fastpath
// and is converted to UTF-16 as a whole.
//
// src       UTF-8 input; NULL data with non-zero length sets
//           U_ILLEGAL_ARGUMENT_ERROR.
// isLabel   if TRUE, a dot (0x2e) is not a label separator.
// toASCII   if TRUE, the label-length and name-length checks are applied.
// dest      ByteSink that receives the UTF-8 result.
// info      receives the accumulated error bits; not touched if errorCode
//           already indicates failure or the arguments are rejected.
// errorCode in/out ICU error code.
| 405 |  | void | 
| 406 |  | UTS46::processUTF8(StringPiece src, | 
| 407 |  |                    UBool isLabel, UBool toASCII, | 
| 408 |  |                    ByteSink &dest, | 
| 409 | 0 |                    IDNAInfo &info, UErrorCode &errorCode) const { | 
| 410 | 0 |     if(U_FAILURE(errorCode)) { | 
| 411 | 0 |         return; | 
| 412 | 0 |     } | 
| 413 | 0 |     const char *srcArray=src.data(); | 
| 414 | 0 |     int32_t srcLength=src.length(); | 
| 415 | 0 |     if(srcArray==NULL && srcLength!=0) { | 
| 416 | 0 |         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 417 | 0 |         return; | 
| 418 | 0 |     } | 
| 419 |  |     // Arguments are fine, reset output values. | 
| 420 | 0 |     info.reset(); | 
| 421 | 0 |     if(srcLength==0) { | 
| 422 | 0 |         info.errors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 423 | 0 |         dest.Flush(); | 
| 424 | 0 |         return; | 
| 425 | 0 |     } | 
| 426 | 0 |     UnicodeString destString; | 
| 427 | 0 |     int32_t labelStart=0; | 
| 428 | 0 |     if(srcLength<=256) {  // length of stackArray[] | 
| 429 |  |         // ASCII fastpath | 
| 430 | 0 |         char stackArray[256]; | 
| 431 | 0 |         int32_t destCapacity; | 
| 432 | 0 |         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | 
| 433 | 0 |                                              stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); | 
| 434 | 0 |         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 435 | 0 |         int32_t i; | 
| 436 | 0 |         for(i=0;; ++i) { | 
// End of input reached inside the fastpath: finish the last label's checks,
// append the i copied bytes to dest, and return.
| 437 | 0 |             if(i==srcLength) { | 
| 438 | 0 |                 if(toASCII) { | 
| 439 | 0 |                     if((i-labelStart)>63) { | 
| 440 | 0 |                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 441 | 0 |                     } | 
| 442 |  |                     // There is a trailing dot if labelStart==i. | 
| 443 | 0 |                     if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 
| 444 | 0 |                         info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | 
| 445 | 0 |                     } | 
| 446 | 0 |                 } | 
| 447 | 0 |                 info.errors|=info.labelErrors; | 
| 448 | 0 |                 dest.Append(destArray, i); | 
| 449 | 0 |                 dest.Flush(); | 
| 450 | 0 |                 return; | 
| 451 | 0 |             } | 
| 452 | 0 |             char c=srcArray[i]; | 
| 453 | 0 |             if((int8_t)c<0) {  // (uint8_t)c>0x7f | 
| 454 | 0 |                 break; | 
| 455 | 0 |             } | 
| 456 | 0 |             int cData=asciiData[(int)c];  // Cast: gcc warns about indexing with a char. | 
| 457 | 0 |             if(cData>0) { | 
| 458 | 0 |                 destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter. | 
| 459 | 0 |             } else if(cData<0 && disallowNonLDHDot) { | 
| 460 | 0 |                 break;  // Replacing with U+FFFD can be complicated for toASCII. | 
| 461 | 0 |             } else { | 
| 462 | 0 |                 destArray[i]=c; | 
| 463 | 0 |                 if(c==0x2d) {  // hyphen | 
// Unlike process(), i is not incremented before this break, so the second
// '-' is handled by processUnicode() rather than counted as already copied.
| 464 | 0 |                     if(i==(labelStart+3) && srcArray[i-1]==0x2d) { | 
| 465 |  |                         // "??--..." is Punycode or forbidden. | 
| 466 | 0 |                         break; | 
| 467 | 0 |                     } | 
| 468 | 0 |                     if(i==labelStart) { | 
| 469 |  |                         // label starts with "-" | 
| 470 | 0 |                         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 
| 471 | 0 |                     } | 
| 472 | 0 |                     if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 
| 473 |  |                         // label ends with "-" | 
| 474 | 0 |                         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 
| 475 | 0 |                     } | 
| 476 | 0 |                 } else if(c==0x2e) {  // dot | 
| 477 | 0 |                     if(isLabel) { | 
| 478 | 0 |                         break;  // Replacing with U+FFFD can be complicated for toASCII. | 
| 479 | 0 |                     } | 
| 480 | 0 |                     if(i==labelStart) { | 
| 481 | 0 |                         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 482 | 0 |                     } | 
| 483 | 0 |                     if(toASCII && (i-labelStart)>63) { | 
| 484 | 0 |                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 485 | 0 |                     } | 
| 486 | 0 |                     info.errors|=info.labelErrors; | 
| 487 | 0 |                     info.labelErrors=0; | 
| 488 | 0 |                     labelStart=i+1; | 
| 489 | 0 |                 } | 
| 490 | 0 |             } | 
| 491 | 0 |         } | 
| 492 | 0 |         info.errors|=info.labelErrors; | 
| 493 |  |         // Convert the processed ASCII prefix of the current label to UTF-16. | 
// mappingStart is relative to the start of the current label because the
// string handed to processUnicode() below begins at labelStart.
| 494 | 0 |         int32_t mappingStart=i-labelStart; | 
| 495 | 0 |         destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); | 
| 496 |  |         // Output the previous ASCII labels and process the rest of src in UTF-16. | 
| 497 | 0 |         dest.Append(destArray, labelStart); | 
| 498 | 0 |         processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, | 
| 499 | 0 |                        isLabel, toASCII, | 
| 500 | 0 |                        destString, info, errorCode); | 
| 501 | 0 |     } else { | 
| 502 |  |         // src is too long for the ASCII fastpath implementation. | 
| 503 | 0 |         processUnicode(UnicodeString::fromUTF8(src), 0, 0, | 
| 504 | 0 |                        isLabel, toASCII, | 
| 505 | 0 |                        destString, info, errorCode); | 
| 506 | 0 |     } | 
| 507 | 0 |     destString.toUTF8(dest);  // calls dest.Flush() | 
// Name-length re-check. The total output length is the labelStart bytes
// already appended to dest plus the length of destString.
| 508 | 0 |     if(toASCII && !isLabel) { | 
| 509 |  |         // length==labelStart==254 means that there is a trailing dot (ok) and | 
| 510 |  |         // destString is empty (do not index at 253-labelStart). | 
| 511 | 0 |         int32_t length=labelStart+destString.length(); | 
| 512 | 0 |         if( length>=254 && isASCIIString(destString) && | 
| 513 | 0 |             (length>254 || | 
| 514 | 0 |              (labelStart<254 && destString[253-labelStart]!=0x2e)) | 
| 515 | 0 |         ) { | 
| 516 | 0 |             info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | 
| 517 | 0 |         } | 
| 518 | 0 |     } | 
// Final BiDi check, skipped when a severe error was recorded.
// NOTE(review): the fastpath prefix is checked on the original src bytes
// here, whereas process() checks the lowercased dest prefix; presumably
// equivalent for isASCIIOkBiDi() - confirm against its definition.
| 519 | 0 |     if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && | 
| 520 | 0 |         (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) | 
| 521 | 0 |     ) { | 
| 522 | 0 |         info.errors|=UIDNA_ERROR_BIDI; | 
| 523 | 0 |     } | 
| 524 | 0 | } | 
| 525 |  |  | 
// Normalizes the not-yet-processed part of src into dest and then processes
// dest label by label.
//
// If mappingStart==0, dest is replaced by the normalization of src;
// otherwise the normalization of src from mappingStart on is appended to the
// existing contents of dest via normalizeSecondAndAppend(). The scan then
// walks dest from labelStart: at each dot (unless isLabel) it calls
// processLabel() for the label just ended, and at the first deviation
// character (U+00DF, U+03C2, U+200C, U+200D) it sets info.isTransDiff and,
// if the options request transitional processing for this direction, calls
// mapDevChars() once.
//
// Returns dest. Returns early if errorCode indicates failure after the
// normalization, after processLabel(), or after mapDevChars().
| 526 |  | UnicodeString & | 
| 527 |  | UTS46::processUnicode(const UnicodeString &src, | 
| 528 |  |                       int32_t labelStart, int32_t mappingStart, | 
| 529 |  |                       UBool isLabel, UBool toASCII, | 
| 530 |  |                       UnicodeString &dest, | 
| 531 | 0 |                       IDNAInfo &info, UErrorCode &errorCode) const { | 
| 532 | 0 |     if(mappingStart==0) { | 
| 533 | 0 |         uts46Norm2.normalize(src, dest, errorCode); | 
| 534 | 0 |     } else { | 
| 535 | 0 |         uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); | 
| 536 | 0 |     } | 
| 537 | 0 |     if(U_FAILURE(errorCode)) { | 
| 538 | 0 |         return dest; | 
| 539 | 0 |     } | 
// Deviation characters are mapped only when the NONTRANSITIONAL option for
// the current direction is not set.
| 540 | 0 |     UBool doMapDevChars= | 
| 541 | 0 |         toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : | 
| 542 | 0 |                   (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; | 
| 543 | 0 |     const UChar *destArray=dest.getBuffer(); | 
| 544 | 0 |     int32_t destLength=dest.length(); | 
| 545 | 0 |     int32_t labelLimit=labelStart; | 
| 546 | 0 |     while(labelLimit<destLength) { | 
| 547 | 0 |         UChar c=destArray[labelLimit]; | 
| 548 | 0 |         if(c==0x2e && !isLabel) { | 
| 549 | 0 |             int32_t labelLength=labelLimit-labelStart; | 
| 550 | 0 |             int32_t newLength=processLabel(dest, labelStart, labelLength, | 
| 551 | 0 |                                             toASCII, info, errorCode); | 
| 552 | 0 |             info.errors|=info.labelErrors; | 
| 553 | 0 |             info.labelErrors=0; | 
| 554 | 0 |             if(U_FAILURE(errorCode)) { | 
| 555 | 0 |                 return dest; | 
| 556 | 0 |             } | 
// processLabel() takes dest by non-const reference and returns the new label
// length, so re-fetch the buffer pointer, adjust destLength by the change in
// label length, and continue after the dot that follows the processed label.
| 557 | 0 |             destArray=dest.getBuffer(); | 
| 558 | 0 |             destLength+=newLength-labelLength; | 
| 559 | 0 |             labelLimit=labelStart+=newLength+1; | 
// The range test 0xdf..0x200d is a quick pre-filter; the parenthesized part
// selects exactly U+00DF, U+03C2, U+200C and U+200D.
| 560 | 0 |         } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { | 
| 561 | 0 |             info.isTransDiff=TRUE; | 
| 562 | 0 |             if(doMapDevChars) { | 
| 563 | 0 |                 destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); | 
| 564 | 0 |                 if(U_FAILURE(errorCode)) { | 
| 565 | 0 |                     return dest; | 
| 566 | 0 |                 } | 
| 567 | 0 |                 destArray=dest.getBuffer(); | 
| 568 |  |                 // Do not increment labelLimit in case c was removed. | 
| 569 |  |                 // All deviation characters have been mapped, no need to check for them again. | 
| 570 | 0 |                 doMapDevChars=FALSE; | 
| 571 | 0 |             } else { | 
| 572 | 0 |                 ++labelLimit; | 
| 573 | 0 |             } | 
| 574 | 0 |         } else { | 
| 575 | 0 |             ++labelLimit; | 
| 576 | 0 |         } | 
| 577 | 0 |     } | 
| 578 |  |     // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok) | 
| 579 |  |     // but not an empty label elsewhere nor a completely empty domain name. | 
| 580 |  |     // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0. | 
| 581 | 0 |     if(0==labelStart || labelStart<labelLimit) { | 
| 582 | 0 |         processLabel(dest, labelStart, labelLimit-labelStart, | 
| 583 | 0 |                       toASCII, info, errorCode); | 
| 584 | 0 |         info.errors|=info.labelErrors; | 
| 585 | 0 |     } | 
| 586 | 0 |     return dest; | 
| 587 | 0 | } | 
| 588 |  |  | 
// Maps the deviation characters in dest, in place, from index mappingStart
// to the end of the string:
//   U+00DF (sharp s)      -> "ss"
//   U+03C2 (final sigma)  -> U+03C3
//   U+200C / U+200D       -> removed
// If anything was mapped, the part of dest from labelStart on is normalized
// again with uts46Norm2 and replaced.
//
// Returns the new dest.length(). Returns 0 if errorCode already indicates
// failure on entry. If a buffer cannot be obtained, errorCode is set to
// U_MEMORY_ALLOCATION_ERROR and the current value of the local length
// counter is returned.
| 589 |  | int32_t | 
| 590 |  | UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, | 
| 591 | 0 |                    UErrorCode &errorCode) const { | 
| 592 | 0 |     if(U_FAILURE(errorCode)) { | 
| 593 | 0 |         return 0; | 
| 594 | 0 |     } | 
| 595 | 0 |     int32_t length=dest.length(); | 
// Ask for one extra unit up front when the first character to map is a
// sharp s, since that mapping grows the string by one.
| 596 | 0 |     UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); | 
| 597 | 0 |     if(s==NULL) { | 
| 598 | 0 |         errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 599 | 0 |         return length; | 
| 600 | 0 |     } | 
| 601 | 0 |     int32_t capacity=dest.getCapacity(); | 
| 602 | 0 |     UBool didMapDevChars=FALSE; | 
// readIndex walks the original text, writeIndex the rewritten text; length
// tracks the length of the rewritten string as characters are added/removed.
| 603 | 0 |     int32_t readIndex=mappingStart, writeIndex=mappingStart; | 
| 604 | 0 |     do { | 
| 605 | 0 |         UChar c=s[readIndex++]; | 
| 606 | 0 |         switch(c) { | 
| 607 | 0 |         case 0xdf: | 
| 608 |  |             // Map sharp s to ss. | 
| 609 | 0 |             didMapDevChars=TRUE; | 
| 610 | 0 |             s[writeIndex++]=0x73;  // Replace sharp s with first s. | 
| 611 |  |             // Insert second s and account for possible buffer reallocation. | 
// writeIndex==readIndex: there is no gap left by earlier removals, so the
// unread tail is shifted right by one (growing the buffer first if it is
// full). Otherwise the second s fits into the existing gap.
| 612 | 0 |             if(writeIndex==readIndex) { | 
| 613 | 0 |                 if(length==capacity) { | 
| 614 | 0 |                     dest.releaseBuffer(length); | 
| 615 | 0 |                     s=dest.getBuffer(length+1); | 
| 616 | 0 |                     if(s==NULL) { | 
| 617 | 0 |                         errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 618 | 0 |                         return length; | 
| 619 | 0 |                     } | 
| 620 | 0 |                     capacity=dest.getCapacity(); | 
| 621 | 0 |                 } | 
| 622 | 0 |                 u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex); | 
| 623 | 0 |                 ++readIndex; | 
| 624 | 0 |             } | 
| 625 | 0 |             s[writeIndex++]=0x73; | 
| 626 | 0 |             ++length; | 
| 627 | 0 |             break; | 
| 628 | 0 |         case 0x3c2:  // Map final sigma to nonfinal sigma. | 
| 629 | 0 |             didMapDevChars=TRUE; | 
| 630 | 0 |             s[writeIndex++]=0x3c3; | 
| 631 | 0 |             break; | 
| 632 | 0 |         case 0x200c:  // Ignore/remove ZWNJ. | 
| 633 | 0 |         case 0x200d:  // Ignore/remove ZWJ. | 
| 634 | 0 |             didMapDevChars=TRUE; | 
| 635 | 0 |             --length; | 
| 636 | 0 |             break; | 
| 637 | 0 |         default: | 
| 638 |  |             // Only really necessary if writeIndex was different from readIndex. | 
| 639 | 0 |             s[writeIndex++]=c; | 
| 640 | 0 |             break; | 
| 641 | 0 |         } | 
| 642 | 0 |     } while(writeIndex<length); | 
| 643 | 0 |     dest.releaseBuffer(length); | 
| 644 | 0 |     if(didMapDevChars) { | 
| 645 |  |         // Mapping deviation characters might have resulted in an un-NFC string. | 
| 646 |  |         // We could use either the NFC or the UTS #46 normalizer. | 
| 647 |  |         // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. | 
| 648 | 0 |         UnicodeString normalized; | 
| 649 | 0 |         uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); | 
| 650 | 0 |         if(U_SUCCESS(errorCode)) { | 
// 0x7fffffff as the length: replace everything from labelStart to the end.
| 651 | 0 |             dest.replace(labelStart, 0x7fffffff, normalized); | 
| 652 | 0 |             if(dest.isBogus()) { | 
| 653 | 0 |                 errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 654 | 0 |             } | 
| 655 | 0 |             return dest.length(); | 
| 656 | 0 |         } | 
| 657 | 0 |     } | 
| 658 | 0 |     return length; | 
| 659 | 0 | } | 
| 660 |  |  | 
| 661 |  | // Some non-ASCII characters are equivalent to sequences with | 
| 662 |  | // non-LDH ASCII characters. To find them: | 
| 663 |  | // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) | 
| 664 |  | static inline UBool | 
| 665 | 0 | isNonASCIIDisallowedSTD3Valid(UChar32 c) { | 
| 666 | 0 |     return c==0x2260 || c==0x226E || c==0x226F; | 
| 667 | 0 | } | 
| 668 |  |  | 
| 669 |  | // Replace the label in dest with the label string, if the label was modified. | 
| 670 |  | // If &label==&dest then the label was modified in-place and labelLength | 
| 671 |  | // is the new label length, different from label.length(). | 
| 672 |  | // If &label!=&dest then labelLength==label.length(). | 
| 673 |  | // Returns labelLength (= the new label length). | 
| 674 |  | static int32_t | 
| 675 |  | replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, | 
| 676 | 0 |              const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { | 
| 677 | 0 |     if(U_FAILURE(errorCode)) { | 
| 678 | 0 |         return 0; | 
| 679 | 0 |     } | 
| 680 | 0 |     if(&label!=&dest) { | 
| 681 | 0 |         dest.replace(destLabelStart, destLabelLength, label); | 
| 682 | 0 |         if(dest.isBogus()) { | 
| 683 | 0 |             errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 684 | 0 |             return 0; | 
| 685 | 0 |         } | 
| 686 | 0 |     } | 
| 687 | 0 |     return labelLength; | 
| 688 | 0 | } | 
| 689 |  |  | 
| 690 |  | int32_t | 
| 691 |  | UTS46::processLabel(UnicodeString &dest, | 
| 692 |  |                     int32_t labelStart, int32_t labelLength, | 
| 693 |  |                     UBool toASCII, | 
| 694 | 0 |                     IDNAInfo &info, UErrorCode &errorCode) const { | 
| 695 | 0 |     if(U_FAILURE(errorCode)) { | 
| 696 | 0 |         return 0; | 
| 697 | 0 |     } | 
| 698 | 0 |     UnicodeString fromPunycode; | 
| 699 | 0 |     UnicodeString *labelString; | 
| 700 | 0 |     const UChar *label=dest.getBuffer()+labelStart; | 
| 701 | 0 |     int32_t destLabelStart=labelStart; | 
| 702 | 0 |     int32_t destLabelLength=labelLength; | 
| 703 | 0 |     UBool wasPunycode; | 
| 704 | 0 |     if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { | 
| 705 |  |         // Label starts with "xn--", try to un-Punycode it. | 
| 706 | 0 |         wasPunycode=TRUE; | 
| 707 | 0 |         UChar *unicodeBuffer=fromPunycode.getBuffer(-1);  // capacity==-1: most labels should fit | 
| 708 | 0 |         if(unicodeBuffer==NULL) { | 
| 709 |  |             // Should never occur if we used capacity==-1 which uses the internal buffer. | 
| 710 | 0 |             errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 711 | 0 |             return labelLength; | 
| 712 | 0 |         } | 
| 713 | 0 |         UErrorCode punycodeErrorCode=U_ZERO_ERROR; | 
| 714 | 0 |         int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, | 
| 715 | 0 |                                                 unicodeBuffer, fromPunycode.getCapacity(), | 
| 716 | 0 |                                                 NULL, &punycodeErrorCode); | 
| 717 | 0 |         if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { | 
| 718 | 0 |             fromPunycode.releaseBuffer(0); | 
| 719 | 0 |             unicodeBuffer=fromPunycode.getBuffer(unicodeLength); | 
| 720 | 0 |             if(unicodeBuffer==NULL) { | 
| 721 | 0 |                 errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 722 | 0 |                 return labelLength; | 
| 723 | 0 |             } | 
| 724 | 0 |             punycodeErrorCode=U_ZERO_ERROR; | 
| 725 | 0 |             unicodeLength=u_strFromPunycode(label+4, labelLength-4, | 
| 726 | 0 |                                             unicodeBuffer, fromPunycode.getCapacity(), | 
| 727 | 0 |                                             NULL, &punycodeErrorCode); | 
| 728 | 0 |         } | 
| 729 | 0 |         fromPunycode.releaseBuffer(unicodeLength); | 
| 730 | 0 |         if(U_FAILURE(punycodeErrorCode)) { | 
| 731 | 0 |             info.labelErrors|=UIDNA_ERROR_PUNYCODE; | 
| 732 | 0 |             return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); | 
| 733 | 0 |         } | 
| 734 |  |         // Check for NFC, and for characters that are not | 
| 735 |  |         // valid or deviation characters according to the normalizer. | 
| 736 |  |         // If there is something wrong, then the string will change. | 
| 737 |  |         // Note that the normalizer passes through non-LDH ASCII and deviation characters. | 
| 738 |  |         // Deviation characters are ok in Punycode even in transitional processing. | 
| 739 |  |         // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES | 
| 740 |  |         // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. | 
| 741 | 0 |         UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); | 
| 742 | 0 |         if(U_FAILURE(errorCode)) { | 
| 743 | 0 |             return labelLength; | 
| 744 | 0 |         } | 
| 745 | 0 |         if(!isValid) { | 
| 746 | 0 |             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 
| 747 | 0 |             return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); | 
| 748 | 0 |         } | 
| 749 | 0 |         labelString=&fromPunycode; | 
| 750 | 0 |         label=fromPunycode.getBuffer(); | 
| 751 | 0 |         labelStart=0; | 
| 752 | 0 |         labelLength=fromPunycode.length(); | 
| 753 | 0 |     } else { | 
| 754 | 0 |         wasPunycode=FALSE; | 
| 755 | 0 |         labelString=&dest; | 
| 756 | 0 |     } | 
| 757 |  |     // Validity check | 
| 758 | 0 |     if(labelLength==0) { | 
| 759 | 0 |         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 760 | 0 |         return replaceLabel(dest, destLabelStart, destLabelLength, | 
| 761 | 0 |                             *labelString, labelLength, errorCode); | 
| 762 | 0 |     } | 
| 763 |  |     // labelLength>0 | 
| 764 | 0 |     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 
| 765 |  |         // label starts with "??--" | 
| 766 | 0 |         info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 
| 767 | 0 |     } | 
| 768 | 0 |     if(label[0]==0x2d) { | 
| 769 |  |         // label starts with "-" | 
| 770 | 0 |         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 
| 771 | 0 |     } | 
| 772 | 0 |     if(label[labelLength-1]==0x2d) { | 
| 773 |  |         // label ends with "-" | 
| 774 | 0 |         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 
| 775 | 0 |     } | 
| 776 |  |     // If the label was not a Punycode label, then it was the result of | 
| 777 |  |     // mapping, normalization and label segmentation. | 
| 778 |  |     // If the label was in Punycode, then we mapped it again above | 
| 779 |  |     // and checked its validity. | 
| 780 |  |     // Now we handle the STD3 restriction to LDH characters (if set) | 
| 781 |  |     // and we look for U+FFFD which indicates disallowed characters | 
| 782 |  |     // in a non-Punycode label or U+FFFD itself in a Punycode label. | 
| 783 |  |     // We also check for dots which can come from the input to a single-label function. | 
| 784 |  |     // Ok to cast away const because we own the UnicodeString. | 
| 785 | 0 |     UChar *s=(UChar *)label; | 
| 786 | 0 |     const UChar *limit=label+labelLength; | 
| 787 | 0 |     UChar oredChars=0; | 
| 788 |  |     // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. | 
| 789 | 0 |     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 790 | 0 |     do { | 
| 791 | 0 |         UChar c=*s; | 
| 792 | 0 |         if(c<=0x7f) { | 
| 793 | 0 |             if(c==0x2e) { | 
| 794 | 0 |                 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | 
| 795 | 0 |                 *s=0xfffd; | 
| 796 | 0 |             } else if(disallowNonLDHDot && asciiData[c]<0) { | 
| 797 | 0 |                 info.labelErrors|=UIDNA_ERROR_DISALLOWED; | 
| 798 | 0 |                 *s=0xfffd; | 
| 799 | 0 |             } | 
| 800 | 0 |         } else { | 
| 801 | 0 |             oredChars|=c; | 
| 802 | 0 |             if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { | 
| 803 | 0 |                 info.labelErrors|=UIDNA_ERROR_DISALLOWED; | 
| 804 | 0 |                 *s=0xfffd; | 
| 805 | 0 |             } else if(c==0xfffd) { | 
| 806 | 0 |                 info.labelErrors|=UIDNA_ERROR_DISALLOWED; | 
| 807 | 0 |             } | 
| 808 | 0 |         } | 
| 809 | 0 |         ++s; | 
| 810 | 0 |     } while(s<limit); | 
| 811 |  |     // Check for a leading combining mark after other validity checks | 
| 812 |  |     // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here. | 
| 813 | 0 |     UChar32 c; | 
| 814 | 0 |     int32_t cpLength=0; | 
| 815 |  |     // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD. | 
| 816 | 0 |     U16_NEXT_UNSAFE(label, cpLength, c); | 
| 817 | 0 |     if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) { | 
| 818 | 0 |         info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK; | 
| 819 | 0 |         labelString->replace(labelStart, cpLength, (UChar)0xfffd); | 
| 820 | 0 |         label=labelString->getBuffer()+labelStart; | 
| 821 | 0 |         labelLength+=1-cpLength; | 
| 822 | 0 |         if(labelString==&dest) { | 
| 823 | 0 |             destLabelLength=labelLength; | 
| 824 | 0 |         } | 
| 825 | 0 |     } | 
| 826 | 0 |     if((info.labelErrors&severeErrors)==0) { | 
| 827 |  |         // Do contextual checks only if we do not have U+FFFD from a severe error | 
| 828 |  |         // because U+FFFD can make these checks fail. | 
| 829 | 0 |         if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { | 
| 830 | 0 |             checkLabelBiDi(label, labelLength, info); | 
| 831 | 0 |         } | 
| 832 | 0 |         if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && | 
| 833 | 0 |             !isLabelOkContextJ(label, labelLength) | 
| 834 | 0 |         ) { | 
| 835 | 0 |             info.labelErrors|=UIDNA_ERROR_CONTEXTJ; | 
| 836 | 0 |         } | 
| 837 | 0 |         if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { | 
| 838 | 0 |             checkLabelContextO(label, labelLength, info); | 
| 839 | 0 |         } | 
| 840 | 0 |         if(toASCII) { | 
| 841 | 0 |             if(wasPunycode) { | 
| 842 |  |                 // Leave a Punycode label unchanged if it has no severe errors. | 
| 843 | 0 |                 if(destLabelLength>63) { | 
| 844 | 0 |                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 845 | 0 |                 } | 
| 846 | 0 |                 return destLabelLength; | 
| 847 | 0 |             } else if(oredChars>=0x80) { | 
| 848 |  |                 // Contains non-ASCII characters. | 
| 849 | 0 |                 UnicodeString punycode; | 
| 850 | 0 |                 UChar *buffer=punycode.getBuffer(63);  // 63==maximum DNS label length | 
| 851 | 0 |                 if(buffer==NULL) { | 
| 852 | 0 |                     errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 853 | 0 |                     return destLabelLength; | 
| 854 | 0 |                 } | 
| 855 | 0 |                 buffer[0]=0x78;  // Write "xn--". | 
| 856 | 0 |                 buffer[1]=0x6e; | 
| 857 | 0 |                 buffer[2]=0x2d; | 
| 858 | 0 |                 buffer[3]=0x2d; | 
| 859 | 0 |                 int32_t punycodeLength=u_strToPunycode(label, labelLength, | 
| 860 | 0 |                                                       buffer+4, punycode.getCapacity()-4, | 
| 861 | 0 |                                                       NULL, &errorCode); | 
| 862 | 0 |                 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | 
| 863 | 0 |                     errorCode=U_ZERO_ERROR; | 
| 864 | 0 |                     punycode.releaseBuffer(4); | 
| 865 | 0 |                     buffer=punycode.getBuffer(4+punycodeLength); | 
| 866 | 0 |                     if(buffer==NULL) { | 
| 867 | 0 |                         errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 868 | 0 |                         return destLabelLength; | 
| 869 | 0 |                     } | 
| 870 | 0 |                     punycodeLength=u_strToPunycode(label, labelLength, | 
| 871 | 0 |                                                   buffer+4, punycode.getCapacity()-4, | 
| 872 | 0 |                                                   NULL, &errorCode); | 
| 873 | 0 |                 } | 
| 874 | 0 |                 punycodeLength+=4; | 
| 875 | 0 |                 punycode.releaseBuffer(punycodeLength); | 
| 876 | 0 |                 if(U_FAILURE(errorCode)) { | 
| 877 | 0 |                     return destLabelLength; | 
| 878 | 0 |                 } | 
| 879 | 0 |                 if(punycodeLength>63) { | 
| 880 | 0 |                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 881 | 0 |                 } | 
| 882 | 0 |                 return replaceLabel(dest, destLabelStart, destLabelLength, | 
| 883 | 0 |                                     punycode, punycodeLength, errorCode); | 
| 884 | 0 |             } else { | 
| 885 |  |                 // all-ASCII label | 
| 886 | 0 |                 if(labelLength>63) { | 
| 887 | 0 |                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 888 | 0 |                 } | 
| 889 | 0 |             } | 
| 890 | 0 |         } | 
| 891 | 0 |     } else { | 
| 892 |  |         // If a Punycode label has severe errors, | 
| 893 |  |         // then leave it but make sure it does not look valid. | 
| 894 | 0 |         if(wasPunycode) { | 
| 895 | 0 |             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 
| 896 | 0 |             return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); | 
| 897 | 0 |         } | 
| 898 | 0 |     } | 
| 899 | 0 |     return replaceLabel(dest, destLabelStart, destLabelLength, | 
| 900 | 0 |                         *labelString, labelLength, errorCode); | 
| 901 | 0 | } | 
| 902 |  |  | 
| 903 |  | // Make sure an ACE label does not look valid. | 
| 904 |  | // Append U+FFFD if the label has only LDH characters. | 
| 905 |  | // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. | 
| 906 |  | int32_t | 
| 907 |  | UTS46::markBadACELabel(UnicodeString &dest, | 
| 908 |  |                        int32_t labelStart, int32_t labelLength, | 
| 909 | 0 |                        UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { | 
| 910 | 0 |     if(U_FAILURE(errorCode)) { | 
| 911 | 0 |         return 0; | 
| 912 | 0 |     } | 
| 913 | 0 |     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 914 | 0 |     UBool isASCII=TRUE; | 
| 915 | 0 |     UBool onlyLDH=TRUE; | 
| 916 | 0 |     const UChar *label=dest.getBuffer()+labelStart; | 
| 917 |  |     // Ok to cast away const because we own the UnicodeString. | 
| 918 | 0 |     UChar *s=(UChar *)label+4;  // After the initial "xn--". | 
| 919 | 0 |     const UChar *limit=label+labelLength; | 
| 920 | 0 |     do { | 
| 921 | 0 |         UChar c=*s; | 
| 922 | 0 |         if(c<=0x7f) { | 
| 923 | 0 |             if(c==0x2e) { | 
| 924 | 0 |                 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | 
| 925 | 0 |                 *s=0xfffd; | 
| 926 | 0 |                 isASCII=onlyLDH=FALSE; | 
| 927 | 0 |             } else if(asciiData[c]<0) { | 
| 928 | 0 |                 onlyLDH=FALSE; | 
| 929 | 0 |                 if(disallowNonLDHDot) { | 
| 930 | 0 |                     *s=0xfffd; | 
| 931 | 0 |                     isASCII=FALSE; | 
| 932 | 0 |                 } | 
| 933 | 0 |             } | 
| 934 | 0 |         } else { | 
| 935 | 0 |             isASCII=onlyLDH=FALSE; | 
| 936 | 0 |         } | 
| 937 | 0 |     } while(++s<limit); | 
| 938 | 0 |     if(onlyLDH) { | 
| 939 | 0 |         dest.insert(labelStart+labelLength, (UChar)0xfffd); | 
| 940 | 0 |         if(dest.isBogus()) { | 
| 941 | 0 |             errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 942 | 0 |             return 0; | 
| 943 | 0 |         } | 
| 944 | 0 |         ++labelLength; | 
| 945 | 0 |     } else { | 
| 946 | 0 |         if(toASCII && isASCII && labelLength>63) { | 
| 947 | 0 |             info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 948 | 0 |         } | 
| 949 | 0 |     } | 
| 950 | 0 |     return labelLength; | 
| 951 | 0 | } | 
| 952 |  |  | 
| 953 |  | const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); | 
| 954 |  | const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); | 
| 955 |  | const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; | 
| 956 |  |  | 
| 957 |  | const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); | 
| 958 |  |  | 
| 959 |  | const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); | 
| 960 |  | const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; | 
| 961 |  | const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); | 
| 962 |  |  | 
| 963 |  | const uint32_t ES_CS_ET_ON_BN_NSM_MASK= | 
| 964 |  |     U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| | 
| 965 |  |     U_MASK(U_COMMON_NUMBER_SEPARATOR)| | 
| 966 |  |     U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| | 
| 967 |  |     U_MASK(U_OTHER_NEUTRAL)| | 
| 968 |  |     U_MASK(U_BOUNDARY_NEUTRAL)| | 
| 969 |  |     U_MASK(U_DIR_NON_SPACING_MARK); | 
| 970 |  | const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; | 
| 971 |  | const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; | 
| 972 |  |  | 
| 973 |  | // We scan the whole label and check both for whether it contains RTL characters | 
| 974 |  | // and whether it passes the BiDi Rule. | 
| 975 |  | // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find | 
| 976 |  | // that a domain name is a BiDi domain name (has an RTL label) only after | 
| 977 |  | // processing several earlier labels. | 
| 978 |  | void | 
| 979 | 0 | UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { | 
| 980 |  |     // IDNA2008 BiDi rule | 
| 981 |  |     // Get the directionality of the first character. | 
| 982 | 0 |     UChar32 c; | 
| 983 | 0 |     int32_t i=0; | 
| 984 | 0 |     U16_NEXT_UNSAFE(label, i, c); | 
| 985 | 0 |     uint32_t firstMask=U_MASK(u_charDirection(c)); | 
| 986 |  |     // 1. The first character must be a character with BIDI property L, R | 
| 987 |  |     // or AL.  If it has the R or AL property, it is an RTL label; if it | 
| 988 |  |     // has the L property, it is an LTR label. | 
| 989 | 0 |     if((firstMask&~L_R_AL_MASK)!=0) { | 
| 990 | 0 |         info.isOkBiDi=FALSE; | 
| 991 | 0 |     } | 
| 992 |  |     // Get the directionality of the last non-NSM character. | 
| 993 | 0 |     uint32_t lastMask; | 
| 994 | 0 |     for(;;) { | 
| 995 | 0 |         if(i>=labelLength) { | 
| 996 | 0 |             lastMask=firstMask; | 
| 997 | 0 |             break; | 
| 998 | 0 |         } | 
| 999 | 0 |         U16_PREV_UNSAFE(label, labelLength, c); | 
| 1000 | 0 |         UCharDirection dir=u_charDirection(c); | 
| 1001 | 0 |         if(dir!=U_DIR_NON_SPACING_MARK) { | 
| 1002 | 0 |             lastMask=U_MASK(dir); | 
| 1003 | 0 |             break; | 
| 1004 | 0 |         } | 
| 1005 | 0 |     } | 
| 1006 |  |     // 3. In an RTL label, the end of the label must be a character with | 
| 1007 |  |     // BIDI property R, AL, EN or AN, followed by zero or more | 
| 1008 |  |     // characters with BIDI property NSM. | 
| 1009 |  |     // 6. In an LTR label, the end of the label must be a character with | 
| 1010 |  |     // BIDI property L or EN, followed by zero or more characters with | 
| 1011 |  |     // BIDI property NSM. | 
| 1012 | 0 |     if( (firstMask&L_MASK)!=0 ? | 
| 1013 | 0 |             (lastMask&~L_EN_MASK)!=0 : | 
| 1014 | 0 |             (lastMask&~R_AL_EN_AN_MASK)!=0 | 
| 1015 | 0 |     ) { | 
| 1016 | 0 |         info.isOkBiDi=FALSE; | 
| 1017 | 0 |     } | 
| 1018 |  |     // Get the directionalities of the intervening characters. | 
| 1019 | 0 |     uint32_t mask=0; | 
| 1020 | 0 |     while(i<labelLength) { | 
| 1021 | 0 |         U16_NEXT_UNSAFE(label, i, c); | 
| 1022 | 0 |         mask|=U_MASK(u_charDirection(c)); | 
| 1023 | 0 |     } | 
| 1024 | 0 |     if(firstMask&L_MASK) { | 
| 1025 |  |         // 5. In an LTR label, only characters with the BIDI properties L, EN, | 
| 1026 |  |         // ES, CS, ET, ON, BN and NSM are allowed. | 
| 1027 | 0 |         if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { | 
| 1028 | 0 |             info.isOkBiDi=FALSE; | 
| 1029 | 0 |         } | 
| 1030 | 0 |     } else { | 
| 1031 |  |         // 2. In an RTL label, only characters with the BIDI properties R, AL, | 
| 1032 |  |         // AN, EN, ES, CS, ET, ON, BN and NSM are allowed. | 
| 1033 | 0 |         if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { | 
| 1034 | 0 |             info.isOkBiDi=FALSE; | 
| 1035 | 0 |         } | 
| 1036 |  |         // 4. In an RTL label, if an EN is present, no AN may be present, and | 
| 1037 |  |         // vice versa. | 
| 1038 | 0 |         if((mask&EN_AN_MASK)==EN_AN_MASK) { | 
| 1039 | 0 |             info.isOkBiDi=FALSE; | 
| 1040 | 0 |         } | 
| 1041 | 0 |     } | 
| 1042 |  |     // An RTL label is a label that contains at least one character of type | 
| 1043 |  |     // R, AL or AN. [...] | 
| 1044 |  |     // A "BIDI domain name" is a domain name that contains at least one RTL | 
| 1045 |  |     // label. [...] | 
| 1046 |  |     // The following rule, consisting of six conditions, applies to labels | 
| 1047 |  |     // in BIDI domain names. | 
| 1048 | 0 |     if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) { | 
| 1049 | 0 |         info.isBiDi=TRUE; | 
| 1050 | 0 |     } | 
| 1051 | 0 | } | 
| 1052 |  |  | 
| 1053 |  | // Special code for the ASCII prefix of a BiDi domain name. | 
| 1054 |  | // The ASCII prefix is all-LTR. | 
| 1055 |  |  | 
| 1056 |  | // IDNA2008 BiDi rule, parts relevant to ASCII labels: | 
| 1057 |  | // 1. The first character must be a character with BIDI property L [...] | 
| 1058 |  | // 5. In an LTR label, only characters with the BIDI properties L, EN, | 
| 1059 |  | // ES, CS, ET, ON, BN and NSM are allowed. | 
| 1060 |  | // 6. In an LTR label, the end of the label must be a character with | 
| 1061 |  | // BIDI property L or EN [...] | 
| 1062 |  |  | 
| 1063 |  | // UTF-16 version, called for mapped ASCII prefix. | 
| 1064 |  | // Cannot contain uppercase A-Z. | 
| 1065 |  | // s[length-1] must be the trailing dot. | 
| 1066 |  | static UBool | 
| 1067 | 0 | isASCIIOkBiDi(const UChar *s, int32_t length) { | 
| 1068 | 0 |     int32_t labelStart=0; | 
| 1069 | 0 |     for(int32_t i=0; i<length; ++i) { | 
| 1070 | 0 |         UChar c=s[i]; | 
| 1071 | 0 |         if(c==0x2e) {  // dot | 
| 1072 | 0 |             if(i>labelStart) { | 
| 1073 | 0 |                 c=s[i-1]; | 
| 1074 | 0 |                 if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { | 
| 1075 |  |                     // Last character in the label is not an L or EN. | 
| 1076 | 0 |                     return FALSE; | 
| 1077 | 0 |                 } | 
| 1078 | 0 |             } | 
| 1079 | 0 |             labelStart=i+1; | 
| 1080 | 0 |         } else if(i==labelStart) { | 
| 1081 | 0 |             if(!(0x61<=c && c<=0x7a)) { | 
| 1082 |  |                 // First character in the label is not an L. | 
| 1083 | 0 |                 return FALSE; | 
| 1084 | 0 |             } | 
| 1085 | 0 |         } else { | 
| 1086 | 0 |             if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { | 
| 1087 |  |                 // Intermediate character in the label is a B, S or WS. | 
| 1088 | 0 |                 return FALSE; | 
| 1089 | 0 |             } | 
| 1090 | 0 |         } | 
| 1091 | 0 |     } | 
| 1092 | 0 |     return TRUE; | 
| 1093 | 0 | } | 
| 1094 |  |  | 
| 1095 |  | // UTF-8 version, called for source ASCII prefix. | 
| 1096 |  | // Can contain uppercase A-Z. | 
| 1097 |  | // s[length-1] must be the trailing dot. | 
| 1098 |  | static UBool | 
| 1099 | 0 | isASCIIOkBiDi(const char *s, int32_t length) { | 
| 1100 | 0 |     int32_t labelStart=0; | 
| 1101 | 0 |     for(int32_t i=0; i<length; ++i) { | 
| 1102 | 0 |         char c=s[i]; | 
| 1103 | 0 |         if(c==0x2e) {  // dot | 
| 1104 | 0 |             if(i>labelStart) { | 
| 1105 | 0 |                 c=s[i-1]; | 
| 1106 | 0 |                 if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { | 
| 1107 |  |                     // Last character in the label is not an L or EN. | 
| 1108 | 0 |                     return FALSE; | 
| 1109 | 0 |                 } | 
| 1110 | 0 |             } | 
| 1111 | 0 |             labelStart=i+1; | 
| 1112 | 0 |         } else if(i==labelStart) { | 
| 1113 | 0 |             if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { | 
| 1114 |  |                 // First character in the label is not an L. | 
| 1115 | 0 |                 return FALSE; | 
| 1116 | 0 |             } | 
| 1117 | 0 |         } else { | 
| 1118 | 0 |             if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { | 
| 1119 |  |                 // Intermediate character in the label is a B, S or WS. | 
| 1120 | 0 |                 return FALSE; | 
| 1121 | 0 |             } | 
| 1122 | 0 |         } | 
| 1123 | 0 |     } | 
| 1124 | 0 |     return TRUE; | 
| 1125 | 0 | } | 
| 1126 |  |  | 
| 1127 |  | UBool | 
| 1128 | 0 | UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { | 
| 1129 | 0 |     const UBiDiProps *bdp=ubidi_getSingleton(); | 
| 1130 |  |     // [IDNA2008-Tables] | 
| 1131 |  |     // 200C..200D  ; CONTEXTJ    # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER | 
| 1132 | 0 |     for(int32_t i=0; i<labelLength; ++i) { | 
| 1133 | 0 |         if(label[i]==0x200c) { | 
| 1134 |  |             // Appendix A.1. ZERO WIDTH NON-JOINER | 
| 1135 |  |             // Rule Set: | 
| 1136 |  |             //  False; | 
| 1137 |  |             //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True; | 
| 1138 |  |             //  If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C | 
| 1139 |  |             //     (Joining_Type:T)*(Joining_Type:{R,D})) Then True; | 
| 1140 | 0 |             if(i==0) { | 
| 1141 | 0 |                 return FALSE; | 
| 1142 | 0 |             } | 
| 1143 | 0 |             UChar32 c; | 
| 1144 | 0 |             int32_t j=i; | 
| 1145 | 0 |             U16_PREV_UNSAFE(label, j, c); | 
| 1146 | 0 |             if(uts46Norm2.getCombiningClass(c)==9) { | 
| 1147 | 0 |                 continue; | 
| 1148 | 0 |             } | 
| 1149 |  |             // check precontext (Joining_Type:{L,D})(Joining_Type:T)* | 
| 1150 | 0 |             for(;;) { | 
| 1151 | 0 |                 UJoiningType type=ubidi_getJoiningType(bdp, c); | 
| 1152 | 0 |                 if(type==U_JT_TRANSPARENT) { | 
| 1153 | 0 |                     if(j==0) { | 
| 1154 | 0 |                         return FALSE; | 
| 1155 | 0 |                     } | 
| 1156 | 0 |                     U16_PREV_UNSAFE(label, j, c); | 
| 1157 | 0 |                 } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) { | 
| 1158 | 0 |                     break;  // precontext fulfilled | 
| 1159 | 0 |                 } else { | 
| 1160 | 0 |                     return FALSE; | 
| 1161 | 0 |                 } | 
| 1162 | 0 |             } | 
| 1163 |  |             // check postcontext (Joining_Type:T)*(Joining_Type:{R,D}) | 
| 1164 | 0 |             for(j=i+1;;) { | 
| 1165 | 0 |                 if(j==labelLength) { | 
| 1166 | 0 |                     return FALSE; | 
| 1167 | 0 |                 } | 
| 1168 | 0 |                 U16_NEXT_UNSAFE(label, j, c); | 
| 1169 | 0 |                 UJoiningType type=ubidi_getJoiningType(bdp, c); | 
| 1170 | 0 |                 if(type==U_JT_TRANSPARENT) { | 
| 1171 |  |                     // just skip this character | 
| 1172 | 0 |                 } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { | 
| 1173 | 0 |                     break;  // postcontext fulfilled | 
| 1174 | 0 |                 } else { | 
| 1175 | 0 |                     return FALSE; | 
| 1176 | 0 |                 } | 
| 1177 | 0 |             } | 
| 1178 | 0 |         } else if(label[i]==0x200d) { | 
| 1179 |  |             // Appendix A.2. ZERO WIDTH JOINER (U+200D) | 
| 1180 |  |             // Rule Set: | 
| 1181 |  |             //  False; | 
| 1182 |  |             //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True; | 
| 1183 | 0 |             if(i==0) { | 
| 1184 | 0 |                 return FALSE; | 
| 1185 | 0 |             } | 
| 1186 | 0 |             UChar32 c; | 
| 1187 | 0 |             int32_t j=i; | 
| 1188 | 0 |             U16_PREV_UNSAFE(label, j, c); | 
| 1189 | 0 |             if(uts46Norm2.getCombiningClass(c)!=9) { | 
| 1190 | 0 |                 return FALSE; | 
| 1191 | 0 |             } | 
| 1192 | 0 |         } | 
| 1193 | 0 |     } | 
| 1194 | 0 |     return TRUE; | 
| 1195 | 0 | } | 
| 1196 |  |  | 
| 1197 |  | void  /* Applies the CONTEXTO rules (Appendix A.3..A.9, quoted below) to one label; each violation is OR-ed into info.labelErrors. */ | 
| 1198 | 0 | UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const {  /* label is read as UTF-16 code units, labelLength is their count; nothing is returned */ | 
| 1199 | 0 |     int32_t labelEnd=labelLength-1;  // inclusive index of the last code unit | 
| 1200 | 0 |     int32_t arabicDigits=0;  // -1 for 066x, +1 for 06Fx; stays 0 until one of those digit ranges is seen | 
| 1201 | 0 |     for(int32_t i=0; i<=labelEnd; ++i) {  /* one code unit per step; surrogate code units match none of the branches below */ | 
| 1202 | 0 |         UChar32 c=label[i]; | 
| 1203 | 0 |         if(c<0xb7) { | 
| 1204 |  |             // Fastpath: no rule below applies to anything under U+00B7 (this covers all of ASCII). | 
| 1205 | 0 |         } else if(c<=0x6f9) {  /* U+00B7..U+06F9: middle dot, keraia, geresh/gershayim and the two Arabic digit ranges */ | 
| 1206 | 0 |             if(c==0xb7) { | 
| 1207 |  |                 // Appendix A.3. MIDDLE DOT (U+00B7) | 
| 1208 |  |                 // Rule Set: | 
| 1209 |  |                 //  False; | 
| 1210 |  |                 //  If Before(cp) .eq.  U+006C And | 
| 1211 |  |                 //     After(cp) .eq.  U+006C Then True; | 
| 1212 | 0 |                 if(!(0<i && label[i-1]==0x6c &&  /* the index checks come first so label[i-1] and label[i+1] stay inside the label */ | 
| 1213 | 0 |                      i<labelEnd && label[i+1]==0x6c)) { | 
| 1214 | 0 |                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | 
| 1215 | 0 |                 } | 
| 1216 | 0 |             } else if(c==0x375) { | 
| 1217 |  |                 // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375) | 
| 1218 |  |                 // Rule Set: | 
| 1219 |  |                 //  False; | 
| 1220 |  |                 //  If Script(After(cp)) .eq.  Greek Then True; | 
| 1221 | 0 |                 UScriptCode script=USCRIPT_INVALID_CODE;  /* stays invalid, and therefore fails the check, when nothing follows the keraia */ | 
| 1222 | 0 |                 if(i<labelEnd) { | 
| 1223 | 0 |                     UErrorCode errorCode=U_ZERO_ERROR;  /* passed to uscript_getScript(); not inspected afterwards */ | 
| 1224 | 0 |                     int32_t j=i+1; | 
| 1225 | 0 |                     U16_NEXT(label, j, labelLength, c);  /* overwrites c with the code point that follows the keraia */ | 
| 1226 | 0 |                     script=uscript_getScript(c, &errorCode); | 
| 1227 | 0 |                 } | 
| 1228 | 0 |                 if(script!=USCRIPT_GREEK) { | 
| 1229 | 0 |                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | 
| 1230 | 0 |                 } | 
| 1231 | 0 |             } else if(c==0x5f3 || c==0x5f4) { | 
| 1232 |  |                 // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3) | 
| 1233 |  |                 // Rule Set: | 
| 1234 |  |                 //  False; | 
| 1235 |  |                 //  If Script(Before(cp)) .eq.  Hebrew Then True; | 
| 1236 |  |                 // | 
| 1237 |  |                 // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4) | 
| 1238 |  |                 // Rule Set: | 
| 1239 |  |                 //  False; | 
| 1240 |  |                 //  If Script(Before(cp)) .eq.  Hebrew Then True; | 
| 1241 | 0 |                 UScriptCode script=USCRIPT_INVALID_CODE;  /* stays invalid, and therefore fails the check, when the mark is the first character */ | 
| 1242 | 0 |                 if(0<i) { | 
| 1243 | 0 |                     UErrorCode errorCode=U_ZERO_ERROR;  /* passed to uscript_getScript(); not inspected afterwards */ | 
| 1244 | 0 |                     int32_t j=i; | 
| 1245 | 0 |                     U16_PREV(label, 0, j, c);  /* overwrites c with the code point that precedes the mark */ | 
| 1246 | 0 |                     script=uscript_getScript(c, &errorCode); | 
| 1247 | 0 |                 } | 
| 1248 | 0 |                 if(script!=USCRIPT_HEBREW) { | 
| 1249 | 0 |                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | 
| 1250 | 0 |                 } | 
| 1251 | 0 |             } else if(0x660<=c /* && c<=0x6f9 */) { | 
| 1252 |  |                 // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669) | 
| 1253 |  |                 // Rule Set: | 
| 1254 |  |                 //  True; | 
| 1255 |  |                 //  For All Characters: | 
| 1256 |  |                 //    If cp .in. 06F0..06F9 Then False; | 
| 1257 |  |                 //  End For; | 
| 1258 |  |                 // | 
| 1259 |  |                 // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9) | 
| 1260 |  |                 // Rule Set: | 
| 1261 |  |                 //  True; | 
| 1262 |  |                 //  For All Characters: | 
| 1263 |  |                 //    If cp .in. 0660..0669 Then False; | 
| 1264 |  |                 //  End For; | 
| 1265 | 0 |                 if(c<=0x669) {  /* Arabic-Indic digit */ | 
| 1266 | 0 |                     if(arabicDigits>0) {  /* an Extended Arabic-Indic digit was seen earlier in this label */ | 
| 1267 | 0 |                         info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; | 
| 1268 | 0 |                     } | 
| 1269 | 0 |                     arabicDigits=-1; | 
| 1270 | 0 |                 } else if(0x6f0<=c) {  /* Extended Arabic-Indic digit; U+066A..U+06EF match neither test and are left alone */ | 
| 1271 | 0 |                     if(arabicDigits<0) {  /* an Arabic-Indic digit was seen earlier in this label */ | 
| 1272 | 0 |                         info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; | 
| 1273 | 0 |                     } | 
| 1274 | 0 |                     arabicDigits=1; | 
| 1275 | 0 |                 } | 
| 1276 | 0 |             } | 
| 1277 | 0 |         } else if(c==0x30fb) { | 
| 1278 |  |             // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) | 
| 1279 |  |             // Rule Set: | 
| 1280 |  |             //  False; | 
| 1281 |  |             //  For All Characters: | 
| 1282 |  |             //    If Script(cp) .in. {Hiragana, Katakana, Han} Then True; | 
| 1283 |  |             //  End For; | 
| 1284 | 0 |             UErrorCode errorCode=U_ZERO_ERROR;  /* passed to uscript_getScript(); not inspected afterwards */ | 
| 1285 | 0 |             for(int j=0;;) {  /* rescans the whole label from its start; j is only advanced by U16_NEXT */ | 
| 1286 | 0 |                 if(j>labelEnd) {  /* ran past the end without meeting a Hiragana, Katakana or Han character */ | 
| 1287 | 0 |                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | 
| 1288 | 0 |                     break; | 
| 1289 | 0 |                 } | 
| 1290 | 0 |                 U16_NEXT(label, j, labelLength, c);  /* c is reused as the scan variable; the outer loop reloads it from label[i] on its next step */ | 
| 1291 | 0 |                 UScriptCode script=uscript_getScript(c, &errorCode); | 
| 1292 | 0 |                 if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { | 
| 1293 | 0 |                     break; | 
| 1294 | 0 |                 } | 
| 1295 | 0 |             } | 
| 1296 | 0 |         } | 
| 1297 | 0 |     } | 
| 1298 | 0 | } | 
| 1299 |  |  | 
| 1300 |  | U_NAMESPACE_END | 
| 1301 |  |  | 
| 1302 |  | // C API ------------------------------------------------------------------- *** | 
| 1303 |  |  | 
| 1304 |  | U_NAMESPACE_USE | 
| 1305 |  |  | 
| 1306 |  | U_CAPI UIDNA * U_EXPORT2  /* C entry point: returns the object made by IDNA::createUTS46Instance() as an opaque UIDNA pointer. */ | 
| 1307 | 0 | uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) {  /* pErrorCode is dereferenced without a NULL check */ | 
| 1308 | 0 |     return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));  /* options is forwarded unchanged */ | 
| 1309 | 0 | } | 
| 1310 |  |  | 
| 1311 |  | U_CAPI void U_EXPORT2  /* C entry point: destroys the object behind a UIDNA pointer. */ | 
| 1312 | 0 | uidna_close(UIDNA *idna) { | 
| 1313 | 0 |     delete reinterpret_cast<IDNA *>(idna);  /* the pointer is cast to IDNA before deletion; deleting a null pointer does nothing */ | 
| 1314 | 0 | } | 
| 1315 |  |  | 
| 1316 |  | static UBool  /* Argument validation shared by the uidna_ conversion functions in this file: TRUE when the call may proceed, FALSE otherwise. */ | 
| 1317 |  | checkArgs(const void *label, int32_t length,  /* label and dest are void pointers so UChar and char buffers can both be checked */ | 
| 1318 |  |           void *dest, int32_t capacity, | 
| 1319 | 0 |           UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1320 | 0 |     if(U_FAILURE(*pErrorCode)) {  /* an error that is already set is left untouched */ | 
| 1321 | 0 |         return FALSE; | 
| 1322 | 0 |     } | 
| 1323 |  |     // sizeof(UIDNAInfo)=16 in the first API version, so a smaller size field is rejected. | 
| 1324 | 0 |     if(pInfo==NULL || pInfo->size<16) { | 
| 1325 | 0 |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 1326 | 0 |         return FALSE; | 
| 1327 | 0 |     } | 
| 1328 | 0 |     if( (label==NULL ? length!=0 : length<-1) ||  /* a NULL input needs length 0; otherwise the lowest accepted length is -1 */ | 
| 1329 | 0 |         (dest==NULL ? capacity!=0 : capacity<0) ||  /* a NULL output needs capacity 0; otherwise capacity must not be negative */ | 
| 1330 | 0 |         (dest==label && label!=NULL)  /* input and output must not be the same buffer */ | 
| 1331 | 0 |     ) { | 
| 1332 | 0 |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 1333 | 0 |         return FALSE; | 
| 1334 | 0 |     } | 
| 1335 |  |     // Set all *pInfo bytes to 0 except for the size field itself: start just past it and clear (size - sizeof(size)) bytes. | 
| 1336 | 0 |     uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); | 
| 1337 | 0 |     return TRUE; | 
| 1338 | 0 | } | 
| 1339 |  |  | 
| 1340 |  | static void  /* Copies the transitional-difference flag and the error bits from the C++ IDNAInfo into the C UIDNAInfo struct. */ | 
| 1341 | 0 | idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) {  /* only these two pInfo fields are written */ | 
| 1342 | 0 |     pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); | 
| 1343 | 0 |     pInfo->errors=info.getErrors(); | 
| 1344 | 0 | } | 
| 1345 |  |  | 
| 1346 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::labelToASCII() for UChar buffers; returns the result of destString.extract(), or 0 when checkArgs() rejects the call. */ | 
| 1347 |  | uidna_labelToASCII(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1348 |  |                    const UChar *label, int32_t length, | 
| 1349 |  |                    UChar *dest, int32_t capacity, | 
| 1350 | 0 |                    UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1351 | 0 |     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1352 | 0 |         return 0; | 
| 1353 | 0 |     } | 
| 1354 | 0 |     UnicodeString src((UBool)(length<0), label, length);  /* first argument is true only for a negative length; presumably the read-only alias constructor (isTerminated, text, length) - confirm against unistr.h */ | 
| 1355 | 0 |     UnicodeString destString(dest, 0, capacity);  /* built over the caller's dest buffer with initial length 0 */ | 
| 1356 | 0 |     IDNAInfo info; | 
| 1357 | 0 |     reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode); | 
| 1358 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1359 | 0 |     return destString.extract(dest, capacity, *pErrorCode); | 
| 1360 | 0 | } | 
| 1361 |  |  | 
| 1362 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::labelToUnicode() for UChar buffers; returns the result of destString.extract(), or 0 when checkArgs() rejects the call. */ | 
| 1363 |  | uidna_labelToUnicode(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1364 |  |                      const UChar *label, int32_t length, | 
| 1365 |  |                      UChar *dest, int32_t capacity, | 
| 1366 | 0 |                      UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1367 | 0 |     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1368 | 0 |         return 0; | 
| 1369 | 0 |     } | 
| 1370 | 0 |     UnicodeString src((UBool)(length<0), label, length);  /* first argument is true only for a negative length; presumably the read-only alias constructor (isTerminated, text, length) - confirm against unistr.h */ | 
| 1371 | 0 |     UnicodeString destString(dest, 0, capacity);  /* built over the caller's dest buffer with initial length 0 */ | 
| 1372 | 0 |     IDNAInfo info; | 
| 1373 | 0 |     reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode); | 
| 1374 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1375 | 0 |     return destString.extract(dest, capacity, *pErrorCode); | 
| 1376 | 0 | } | 
| 1377 |  |  | 
| 1378 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::nameToASCII() for UChar buffers; returns the result of destString.extract(), or 0 when checkArgs() rejects the call. */ | 
| 1379 |  | uidna_nameToASCII(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1380 |  |                   const UChar *name, int32_t length, | 
| 1381 |  |                   UChar *dest, int32_t capacity, | 
| 1382 | 0 |                   UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1383 | 0 |     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1384 | 0 |         return 0; | 
| 1385 | 0 |     } | 
| 1386 | 0 |     UnicodeString src((UBool)(length<0), name, length);  /* first argument is true only for a negative length; presumably the read-only alias constructor (isTerminated, text, length) - confirm against unistr.h */ | 
| 1387 | 0 |     UnicodeString destString(dest, 0, capacity);  /* built over the caller's dest buffer with initial length 0 */ | 
| 1388 | 0 |     IDNAInfo info; | 
| 1389 | 0 |     reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode); | 
| 1390 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1391 | 0 |     return destString.extract(dest, capacity, *pErrorCode); | 
| 1392 | 0 | } | 
| 1393 |  |  | 
| 1394 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::nameToUnicode() for UChar buffers; returns the result of destString.extract(), or 0 when checkArgs() rejects the call. */ | 
| 1395 |  | uidna_nameToUnicode(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1396 |  |                     const UChar *name, int32_t length, | 
| 1397 |  |                     UChar *dest, int32_t capacity, | 
| 1398 | 0 |                     UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1399 | 0 |     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1400 | 0 |         return 0; | 
| 1401 | 0 |     } | 
| 1402 | 0 |     UnicodeString src((UBool)(length<0), name, length);  /* first argument is true only for a negative length; presumably the read-only alias constructor (isTerminated, text, length) - confirm against unistr.h */ | 
| 1403 | 0 |     UnicodeString destString(dest, 0, capacity);  /* built over the caller's dest buffer with initial length 0 */ | 
| 1404 | 0 |     IDNAInfo info; | 
| 1405 | 0 |     reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode); | 
| 1406 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1407 | 0 |     return destString.extract(dest, capacity, *pErrorCode); | 
| 1408 | 0 | } | 
| 1409 |  |  | 
| 1410 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::labelToASCII_UTF8() for char buffers; returns the result of u_terminateChars(), or 0 when checkArgs() rejects the call. */ | 
| 1411 |  | uidna_labelToASCII_UTF8(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1412 |  |                         const char *label, int32_t length, | 
| 1413 |  |                         char *dest, int32_t capacity, | 
| 1414 | 0 |                         UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1415 | 0 |     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1416 | 0 |         return 0; | 
| 1417 | 0 |     } | 
| 1418 | 0 |     StringPiece src(label, length<0 ? uprv_strlen(label) : length);  /* a negative length means the input is measured with uprv_strlen(); NOTE(review): the conditional mixes that result with an int32_t - confirm the implicit conversion is intended */ | 
| 1419 | 0 |     CheckedArrayByteSink sink(dest, capacity);  /* output is collected through a sink built over the caller's dest buffer */ | 
| 1420 | 0 |     IDNAInfo info; | 
| 1421 | 0 |     reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); | 
| 1422 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1423 | 0 |     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  /* the sink's appended-byte count is handed on as the output length */ | 
| 1424 | 0 | } | 
| 1425 |  |  | 
| 1426 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::labelToUnicodeUTF8() for char buffers; returns the result of u_terminateChars(), or 0 when checkArgs() rejects the call. */ | 
| 1427 |  | uidna_labelToUnicodeUTF8(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1428 |  |                          const char *label, int32_t length, | 
| 1429 |  |                          char *dest, int32_t capacity, | 
| 1430 | 0 |                          UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1431 | 0 |     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1432 | 0 |         return 0; | 
| 1433 | 0 |     } | 
| 1434 | 0 |     StringPiece src(label, length<0 ? uprv_strlen(label) : length);  /* a negative length means the input is measured with uprv_strlen(); NOTE(review): the conditional mixes that result with an int32_t - confirm the implicit conversion is intended */ | 
| 1435 | 0 |     CheckedArrayByteSink sink(dest, capacity);  /* output is collected through a sink built over the caller's dest buffer */ | 
| 1436 | 0 |     IDNAInfo info; | 
| 1437 | 0 |     reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); | 
| 1438 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1439 | 0 |     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  /* the sink's appended-byte count is handed on as the output length */ | 
| 1440 | 0 | } | 
| 1441 |  |  | 
| 1442 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::nameToASCII_UTF8() for char buffers; returns the result of u_terminateChars(), or 0 when checkArgs() rejects the call. */ | 
| 1443 |  | uidna_nameToASCII_UTF8(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1444 |  |                        const char *name, int32_t length, | 
| 1445 |  |                        char *dest, int32_t capacity, | 
| 1446 | 0 |                        UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1447 | 0 |     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1448 | 0 |         return 0; | 
| 1449 | 0 |     } | 
| 1450 | 0 |     StringPiece src(name, length<0 ? uprv_strlen(name) : length);  /* a negative length means the input is measured with uprv_strlen(); NOTE(review): the conditional mixes that result with an int32_t - confirm the implicit conversion is intended */ | 
| 1451 | 0 |     CheckedArrayByteSink sink(dest, capacity);  /* output is collected through a sink built over the caller's dest buffer */ | 
| 1452 | 0 |     IDNAInfo info; | 
| 1453 | 0 |     reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); | 
| 1454 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1455 | 0 |     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  /* the sink's appended-byte count is handed on as the output length */ | 
| 1456 | 0 | } | 
| 1457 |  |  | 
| 1458 |  | U_CAPI int32_t U_EXPORT2  /* C wrapper around IDNA::nameToUnicodeUTF8() for char buffers; returns the result of u_terminateChars(), or 0 when checkArgs() rejects the call. */ | 
| 1459 |  | uidna_nameToUnicodeUTF8(const UIDNA *idna,  /* idna is cast to const IDNA and dereferenced without a NULL check */ | 
| 1460 |  |                         const char *name, int32_t length, | 
| 1461 |  |                         char *dest, int32_t capacity, | 
| 1462 | 0 |                         UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | 
| 1463 | 0 |     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | 
| 1464 | 0 |         return 0; | 
| 1465 | 0 |     } | 
| 1466 | 0 |     StringPiece src(name, length<0 ? uprv_strlen(name) : length);  /* a negative length means the input is measured with uprv_strlen(); NOTE(review): the conditional mixes that result with an int32_t - confirm the implicit conversion is intended */ | 
| 1467 | 0 |     CheckedArrayByteSink sink(dest, capacity);  /* output is collected through a sink built over the caller's dest buffer */ | 
| 1468 | 0 |     IDNAInfo info; | 
| 1469 | 0 |     reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); | 
| 1470 | 0 |     idnaInfoToStruct(info, pInfo);  /* runs regardless of what the conversion left in *pErrorCode */ | 
| 1471 | 0 |     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  /* the sink's appended-byte count is handed on as the output length */ | 
| 1472 | 0 | } | 
| 1473 |  |  | 
| 1474 |  | #endif  // UCONFIG_NO_IDNA |