/src/icu/source/common/norm2allmodes.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // © 2016 and later: Unicode, Inc. and others. | 
| 2 |  | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 |  | /* | 
| 4 |  | ******************************************************************************* | 
| 5 |  | * Copyright (C) 2014, International Business Machines | 
| 6 |  | * Corporation and others.  All Rights Reserved. | 
| 7 |  | ******************************************************************************* | 
| 8 |  | * norm2allmodes.h | 
| 9 |  | * | 
| 10 |  | * created on: 2014sep07 | 
| 11 |  | * created by: Markus W. Scherer | 
| 12 |  | */ | 
| 13 |  |  | 
| 14 |  | #ifndef __NORM2ALLMODES_H__ | 
| 15 |  | #define __NORM2ALLMODES_H__ | 
| 16 |  |  | 
| 17 |  | #include "unicode/utypes.h" | 
| 18 |  |  | 
| 19 |  | #if !UCONFIG_NO_NORMALIZATION | 
| 20 |  |  | 
| 21 |  | #include "unicode/normalizer2.h" | 
| 22 |  | #include "unicode/unistr.h" | 
| 23 |  | #include "cpputils.h" | 
| 24 |  | #include "normalizer2impl.h" | 
| 25 |  |  | 
| 26 |  | U_NAMESPACE_BEGIN | 
| 27 |  |  | 
| 28 |  | // Intermediate class: | 
| 29 |  | // Has Normalizer2Impl and does boilerplate argument checking and setup. | 
| 30 |  | class Normalizer2WithImpl : public Normalizer2 { | 
| 31 |  | public: | 
| 32 | 8 |     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} | 
| 33 |  |     virtual ~Normalizer2WithImpl(); | 
| 34 |  |  | 
| 35 |  |     // normalize | 
| 36 |  |     virtual UnicodeString & | 
| 37 |  |     normalize(const UnicodeString &src, | 
| 38 |  |               UnicodeString &dest, | 
| 39 | 6.57M |               UErrorCode &errorCode) const { | 
| 40 | 6.57M |         if(U_FAILURE(errorCode)) { | 
| 41 | 0 |             dest.setToBogus(); | 
| 42 | 0 |             return dest; | 
| 43 | 0 |         } | 
| 44 | 6.57M |         const UChar *sArray=src.getBuffer(); | 
| 45 | 6.57M |         if(&dest==&src || sArray==NULL) { | 
| 46 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 47 | 0 |             dest.setToBogus(); | 
| 48 | 0 |             return dest; | 
| 49 | 0 |         } | 
| 50 | 6.57M |         dest.remove(); | 
| 51 | 6.57M |         ReorderingBuffer buffer(impl, dest); | 
| 52 | 6.57M |         if(buffer.init(src.length(), errorCode)) { | 
| 53 | 6.57M |             normalize(sArray, sArray+src.length(), buffer, errorCode); | 
| 54 | 6.57M |         } | 
| 55 | 6.57M |         return dest; | 
| 56 | 6.57M |     } | 
| 57 |  |     virtual void | 
| 58 |  |     normalize(const UChar *src, const UChar *limit, | 
| 59 |  |               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; | 
| 60 |  |  | 
| 61 |  |     // normalize and append | 
| 62 |  |     virtual UnicodeString & | 
| 63 |  |     normalizeSecondAndAppend(UnicodeString &first, | 
| 64 |  |                              const UnicodeString &second, | 
| 65 | 379k |                              UErrorCode &errorCode) const { | 
| 66 | 379k |         return normalizeSecondAndAppend(first, second, TRUE, errorCode); | 
| 67 | 379k |     } | 
| 68 |  |     virtual UnicodeString & | 
| 69 |  |     append(UnicodeString &first, | 
| 70 |  |            const UnicodeString &second, | 
| 71 | 0 |            UErrorCode &errorCode) const { | 
| 72 | 0 |         return normalizeSecondAndAppend(first, second, FALSE, errorCode); | 
| 73 | 0 |     } | 
| 74 |  |     UnicodeString & | 
| 75 |  |     normalizeSecondAndAppend(UnicodeString &first, | 
| 76 |  |                              const UnicodeString &second, | 
| 77 |  |                              UBool doNormalize, | 
| 78 | 379k |                              UErrorCode &errorCode) const { | 
| 79 | 379k |         uprv_checkCanGetBuffer(first, errorCode); | 
| 80 | 379k |         if(U_FAILURE(errorCode)) { | 
| 81 | 0 |             return first; | 
| 82 | 0 |         } | 
| 83 | 379k |         const UChar *secondArray=second.getBuffer(); | 
| 84 | 379k |         if(&first==&second || secondArray==NULL) { | 
| 85 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 86 | 0 |             return first; | 
| 87 | 0 |         } | 
| 88 | 379k |         int32_t firstLength=first.length(); | 
| 89 | 379k |         UnicodeString safeMiddle; | 
| 90 | 379k |         { | 
| 91 | 379k |             ReorderingBuffer buffer(impl, first); | 
| 92 | 379k |             if(buffer.init(firstLength+second.length(), errorCode)) { | 
| 93 | 379k |                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, | 
| 94 | 379k |                                    safeMiddle, buffer, errorCode); | 
| 95 | 379k |             } | 
| 96 | 379k |         }  // The ReorderingBuffer destructor finalizes the first string. | 
| 97 | 379k |         if(U_FAILURE(errorCode)) { | 
| 98 |  |             // Restore the modified suffix of the first string. | 
| 99 | 0 |             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); | 
| 100 | 0 |         } | 
| 101 | 379k |         return first; | 
| 102 | 379k |     } | 
| 103 |  |     virtual void | 
| 104 |  |     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | 
| 105 |  |                        UnicodeString &safeMiddle, | 
| 106 |  |                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; | 
| 107 |  |     virtual UBool | 
| 108 | 0 |     getDecomposition(UChar32 c, UnicodeString &decomposition) const { | 
| 109 | 0 |         UChar buffer[4]; | 
| 110 | 0 |         int32_t length; | 
| 111 | 0 |         const UChar *d=impl.getDecomposition(c, buffer, length); | 
| 112 | 0 |         if(d==NULL) { | 
| 113 | 0 |             return FALSE; | 
| 114 | 0 |         } | 
| 115 | 0 |         if(d==buffer) { | 
| 116 | 0 |             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c) | 
| 117 | 0 |         } else { | 
| 118 | 0 |             decomposition.setTo(FALSE, d, length);  // read-only alias | 
| 119 | 0 |         } | 
| 120 | 0 |         return TRUE; | 
| 121 | 0 |     } | 
| 122 |  |     virtual UBool | 
| 123 | 0 |     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { | 
| 124 | 0 |         UChar buffer[30]; | 
| 125 | 0 |         int32_t length; | 
| 126 | 0 |         const UChar *d=impl.getRawDecomposition(c, buffer, length); | 
| 127 | 0 |         if(d==NULL) { | 
| 128 | 0 |             return FALSE; | 
| 129 | 0 |         } | 
| 130 | 0 |         if(d==buffer) { | 
| 131 | 0 |             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition) | 
| 132 | 0 |         } else { | 
| 133 | 0 |             decomposition.setTo(FALSE, d, length);  // read-only alias | 
| 134 | 0 |         } | 
| 135 | 0 |         return TRUE; | 
| 136 | 0 |     } | 
| 137 |  |     virtual UChar32 | 
| 138 | 0 |     composePair(UChar32 a, UChar32 b) const { | 
| 139 | 0 |         return impl.composePair(a, b); | 
| 140 | 0 |     } | 
| 141 |  |  | 
| 142 |  |     virtual uint8_t | 
| 143 | 0 |     getCombiningClass(UChar32 c) const { | 
| 144 | 0 |         return impl.getCC(impl.getNorm16(c)); | 
| 145 | 0 |     } | 
| 146 |  |  | 
| 147 |  |     // quick checks | 
| 148 |  |     virtual UBool | 
| 149 | 0 |     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { | 
| 150 | 0 |         if(U_FAILURE(errorCode)) { | 
| 151 | 0 |             return FALSE; | 
| 152 | 0 |         } | 
| 153 | 0 |         const UChar *sArray=s.getBuffer(); | 
| 154 | 0 |         if(sArray==NULL) { | 
| 155 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 156 | 0 |             return FALSE; | 
| 157 | 0 |         } | 
| 158 | 0 |         const UChar *sLimit=sArray+s.length(); | 
| 159 | 0 |         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); | 
| 160 | 0 |     } | 
| 161 |  |     virtual UNormalizationCheckResult | 
| 162 | 0 |     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { | 
| 163 | 0 |         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; | 
| 164 | 0 |     } | 
| 165 |  |     virtual int32_t | 
| 166 | 0 |     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { | 
| 167 | 0 |         if(U_FAILURE(errorCode)) { | 
| 168 | 0 |             return 0; | 
| 169 | 0 |         } | 
| 170 | 0 |         const UChar *sArray=s.getBuffer(); | 
| 171 | 0 |         if(sArray==NULL) { | 
| 172 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 173 | 0 |             return 0; | 
| 174 | 0 |         } | 
| 175 | 0 |         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); | 
| 176 | 0 |     } | 
| 177 |  |     virtual const UChar * | 
| 178 |  |     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; | 
| 179 |  |  | 
| 180 | 0 |     virtual UNormalizationCheckResult getQuickCheck(UChar32) const { | 
| 181 | 0 |         return UNORM_YES; | 
| 182 | 0 |     } | 
| 183 |  |  | 
| 184 |  |     const Normalizer2Impl &impl; | 
| 185 |  | }; | 
| 186 |  |  | 
| 187 |  | class DecomposeNormalizer2 : public Normalizer2WithImpl { | 
| 188 |  | public: | 
| 189 | 2 |     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} | 
| 190 |  |     virtual ~DecomposeNormalizer2(); | 
| 191 |  |  | 
| 192 |  | private: | 
| 193 |  |     virtual void | 
| 194 |  |     normalize(const UChar *src, const UChar *limit, | 
| 195 | 0 |               ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 196 | 0 |         impl.decompose(src, limit, &buffer, errorCode); | 
| 197 | 0 |     } | 
| 198 |  |     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function. | 
| 199 |  |     virtual void | 
| 200 |  |     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | 
| 201 |  |                        UnicodeString &safeMiddle, | 
| 202 | 0 |                        ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 203 | 0 |         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); | 
| 204 | 0 |     } | 
| 205 |  |     virtual const UChar * | 
| 206 | 0 |     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { | 
| 207 | 0 |         return impl.decompose(src, limit, NULL, errorCode); | 
| 208 | 0 |     } | 
| 209 |  |     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function. | 
| 210 | 0 |     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { | 
| 211 | 0 |         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; | 
| 212 | 0 |     } | 
| 213 | 0 |     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } | 
| 214 | 0 |     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } | 
| 215 | 0 |     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } | 
| 216 |  | }; | 
| 217 |  |  | 
| 218 |  | class ComposeNormalizer2 : public Normalizer2WithImpl { | 
| 219 |  | public: | 
| 220 |  |     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : | 
| 221 | 4 |         Normalizer2WithImpl(ni), onlyContiguous(fcc) {} | 
| 222 |  |     virtual ~ComposeNormalizer2(); | 
| 223 |  |  | 
| 224 |  | private: | 
| 225 |  |     virtual void | 
| 226 |  |     normalize(const UChar *src, const UChar *limit, | 
| 227 | 6.57M |               ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 228 | 6.57M |         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); | 
| 229 | 6.57M |     } | 
| 230 |  |     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function. | 
| 231 |  |     virtual void | 
| 232 |  |     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | 
| 233 |  |                        UnicodeString &safeMiddle, | 
| 234 | 379k |                        ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 235 | 379k |         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); | 
| 236 | 379k |     } | 
| 237 |  |  | 
| 238 |  |     virtual UBool | 
| 239 | 4.34k |     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { | 
| 240 | 4.34k |         if(U_FAILURE(errorCode)) { | 
| 241 | 0 |             return FALSE; | 
| 242 | 0 |         } | 
| 243 | 4.34k |         const UChar *sArray=s.getBuffer(); | 
| 244 | 4.34k |         if(sArray==NULL) { | 
| 245 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 246 | 0 |             return FALSE; | 
| 247 | 0 |         } | 
| 248 | 4.34k |         UnicodeString temp; | 
| 249 | 4.34k |         ReorderingBuffer buffer(impl, temp); | 
| 250 | 4.34k |         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization | 
| 251 | 0 |             return FALSE; | 
| 252 | 0 |         } | 
| 253 | 4.34k |         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); | 
| 254 | 4.34k |     } | 
| 255 |  |     virtual UNormalizationCheckResult | 
| 256 | 0 |     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { | 
| 257 | 0 |         if(U_FAILURE(errorCode)) { | 
| 258 | 0 |             return UNORM_MAYBE; | 
| 259 | 0 |         } | 
| 260 | 0 |         const UChar *sArray=s.getBuffer(); | 
| 261 | 0 |         if(sArray==NULL) { | 
| 262 | 0 |             errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 263 | 0 |             return UNORM_MAYBE; | 
| 264 | 0 |         } | 
| 265 | 0 |         UNormalizationCheckResult qcResult=UNORM_YES; | 
| 266 | 0 |         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); | 
| 267 | 0 |         return qcResult; | 
| 268 | 0 |     } | 
| 269 |  |     virtual const UChar * | 
| 270 | 0 |     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { | 
| 271 | 0 |         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); | 
| 272 | 0 |     } | 
| 273 |  |     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function. | 
| 274 | 0 |     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { | 
| 275 | 0 |         return impl.getCompQuickCheck(impl.getNorm16(c)); | 
| 276 | 0 |     } | 
| 277 | 0 |     virtual UBool hasBoundaryBefore(UChar32 c) const { | 
| 278 | 0 |         return impl.hasCompBoundaryBefore(c); | 
| 279 | 0 |     } | 
| 280 | 0 |     virtual UBool hasBoundaryAfter(UChar32 c) const { | 
| 281 | 0 |         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); | 
| 282 | 0 |     } | 
| 283 | 0 |     virtual UBool isInert(UChar32 c) const { | 
| 284 | 0 |         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); | 
| 285 | 0 |     } | 
| 286 |  |  | 
| 287 |  |     const UBool onlyContiguous; | 
| 288 |  | }; | 
| 289 |  |  | 
| 290 |  | class FCDNormalizer2 : public Normalizer2WithImpl { | 
| 291 |  | public: | 
| 292 | 2 |     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} | 
| 293 |  |     virtual ~FCDNormalizer2(); | 
| 294 |  |  | 
| 295 |  | private: | 
| 296 |  |     virtual void | 
| 297 |  |     normalize(const UChar *src, const UChar *limit, | 
| 298 | 0 |               ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 299 | 0 |         impl.makeFCD(src, limit, &buffer, errorCode); | 
| 300 | 0 |     } | 
| 301 |  |     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function. | 
| 302 |  |     virtual void | 
| 303 |  |     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | 
| 304 |  |                        UnicodeString &safeMiddle, | 
| 305 | 0 |                        ReorderingBuffer &buffer, UErrorCode &errorCode) const { | 
| 306 | 0 |         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); | 
| 307 | 0 |     } | 
| 308 |  |     virtual const UChar * | 
| 309 | 0 |     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { | 
| 310 | 0 |         return impl.makeFCD(src, limit, NULL, errorCode); | 
| 311 | 0 |     } | 
| 312 |  |     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function. | 
| 313 | 0 |     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } | 
| 314 | 0 |     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } | 
| 315 | 0 |     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } | 
| 316 |  | }; | 
| 317 |  |  | 
| 318 |  | struct Norm2AllModes : public UMemory { | 
| 319 |  |     Norm2AllModes(Normalizer2Impl *i) | 
| 320 | 2 |             : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} | 
| 321 |  |     ~Norm2AllModes(); | 
| 322 |  |  | 
| 323 |  |     static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); | 
| 324 |  |     static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); | 
| 325 |  |     static Norm2AllModes *createInstance(const char *packageName, | 
| 326 |  |                                          const char *name, | 
| 327 |  |                                          UErrorCode &errorCode); | 
| 328 |  |  | 
| 329 |  |     static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); | 
| 330 |  |     static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); | 
| 331 |  |     static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); | 
| 332 |  |  | 
| 333 |  |     Normalizer2Impl *impl; | 
| 334 |  |     ComposeNormalizer2 comp; | 
| 335 |  |     DecomposeNormalizer2 decomp; | 
| 336 |  |     FCDNormalizer2 fcd; | 
| 337 |  |     ComposeNormalizer2 fcc; | 
| 338 |  | }; | 
| 339 |  |  | 
| 340 |  | U_NAMESPACE_END | 
| 341 |  |  | 
| 342 |  | #endif  // !UCONFIG_NO_NORMALIZATION | 
| 343 |  | #endif  // __NORM2ALLMODES_H__ |