/src/icu/source/i18n/collationruleparser.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2013-2015, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collationruleparser.cpp  | 
9  |  | *  | 
10  |  | * (replaced the former ucol_tok.cpp)  | 
11  |  | *  | 
12  |  | * created on: 2013apr10  | 
13  |  | * created by: Markus W. Scherer  | 
14  |  | */  | 
15  |  |  | 
16  |  | #include "unicode/utypes.h"  | 
17  |  |  | 
18  |  | #if !UCONFIG_NO_COLLATION  | 
19  |  |  | 
20  |  | #include "unicode/normalizer2.h"  | 
21  |  | #include "unicode/parseerr.h"  | 
22  |  | #include "unicode/uchar.h"  | 
23  |  | #include "unicode/ucol.h"  | 
24  |  | #include "unicode/uloc.h"  | 
25  |  | #include "unicode/unistr.h"  | 
26  |  | #include "unicode/utf16.h"  | 
27  |  | #include "charstr.h"  | 
28  |  | #include "cmemory.h"  | 
29  |  | #include "collation.h"  | 
30  |  | #include "collationdata.h"  | 
31  |  | #include "collationruleparser.h"  | 
32  |  | #include "collationsettings.h"  | 
33  |  | #include "collationtailoring.h"  | 
34  |  | #include "cstring.h"  | 
35  |  | #include "patternprops.h"  | 
36  |  | #include "uassert.h"  | 
37  |  | #include "uvectr32.h"  | 
38  |  |  | 
39  |  | U_NAMESPACE_BEGIN  | 
40  |  |  | 
41  |  | namespace { | 
42  |  |  | 
43  |  | static const UChar BEFORE[] = { 0x5b, 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0 };  // "[before" | 
44  |  | const int32_t BEFORE_LENGTH = 7;  | 
45  |  |  | 
46  |  | }  // namespace  | 
47  |  |  | 
48  | 0  | CollationRuleParser::Sink::~Sink() {} | 
49  |  |  | 
50  |  | void  | 
51  | 0  | CollationRuleParser::Sink::suppressContractions(const UnicodeSet &, const char *&, UErrorCode &) {} | 
52  |  |  | 
53  |  | void  | 
54  | 0  | CollationRuleParser::Sink::optimize(const UnicodeSet &, const char *&, UErrorCode &) {} | 
55  |  |  | 
56  | 0  | CollationRuleParser::Importer::~Importer() {} | 
57  |  |  | 
58  |  | CollationRuleParser::CollationRuleParser(const CollationData *base, UErrorCode &errorCode)  | 
59  | 0  |         : nfd(*Normalizer2::getNFDInstance(errorCode)),  | 
60  | 0  |           nfc(*Normalizer2::getNFCInstance(errorCode)),  | 
61  | 0  |           rules(NULL), baseData(base), settings(NULL),  | 
62  |  |           parseError(NULL), errorReason(NULL),  | 
63  |  |           sink(NULL), importer(NULL),  | 
64  | 0  |           ruleIndex(0) { | 
65  | 0  | }  | 
66  |  |  | 
67  | 0  | CollationRuleParser::~CollationRuleParser() { | 
68  | 0  | }  | 
69  |  |  | 
70  |  | void  | 
71  |  | CollationRuleParser::parse(const UnicodeString &ruleString,  | 
72  |  |                            CollationSettings &outSettings,  | 
73  |  |                            UParseError *outParseError,  | 
74  | 0  |                            UErrorCode &errorCode) { | 
75  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
76  | 0  |     settings = &outSettings;  | 
77  | 0  |     parseError = outParseError;  | 
78  | 0  |     if(parseError != NULL) { | 
79  | 0  |         parseError->line = 0;  | 
80  | 0  |         parseError->offset = -1;  | 
81  | 0  |         parseError->preContext[0] = 0;  | 
82  | 0  |         parseError->postContext[0] = 0;  | 
83  | 0  |     }  | 
84  | 0  |     errorReason = NULL;  | 
85  | 0  |     parse(ruleString, errorCode);  | 
86  | 0  | }  | 
87  |  |  | 
88  |  | void  | 
89  | 0  | CollationRuleParser::parse(const UnicodeString &ruleString, UErrorCode &errorCode) { | 
90  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
91  | 0  |     rules = &ruleString;  | 
92  | 0  |     ruleIndex = 0;  | 
93  |  | 
  | 
94  | 0  |     while(ruleIndex < rules->length()) { | 
95  | 0  |         UChar c = rules->charAt(ruleIndex);  | 
96  | 0  |         if(PatternProps::isWhiteSpace(c)) { | 
97  | 0  |             ++ruleIndex;  | 
98  | 0  |             continue;  | 
99  | 0  |         }  | 
100  | 0  |         switch(c) { | 
101  | 0  |         case 0x26:  // '&'  | 
102  | 0  |             parseRuleChain(errorCode);  | 
103  | 0  |             break;  | 
104  | 0  |         case 0x5b:  // '['  | 
105  | 0  |             parseSetting(errorCode);  | 
106  | 0  |             break;  | 
107  | 0  |         case 0x23:  // '#' starts a comment, until the end of the line  | 
108  | 0  |             ruleIndex = skipComment(ruleIndex + 1);  | 
109  | 0  |             break;  | 
110  | 0  |         case 0x40:  // '@' is equivalent to [backwards 2]  | 
111  | 0  |             settings->setFlag(CollationSettings::BACKWARD_SECONDARY,  | 
112  | 0  |                               UCOL_ON, 0, errorCode);  | 
113  | 0  |             ++ruleIndex;  | 
114  | 0  |             break;  | 
115  | 0  |         case 0x21:  // '!' used to turn on Thai/Lao character reversal  | 
116  |  |             // Accept but ignore. The root collator has contractions  | 
117  |  |             // that are equivalent to the character reversal, where appropriate.  | 
118  | 0  |             ++ruleIndex;  | 
119  | 0  |             break;  | 
120  | 0  |         default:  | 
121  | 0  |             setParseError("expected a reset or setting or comment", errorCode); | 
122  | 0  |             break;  | 
123  | 0  |         }  | 
124  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
125  | 0  |     }  | 
126  | 0  | }  | 
127  |  |  | 
128  |  | void  | 
129  | 0  | CollationRuleParser::parseRuleChain(UErrorCode &errorCode) { | 
130  | 0  |     int32_t resetStrength = parseResetAndPosition(errorCode);  | 
131  | 0  |     UBool isFirstRelation = TRUE;  | 
132  | 0  |     for(;;) { | 
133  | 0  |         int32_t result = parseRelationOperator(errorCode);  | 
134  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
135  | 0  |         if(result < 0) { | 
136  | 0  |             if(ruleIndex < rules->length() && rules->charAt(ruleIndex) == 0x23) { | 
137  |  |                 // '#' starts a comment, until the end of the line  | 
138  | 0  |                 ruleIndex = skipComment(ruleIndex + 1);  | 
139  | 0  |                 continue;  | 
140  | 0  |             }  | 
141  | 0  |             if(isFirstRelation) { | 
142  | 0  |                 setParseError("reset not followed by a relation", errorCode); | 
143  | 0  |             }  | 
144  | 0  |             return;  | 
145  | 0  |         }  | 
146  | 0  |         int32_t strength = result & STRENGTH_MASK;  | 
147  | 0  |         if(resetStrength < UCOL_IDENTICAL) { | 
148  |  |             // reset-before rule chain  | 
149  | 0  |             if(isFirstRelation) { | 
150  | 0  |                 if(strength != resetStrength) { | 
151  | 0  |                     setParseError("reset-before strength differs from its first relation", errorCode); | 
152  | 0  |                     return;  | 
153  | 0  |                 }  | 
154  | 0  |             } else { | 
155  | 0  |                 if(strength < resetStrength) { | 
156  | 0  |                     setParseError("reset-before strength followed by a stronger relation", errorCode); | 
157  | 0  |                     return;  | 
158  | 0  |                 }  | 
159  | 0  |             }  | 
160  | 0  |         }  | 
161  | 0  |         int32_t i = ruleIndex + (result >> OFFSET_SHIFT);  // skip over the relation operator  | 
162  | 0  |         if((result & STARRED_FLAG) == 0) { | 
163  | 0  |             parseRelationStrings(strength, i, errorCode);  | 
164  | 0  |         } else { | 
165  | 0  |             parseStarredCharacters(strength, i, errorCode);  | 
166  | 0  |         }  | 
167  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
168  | 0  |         isFirstRelation = FALSE;  | 
169  | 0  |     }  | 
170  | 0  | }  | 
171  |  |  | 
172  |  | int32_t  | 
173  | 0  | CollationRuleParser::parseResetAndPosition(UErrorCode &errorCode) { | 
174  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } | 
175  | 0  |     int32_t i = skipWhiteSpace(ruleIndex + 1);  | 
176  | 0  |     int32_t j;  | 
177  | 0  |     UChar c;  | 
178  | 0  |     int32_t resetStrength;  | 
179  | 0  |     if(rules->compare(i, BEFORE_LENGTH, BEFORE, 0, BEFORE_LENGTH) == 0 &&  | 
180  | 0  |             (j = i + BEFORE_LENGTH) < rules->length() &&  | 
181  | 0  |             PatternProps::isWhiteSpace(rules->charAt(j)) &&  | 
182  | 0  |             ((j = skipWhiteSpace(j + 1)) + 1) < rules->length() &&  | 
183  | 0  |             0x31 <= (c = rules->charAt(j)) && c <= 0x33 &&  | 
184  | 0  |             rules->charAt(j + 1) == 0x5d) { | 
185  |  |         // &[before n] with n=1 or 2 or 3  | 
186  | 0  |         resetStrength = UCOL_PRIMARY + (c - 0x31);  | 
187  | 0  |         i = skipWhiteSpace(j + 2);  | 
188  | 0  |     } else { | 
189  | 0  |         resetStrength = UCOL_IDENTICAL;  | 
190  | 0  |     }  | 
191  | 0  |     if(i >= rules->length()) { | 
192  | 0  |         setParseError("reset without position", errorCode); | 
193  | 0  |         return UCOL_DEFAULT;  | 
194  | 0  |     }  | 
195  | 0  |     UnicodeString str;  | 
196  | 0  |     if(rules->charAt(i) == 0x5b) {  // '[' | 
197  | 0  |         i = parseSpecialPosition(i, str, errorCode);  | 
198  | 0  |     } else { | 
199  | 0  |         i = parseTailoringString(i, str, errorCode);  | 
200  | 0  |     }  | 
201  | 0  |     sink->addReset(resetStrength, str, errorReason, errorCode);  | 
202  | 0  |     if(U_FAILURE(errorCode)) { setErrorContext(); } | 
203  | 0  |     ruleIndex = i;  | 
204  | 0  |     return resetStrength;  | 
205  | 0  | }  | 
206  |  |  | 
207  |  | int32_t  | 
208  | 0  | CollationRuleParser::parseRelationOperator(UErrorCode &errorCode) { | 
209  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } | 
210  | 0  |     ruleIndex = skipWhiteSpace(ruleIndex);  | 
211  | 0  |     if(ruleIndex >= rules->length()) { return UCOL_DEFAULT; } | 
212  | 0  |     int32_t strength;  | 
213  | 0  |     int32_t i = ruleIndex;  | 
214  | 0  |     UChar c = rules->charAt(i++);  | 
215  | 0  |     switch(c) { | 
216  | 0  |     case 0x3c:  // '<'  | 
217  | 0  |         if(i < rules->length() && rules->charAt(i) == 0x3c) {  // << | 
218  | 0  |             ++i;  | 
219  | 0  |             if(i < rules->length() && rules->charAt(i) == 0x3c) {  // <<< | 
220  | 0  |                 ++i;  | 
221  | 0  |                 if(i < rules->length() && rules->charAt(i) == 0x3c) {  // <<<< | 
222  | 0  |                     ++i;  | 
223  | 0  |                     strength = UCOL_QUATERNARY;  | 
224  | 0  |                 } else { | 
225  | 0  |                     strength = UCOL_TERTIARY;  | 
226  | 0  |                 }  | 
227  | 0  |             } else { | 
228  | 0  |                 strength = UCOL_SECONDARY;  | 
229  | 0  |             }  | 
230  | 0  |         } else { | 
231  | 0  |             strength = UCOL_PRIMARY;  | 
232  | 0  |         }  | 
233  | 0  |         if(i < rules->length() && rules->charAt(i) == 0x2a) {  // '*' | 
234  | 0  |             ++i;  | 
235  | 0  |             strength |= STARRED_FLAG;  | 
236  | 0  |         }  | 
237  | 0  |         break;  | 
238  | 0  |     case 0x3b:  // ';' same as <<  | 
239  | 0  |         strength = UCOL_SECONDARY;  | 
240  | 0  |         break;  | 
241  | 0  |     case 0x2c:  // ',' same as <<<  | 
242  | 0  |         strength = UCOL_TERTIARY;  | 
243  | 0  |         break;  | 
244  | 0  |     case 0x3d:  // '='  | 
245  | 0  |         strength = UCOL_IDENTICAL;  | 
246  | 0  |         if(i < rules->length() && rules->charAt(i) == 0x2a) {  // '*' | 
247  | 0  |             ++i;  | 
248  | 0  |             strength |= STARRED_FLAG;  | 
249  | 0  |         }  | 
250  | 0  |         break;  | 
251  | 0  |     default:  | 
252  | 0  |         return UCOL_DEFAULT;  | 
253  | 0  |     }  | 
254  | 0  |     return ((i - ruleIndex) << OFFSET_SHIFT) | strength;  | 
255  | 0  | }  | 
256  |  |  | 
257  |  | void  | 
258  | 0  | CollationRuleParser::parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode) { | 
259  |  |     // Parse  | 
260  |  |     //     prefix | str / extension  | 
261  |  |     // where prefix and extension are optional.  | 
262  | 0  |     UnicodeString prefix, str, extension;  | 
263  | 0  |     i = parseTailoringString(i, str, errorCode);  | 
264  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
265  | 0  |     UChar next = (i < rules->length()) ? rules->charAt(i) : 0;  | 
266  | 0  |     if(next == 0x7c) {  // '|' separates the context prefix from the string. | 
267  | 0  |         prefix = str;  | 
268  | 0  |         i = parseTailoringString(i + 1, str, errorCode);  | 
269  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
270  | 0  |         next = (i < rules->length()) ? rules->charAt(i) : 0;  | 
271  | 0  |     }  | 
272  | 0  |     if(next == 0x2f) {  // '/' separates the string from the extension. | 
273  | 0  |         i = parseTailoringString(i + 1, extension, errorCode);  | 
274  | 0  |     }  | 
275  | 0  |     if(!prefix.isEmpty()) { | 
276  | 0  |         UChar32 prefix0 = prefix.char32At(0);  | 
277  | 0  |         UChar32 c = str.char32At(0);  | 
278  | 0  |         if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) { | 
279  | 0  |             setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary", | 
280  | 0  |                           errorCode);  | 
281  | 0  |             return;  | 
282  | 0  |         }  | 
283  | 0  |     }  | 
284  | 0  |     sink->addRelation(strength, prefix, str, extension, errorReason, errorCode);  | 
285  | 0  |     if(U_FAILURE(errorCode)) { setErrorContext(); } | 
286  | 0  |     ruleIndex = i;  | 
287  | 0  | }  | 
288  |  |  | 
289  |  | void  | 
290  | 0  | CollationRuleParser::parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode) { | 
291  | 0  |     UnicodeString empty, raw;  | 
292  | 0  |     i = parseString(skipWhiteSpace(i), raw, errorCode);  | 
293  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
294  | 0  |     if(raw.isEmpty()) { | 
295  | 0  |         setParseError("missing starred-relation string", errorCode); | 
296  | 0  |         return;  | 
297  | 0  |     }  | 
298  | 0  |     UChar32 prev = -1;  | 
299  | 0  |     int32_t j = 0;  | 
300  | 0  |     for(;;) { | 
301  | 0  |         while(j < raw.length()) { | 
302  | 0  |             UChar32 c = raw.char32At(j);  | 
303  | 0  |             if(!nfd.isInert(c)) { | 
304  | 0  |                 setParseError("starred-relation string is not all NFD-inert", errorCode); | 
305  | 0  |                 return;  | 
306  | 0  |             }  | 
307  | 0  |             sink->addRelation(strength, empty, UnicodeString(c), empty, errorReason, errorCode);  | 
308  | 0  |             if(U_FAILURE(errorCode)) { | 
309  | 0  |                 setErrorContext();  | 
310  | 0  |                 return;  | 
311  | 0  |             }  | 
312  | 0  |             j += U16_LENGTH(c);  | 
313  | 0  |             prev = c;  | 
314  | 0  |         }  | 
315  | 0  |         if(i >= rules->length() || rules->charAt(i) != 0x2d) {  // '-' | 
316  | 0  |             break;  | 
317  | 0  |         }  | 
318  | 0  |         if(prev < 0) { | 
319  | 0  |             setParseError("range without start in starred-relation string", errorCode); | 
320  | 0  |             return;  | 
321  | 0  |         }  | 
322  | 0  |         i = parseString(i + 1, raw, errorCode);  | 
323  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
324  | 0  |         if(raw.isEmpty()) { | 
325  | 0  |             setParseError("range without end in starred-relation string", errorCode); | 
326  | 0  |             return;  | 
327  | 0  |         }  | 
328  | 0  |         UChar32 c = raw.char32At(0);  | 
329  | 0  |         if(c < prev) { | 
330  | 0  |             setParseError("range start greater than end in starred-relation string", errorCode); | 
331  | 0  |             return;  | 
332  | 0  |         }  | 
333  |  |         // range prev-c  | 
334  | 0  |         UnicodeString s;  | 
335  | 0  |         while(++prev <= c) { | 
336  | 0  |             if(!nfd.isInert(prev)) { | 
337  | 0  |                 setParseError("starred-relation string range is not all NFD-inert", errorCode); | 
338  | 0  |                 return;  | 
339  | 0  |             }  | 
340  | 0  |             if(U_IS_SURROGATE(prev)) { | 
341  | 0  |                 setParseError("starred-relation string range contains a surrogate", errorCode); | 
342  | 0  |                 return;  | 
343  | 0  |             }  | 
344  | 0  |             if(0xfffd <= prev && prev <= 0xffff) { | 
345  | 0  |                 setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF", errorCode); | 
346  | 0  |                 return;  | 
347  | 0  |             }  | 
348  | 0  |             s.setTo(prev);  | 
349  | 0  |             sink->addRelation(strength, empty, s, empty, errorReason, errorCode);  | 
350  | 0  |             if(U_FAILURE(errorCode)) { | 
351  | 0  |                 setErrorContext();  | 
352  | 0  |                 return;  | 
353  | 0  |             }  | 
354  | 0  |         }  | 
355  | 0  |         prev = -1;  | 
356  | 0  |         j = U16_LENGTH(c);  | 
357  | 0  |     }  | 
358  | 0  |     ruleIndex = skipWhiteSpace(i);  | 
359  | 0  | }  | 
360  |  |  | 
361  |  | int32_t  | 
362  | 0  | CollationRuleParser::parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) { | 
363  | 0  |     i = parseString(skipWhiteSpace(i), raw, errorCode);  | 
364  | 0  |     if(U_SUCCESS(errorCode) && raw.isEmpty()) { | 
365  | 0  |         setParseError("missing relation string", errorCode); | 
366  | 0  |     }  | 
367  | 0  |     return skipWhiteSpace(i);  | 
368  | 0  | }  | 
369  |  |  | 
370  |  | int32_t  | 
371  | 0  | CollationRuleParser::parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) { | 
372  | 0  |     if(U_FAILURE(errorCode)) { return i; } | 
373  | 0  |     raw.remove();  | 
374  | 0  |     while(i < rules->length()) { | 
375  | 0  |         UChar32 c = rules->charAt(i++);  | 
376  | 0  |         if(isSyntaxChar(c)) { | 
377  | 0  |             if(c == 0x27) {  // apostrophe | 
378  | 0  |                 if(i < rules->length() && rules->charAt(i) == 0x27) { | 
379  |  |                     // Double apostrophe, encodes a single one.  | 
380  | 0  |                     raw.append((UChar)0x27);  | 
381  | 0  |                     ++i;  | 
382  | 0  |                     continue;  | 
383  | 0  |                 }  | 
384  |  |                 // Quote literal text until the next single apostrophe.  | 
385  | 0  |                 for(;;) { | 
386  | 0  |                     if(i == rules->length()) { | 
387  | 0  |                         setParseError("quoted literal text missing terminating apostrophe", errorCode); | 
388  | 0  |                         return i;  | 
389  | 0  |                     }  | 
390  | 0  |                     c = rules->charAt(i++);  | 
391  | 0  |                     if(c == 0x27) { | 
392  | 0  |                         if(i < rules->length() && rules->charAt(i) == 0x27) { | 
393  |  |                             // Double apostrophe inside quoted literal text,  | 
394  |  |                             // still encodes a single apostrophe.  | 
395  | 0  |                             ++i;  | 
396  | 0  |                         } else { | 
397  | 0  |                             break;  | 
398  | 0  |                         }  | 
399  | 0  |                     }  | 
400  | 0  |                     raw.append((UChar)c);  | 
401  | 0  |                 }  | 
402  | 0  |             } else if(c == 0x5c) {  // backslash | 
403  | 0  |                 if(i == rules->length()) { | 
404  | 0  |                     setParseError("backslash escape at the end of the rule string", errorCode); | 
405  | 0  |                     return i;  | 
406  | 0  |                 }  | 
407  | 0  |                 c = rules->char32At(i);  | 
408  | 0  |                 raw.append(c);  | 
409  | 0  |                 i += U16_LENGTH(c);  | 
410  | 0  |             } else { | 
411  |  |                 // Any other syntax character terminates a string.  | 
412  | 0  |                 --i;  | 
413  | 0  |                 break;  | 
414  | 0  |             }  | 
415  | 0  |         } else if(PatternProps::isWhiteSpace(c)) { | 
416  |  |             // Unquoted white space terminates a string.  | 
417  | 0  |             --i;  | 
418  | 0  |             break;  | 
419  | 0  |         } else { | 
420  | 0  |             raw.append((UChar)c);  | 
421  | 0  |         }  | 
422  | 0  |     }  | 
423  | 0  |     for(int32_t j = 0; j < raw.length();) { | 
424  | 0  |         UChar32 c = raw.char32At(j);  | 
425  | 0  |         if(U_IS_SURROGATE(c)) { | 
426  | 0  |             setParseError("string contains an unpaired surrogate", errorCode); | 
427  | 0  |             return i;  | 
428  | 0  |         }  | 
429  | 0  |         if(0xfffd <= c && c <= 0xffff) { | 
430  | 0  |             setParseError("string contains U+FFFD, U+FFFE or U+FFFF", errorCode); | 
431  | 0  |             return i;  | 
432  | 0  |         }  | 
433  | 0  |         j += U16_LENGTH(c);  | 
434  | 0  |     }  | 
435  | 0  |     return i;  | 
436  | 0  | }  | 
437  |  |  | 
namespace {

// Names of the special reset positions, as written inside square brackets.
// The order matters: parseSpecialPosition() adds the array index of the
// matching name to POS_BASE.
static const char *const positions[] = {
    // ignorables
    "first tertiary ignorable",
    "last tertiary ignorable",
    "first secondary ignorable",
    "last secondary ignorable",
    "first primary ignorable",
    "last primary ignorable",
    // variable and regular
    "first variable",
    "last variable",
    "first regular",
    "last regular",
    // implicit and trailing
    "first implicit",
    "last implicit",
    "first trailing",
    "last trailing"
};

}  // namespace
458  |  |  | 
459  |  | int32_t  | 
460  | 0  | CollationRuleParser::parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode) { | 
461  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
462  | 0  |     UnicodeString raw;  | 
463  | 0  |     int32_t j = readWords(i + 1, raw);  | 
464  | 0  |     if(j > i && rules->charAt(j) == 0x5d && !raw.isEmpty()) {  // words end with ] | 
465  | 0  |         ++j;  | 
466  | 0  |         for(int32_t pos = 0; pos < UPRV_LENGTHOF(positions); ++pos) { | 
467  | 0  |             if(raw == UnicodeString(positions[pos], -1, US_INV)) { | 
468  | 0  |                 str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + pos));  | 
469  | 0  |                 return j;  | 
470  | 0  |             }  | 
471  | 0  |         }  | 
472  | 0  |         if(raw == UNICODE_STRING_SIMPLE("top")) { | 
473  | 0  |             str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_REGULAR));  | 
474  | 0  |             return j;  | 
475  | 0  |         }  | 
476  | 0  |         if(raw == UNICODE_STRING_SIMPLE("variable top")) { | 
477  | 0  |             str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_VARIABLE));  | 
478  | 0  |             return j;  | 
479  | 0  |         }  | 
480  | 0  |     }  | 
481  | 0  |     setParseError("not a valid special reset position", errorCode); | 
482  | 0  |     return i;  | 
483  | 0  | }  | 
484  |  |  | 
485  |  | void  | 
486  | 0  | CollationRuleParser::parseSetting(UErrorCode &errorCode) { | 
487  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
488  | 0  |     UnicodeString raw;  | 
489  | 0  |     int32_t i = ruleIndex + 1;  | 
490  | 0  |     int32_t j = readWords(i, raw);  | 
491  | 0  |     if(j <= i || raw.isEmpty()) { | 
492  | 0  |         setParseError("expected a setting/option at '['", errorCode); | 
493  | 0  |     }  | 
494  | 0  |     if(rules->charAt(j) == 0x5d) {  // words end with ] | 
495  | 0  |         ++j;  | 
496  | 0  |         if(raw.startsWith(UNICODE_STRING_SIMPLE("reorder")) && | 
497  | 0  |                 (raw.length() == 7 || raw.charAt(7) == 0x20)) { | 
498  | 0  |             parseReordering(raw, errorCode);  | 
499  | 0  |             ruleIndex = j;  | 
500  | 0  |             return;  | 
501  | 0  |         }  | 
502  | 0  |         if(raw == UNICODE_STRING_SIMPLE("backwards 2")) { | 
503  | 0  |             settings->setFlag(CollationSettings::BACKWARD_SECONDARY,  | 
504  | 0  |                               UCOL_ON, 0, errorCode);  | 
505  | 0  |             ruleIndex = j;  | 
506  | 0  |             return;  | 
507  | 0  |         }  | 
508  | 0  |         UnicodeString v;  | 
509  | 0  |         int32_t valueIndex = raw.lastIndexOf((UChar)0x20);  | 
510  | 0  |         if(valueIndex >= 0) { | 
511  | 0  |             v.setTo(raw, valueIndex + 1);  | 
512  | 0  |             raw.truncate(valueIndex);  | 
513  | 0  |         }  | 
514  | 0  |         if(raw == UNICODE_STRING_SIMPLE("strength") && v.length() == 1) { | 
515  | 0  |             int32_t value = UCOL_DEFAULT;  | 
516  | 0  |             UChar c = v.charAt(0);  | 
517  | 0  |             if(0x31 <= c && c <= 0x34) {  // 1..4 | 
518  | 0  |                 value = UCOL_PRIMARY + (c - 0x31);  | 
519  | 0  |             } else if(c == 0x49) {  // 'I' | 
520  | 0  |                 value = UCOL_IDENTICAL;  | 
521  | 0  |             }  | 
522  | 0  |             if(value != UCOL_DEFAULT) { | 
523  | 0  |                 settings->setStrength(value, 0, errorCode);  | 
524  | 0  |                 ruleIndex = j;  | 
525  | 0  |                 return;  | 
526  | 0  |             }  | 
527  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("alternate")) { | 
528  | 0  |             UColAttributeValue value = UCOL_DEFAULT;  | 
529  | 0  |             if(v == UNICODE_STRING_SIMPLE("non-ignorable")) { | 
530  | 0  |                 value = UCOL_NON_IGNORABLE;  | 
531  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("shifted")) { | 
532  | 0  |                 value = UCOL_SHIFTED;  | 
533  | 0  |             }  | 
534  | 0  |             if(value != UCOL_DEFAULT) { | 
535  | 0  |                 settings->setAlternateHandling(value, 0, errorCode);  | 
536  | 0  |                 ruleIndex = j;  | 
537  | 0  |                 return;  | 
538  | 0  |             }  | 
539  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("maxVariable")) { | 
540  | 0  |             int32_t value = UCOL_DEFAULT;  | 
541  | 0  |             if(v == UNICODE_STRING_SIMPLE("space")) { | 
542  | 0  |                 value = CollationSettings::MAX_VAR_SPACE;  | 
543  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("punct")) { | 
544  | 0  |                 value = CollationSettings::MAX_VAR_PUNCT;  | 
545  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("symbol")) { | 
546  | 0  |                 value = CollationSettings::MAX_VAR_SYMBOL;  | 
547  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("currency")) { | 
548  | 0  |                 value = CollationSettings::MAX_VAR_CURRENCY;  | 
549  | 0  |             }  | 
550  | 0  |             if(value != UCOL_DEFAULT) { | 
551  | 0  |                 settings->setMaxVariable(value, 0, errorCode);  | 
552  | 0  |                 settings->variableTop = baseData->getLastPrimaryForGroup(  | 
553  | 0  |                     UCOL_REORDER_CODE_FIRST + value);  | 
554  | 0  |                 U_ASSERT(settings->variableTop != 0);  | 
555  | 0  |                 ruleIndex = j;  | 
556  | 0  |                 return;  | 
557  | 0  |             }  | 
558  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("caseFirst")) { | 
559  | 0  |             UColAttributeValue value = UCOL_DEFAULT;  | 
560  | 0  |             if(v == UNICODE_STRING_SIMPLE("off")) { | 
561  | 0  |                 value = UCOL_OFF;  | 
562  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("lower")) { | 
563  | 0  |                 value = UCOL_LOWER_FIRST;  | 
564  | 0  |             } else if(v == UNICODE_STRING_SIMPLE("upper")) { | 
565  | 0  |                 value = UCOL_UPPER_FIRST;  | 
566  | 0  |             }  | 
567  | 0  |             if(value != UCOL_DEFAULT) { | 
568  | 0  |                 settings->setCaseFirst(value, 0, errorCode);  | 
569  | 0  |                 ruleIndex = j;  | 
570  | 0  |                 return;  | 
571  | 0  |             }  | 
572  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("caseLevel")) { | 
573  | 0  |             UColAttributeValue value = getOnOffValue(v);  | 
574  | 0  |             if(value != UCOL_DEFAULT) { | 
575  | 0  |                 settings->setFlag(CollationSettings::CASE_LEVEL, value, 0, errorCode);  | 
576  | 0  |                 ruleIndex = j;  | 
577  | 0  |                 return;  | 
578  | 0  |             }  | 
579  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("normalization")) { | 
580  | 0  |             UColAttributeValue value = getOnOffValue(v);  | 
581  | 0  |             if(value != UCOL_DEFAULT) { | 
582  | 0  |                 settings->setFlag(CollationSettings::CHECK_FCD, value, 0, errorCode);  | 
583  | 0  |                 ruleIndex = j;  | 
584  | 0  |                 return;  | 
585  | 0  |             }  | 
586  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("numericOrdering")) { | 
587  | 0  |             UColAttributeValue value = getOnOffValue(v);  | 
588  | 0  |             if(value != UCOL_DEFAULT) { | 
589  | 0  |                 settings->setFlag(CollationSettings::NUMERIC, value, 0, errorCode);  | 
590  | 0  |                 ruleIndex = j;  | 
591  | 0  |                 return;  | 
592  | 0  |             }  | 
593  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("hiraganaQ")) { | 
594  | 0  |             UColAttributeValue value = getOnOffValue(v);  | 
595  | 0  |             if(value != UCOL_DEFAULT) { | 
596  | 0  |                 if(value == UCOL_ON) { | 
597  | 0  |                     setParseError("[hiraganaQ on] is not supported", errorCode); | 
598  | 0  |                 }  | 
599  | 0  |                 ruleIndex = j;  | 
600  | 0  |                 return;  | 
601  | 0  |             }  | 
602  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("import")) { | 
603  | 0  |             CharString lang;  | 
604  | 0  |             lang.appendInvariantChars(v, errorCode);  | 
605  | 0  |             if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; } | 
606  |  |             // BCP 47 language tag -> ICU locale ID  | 
607  | 0  |             char localeID[ULOC_FULLNAME_CAPACITY];  | 
608  | 0  |             int32_t parsedLength;  | 
609  | 0  |             int32_t length = uloc_forLanguageTag(lang.data(), localeID, ULOC_FULLNAME_CAPACITY,  | 
610  | 0  |                                                  &parsedLength, &errorCode);  | 
611  | 0  |             if(U_FAILURE(errorCode) ||  | 
612  | 0  |                     parsedLength != lang.length() || length >= ULOC_FULLNAME_CAPACITY) { | 
613  | 0  |                 errorCode = U_ZERO_ERROR;  | 
614  | 0  |                 setParseError("expected language tag in [import langTag]", errorCode); | 
615  | 0  |                 return;  | 
616  | 0  |             }  | 
617  |  |             // localeID minus all keywords  | 
618  | 0  |             char baseID[ULOC_FULLNAME_CAPACITY];  | 
619  | 0  |             length = uloc_getBaseName(localeID, baseID, ULOC_FULLNAME_CAPACITY, &errorCode);  | 
620  | 0  |             if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { | 
621  | 0  |                 errorCode = U_ZERO_ERROR;  | 
622  | 0  |                 setParseError("expected language tag in [import langTag]", errorCode); | 
623  | 0  |                 return;  | 
624  | 0  |             }  | 
625  | 0  |             if(length == 0) { | 
626  | 0  |                 uprv_strcpy(baseID, "root");  | 
627  | 0  |             } else if(*baseID == '_') { | 
628  | 0  |                 uprv_memmove(baseID + 3, baseID, length + 1);  | 
629  | 0  |                 uprv_memcpy(baseID, "und", 3);  | 
630  | 0  |             }  | 
631  |  |             // @collation=type, or length=0 if not specified  | 
632  | 0  |             char collationType[ULOC_KEYWORDS_CAPACITY];  | 
633  | 0  |             length = uloc_getKeywordValue(localeID, "collation",  | 
634  | 0  |                                           collationType, ULOC_KEYWORDS_CAPACITY,  | 
635  | 0  |                                           &errorCode);  | 
636  | 0  |             if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { | 
637  | 0  |                 errorCode = U_ZERO_ERROR;  | 
638  | 0  |                 setParseError("expected language tag in [import langTag]", errorCode); | 
639  | 0  |                 return;  | 
640  | 0  |             }  | 
641  | 0  |             if(importer == NULL) { | 
642  | 0  |                 setParseError("[import langTag] is not supported", errorCode); | 
643  | 0  |             } else { | 
644  | 0  |                 UnicodeString importedRules;  | 
645  | 0  |                 importer->getRules(baseID, length > 0 ? collationType : "standard",  | 
646  | 0  |                                    importedRules, errorReason, errorCode);  | 
647  | 0  |                 if(U_FAILURE(errorCode)) { | 
648  | 0  |                     if(errorReason == NULL) { | 
649  | 0  |                         errorReason = "[import langTag] failed";  | 
650  | 0  |                     }  | 
651  | 0  |                     setErrorContext();  | 
652  | 0  |                     return;  | 
653  | 0  |                 }  | 
654  | 0  |                 const UnicodeString *outerRules = rules;  | 
655  | 0  |                 int32_t outerRuleIndex = ruleIndex;  | 
656  | 0  |                 parse(importedRules, errorCode);  | 
657  | 0  |                 if(U_FAILURE(errorCode)) { | 
658  | 0  |                     if(parseError != NULL) { | 
659  | 0  |                         parseError->offset = outerRuleIndex;  | 
660  | 0  |                     }  | 
661  | 0  |                 }  | 
662  | 0  |                 rules = outerRules;  | 
663  | 0  |                 ruleIndex = j;  | 
664  | 0  |             }  | 
665  | 0  |             return;  | 
666  | 0  |         }  | 
667  | 0  |     } else if(rules->charAt(j) == 0x5b) {  // words end with [ | 
668  | 0  |         UnicodeSet set;  | 
669  | 0  |         j = parseUnicodeSet(j, set, errorCode);  | 
670  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
671  | 0  |         if(raw == UNICODE_STRING_SIMPLE("optimize")) { | 
672  | 0  |             sink->optimize(set, errorReason, errorCode);  | 
673  | 0  |             if(U_FAILURE(errorCode)) { setErrorContext(); } | 
674  | 0  |             ruleIndex = j;  | 
675  | 0  |             return;  | 
676  | 0  |         } else if(raw == UNICODE_STRING_SIMPLE("suppressContractions")) { | 
677  | 0  |             sink->suppressContractions(set, errorReason, errorCode);  | 
678  | 0  |             if(U_FAILURE(errorCode)) { setErrorContext(); } | 
679  | 0  |             ruleIndex = j;  | 
680  | 0  |             return;  | 
681  | 0  |         }  | 
682  | 0  |     }  | 
683  | 0  |     setParseError("not a valid setting/option", errorCode); | 
684  | 0  | }  | 
685  |  |  | 
686  |  | void  | 
687  | 0  | CollationRuleParser::parseReordering(const UnicodeString &raw, UErrorCode &errorCode) { | 
688  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
689  | 0  |     int32_t i = 7;  // after "reorder"  | 
690  | 0  |     if(i == raw.length()) { | 
691  |  |         // empty [reorder] with no codes  | 
692  | 0  |         settings->resetReordering();  | 
693  | 0  |         return;  | 
694  | 0  |     }  | 
695  |  |     // Parse the codes in [reorder aa bb cc].  | 
696  | 0  |     UVector32 reorderCodes(errorCode);  | 
697  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
698  | 0  |     CharString word;  | 
699  | 0  |     while(i < raw.length()) { | 
700  | 0  |         ++i;  // skip the word-separating space  | 
701  | 0  |         int32_t limit = raw.indexOf((UChar)0x20, i);  | 
702  | 0  |         if(limit < 0) { limit = raw.length(); } | 
703  | 0  |         word.clear().appendInvariantChars(raw.tempSubStringBetween(i, limit), errorCode);  | 
704  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
705  | 0  |         int32_t code = getReorderCode(word.data());  | 
706  | 0  |         if(code < 0) { | 
707  | 0  |             setParseError("unknown script or reorder code", errorCode); | 
708  | 0  |             return;  | 
709  | 0  |         }  | 
710  | 0  |         reorderCodes.addElement(code, errorCode);  | 
711  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
712  | 0  |         i = limit;  | 
713  | 0  |     }  | 
714  | 0  |     settings->setReordering(*baseData, reorderCodes.getBuffer(), reorderCodes.size(), errorCode);  | 
715  | 0  | }  | 
716  |  |  | 
// Names of the special (non-script) reorder groups.
// The order matters: getReorderCode() returns UCOL_REORDER_CODE_FIRST + index
// for the entry at a given index.
static const char *const gSpecialReorderCodes[] = {
    "space",
    "punct",
    "symbol",
    "currency",
    "digit"
};
720  |  |  | 
721  |  | int32_t  | 
722  | 0  | CollationRuleParser::getReorderCode(const char *word) { | 
723  | 0  |     for(int32_t i = 0; i < UPRV_LENGTHOF(gSpecialReorderCodes); ++i) { | 
724  | 0  |         if(uprv_stricmp(word, gSpecialReorderCodes[i]) == 0) { | 
725  | 0  |             return UCOL_REORDER_CODE_FIRST + i;  | 
726  | 0  |         }  | 
727  | 0  |     }  | 
728  | 0  |     int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);  | 
729  | 0  |     if(script >= 0) { | 
730  | 0  |         return script;  | 
731  | 0  |     }  | 
732  | 0  |     if(uprv_stricmp(word, "others") == 0) { | 
733  | 0  |         return UCOL_REORDER_CODE_OTHERS;  // same as Zzzz = USCRIPT_UNKNOWN  | 
734  | 0  |     }  | 
735  | 0  |     return -1;  | 
736  | 0  | }  | 
737  |  |  | 
738  |  | UColAttributeValue  | 
739  | 0  | CollationRuleParser::getOnOffValue(const UnicodeString &s) { | 
740  | 0  |     if(s == UNICODE_STRING_SIMPLE("on")) { | 
741  | 0  |         return UCOL_ON;  | 
742  | 0  |     } else if(s == UNICODE_STRING_SIMPLE("off")) { | 
743  | 0  |         return UCOL_OFF;  | 
744  | 0  |     } else { | 
745  | 0  |         return UCOL_DEFAULT;  | 
746  | 0  |     }  | 
747  | 0  | }  | 
748  |  |  | 
749  |  | int32_t  | 
750  | 0  | CollationRuleParser::parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode) { | 
751  |  |     // Collect a UnicodeSet pattern between a balanced pair of [brackets].  | 
752  | 0  |     int32_t level = 0;  | 
753  | 0  |     int32_t j = i;  | 
754  | 0  |     for(;;) { | 
755  | 0  |         if(j == rules->length()) { | 
756  | 0  |             setParseError("unbalanced UnicodeSet pattern brackets", errorCode); | 
757  | 0  |             return j;  | 
758  | 0  |         }  | 
759  | 0  |         UChar c = rules->charAt(j++);  | 
760  | 0  |         if(c == 0x5b) {  // '[' | 
761  | 0  |             ++level;  | 
762  | 0  |         } else if(c == 0x5d) {  // ']' | 
763  | 0  |             if(--level == 0) { break; } | 
764  | 0  |         }  | 
765  | 0  |     }  | 
766  | 0  |     set.applyPattern(rules->tempSubStringBetween(i, j), errorCode);  | 
767  | 0  |     if(U_FAILURE(errorCode)) { | 
768  | 0  |         errorCode = U_ZERO_ERROR;  | 
769  | 0  |         setParseError("not a valid UnicodeSet pattern", errorCode); | 
770  | 0  |         return j;  | 
771  | 0  |     }  | 
772  | 0  |     j = skipWhiteSpace(j);  | 
773  | 0  |     if(j == rules->length() || rules->charAt(j) != 0x5d) { | 
774  | 0  |         setParseError("missing option-terminating ']' after UnicodeSet pattern", errorCode); | 
775  | 0  |         return j;  | 
776  | 0  |     }  | 
777  | 0  |     return ++j;  | 
778  | 0  | }  | 
779  |  |  | 
780  |  | int32_t  | 
781  | 0  | CollationRuleParser::readWords(int32_t i, UnicodeString &raw) const { | 
782  | 0  |     static const UChar sp = 0x20;  | 
783  | 0  |     raw.remove();  | 
784  | 0  |     i = skipWhiteSpace(i);  | 
785  | 0  |     for(;;) { | 
786  | 0  |         if(i >= rules->length()) { return 0; } | 
787  | 0  |         UChar c = rules->charAt(i);  | 
788  | 0  |         if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) {  // syntax except -_ | 
789  | 0  |             if(raw.isEmpty()) { return i; } | 
790  | 0  |             if(raw.endsWith(&sp, 1)) {  // remove trailing space | 
791  | 0  |                 raw.truncate(raw.length() - 1);  | 
792  | 0  |             }  | 
793  | 0  |             return i;  | 
794  | 0  |         }  | 
795  | 0  |         if(PatternProps::isWhiteSpace(c)) { | 
796  | 0  |             raw.append(sp);  | 
797  | 0  |             i = skipWhiteSpace(i + 1);  | 
798  | 0  |         } else { | 
799  | 0  |             raw.append(c);  | 
800  | 0  |             ++i;  | 
801  | 0  |         }  | 
802  | 0  |     }  | 
803  | 0  | }  | 
804  |  |  | 
805  |  | int32_t  | 
806  | 0  | CollationRuleParser::skipComment(int32_t i) const { | 
807  |  |     // skip to past the newline  | 
808  | 0  |     while(i < rules->length()) { | 
809  | 0  |         UChar c = rules->charAt(i++);  | 
810  |  |         // LF or FF or CR or NEL or LS or PS  | 
811  | 0  |         if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) { | 
812  |  |             // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS."  | 
813  |  |             // NLF (new line function) = CR or LF or CR+LF or NEL.  | 
814  |  |             // No need to collect all of CR+LF because a following LF will be ignored anyway.  | 
815  | 0  |             break;  | 
816  | 0  |         }  | 
817  | 0  |     }  | 
818  | 0  |     return i;  | 
819  | 0  | }  | 
820  |  |  | 
821  |  | void  | 
822  | 0  | CollationRuleParser::setParseError(const char *reason, UErrorCode &errorCode) { | 
823  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
824  |  |     // Error code consistent with the old parser (from ca. 2001),  | 
825  |  |     // rather than U_PARSE_ERROR;  | 
826  | 0  |     errorCode = U_INVALID_FORMAT_ERROR;  | 
827  | 0  |     errorReason = reason;  | 
828  | 0  |     if(parseError != NULL) { setErrorContext(); } | 
829  | 0  | }  | 
830  |  |  | 
831  |  | void  | 
832  | 0  | CollationRuleParser::setErrorContext() { | 
833  | 0  |     if(parseError == NULL) { return; } | 
834  |  |  | 
835  |  |     // Note: This relies on the calling code maintaining the ruleIndex  | 
836  |  |     // at a position that is useful for debugging.  | 
837  |  |     // For example, at the beginning of a reset or relation etc.  | 
838  | 0  |     parseError->offset = ruleIndex;  | 
839  | 0  |     parseError->line = 0;  // We are not counting line numbers.  | 
840  |  |  | 
841  |  |     // before ruleIndex  | 
842  | 0  |     int32_t start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1);  | 
843  | 0  |     if(start < 0) { | 
844  | 0  |         start = 0;  | 
845  | 0  |     } else if(start > 0 && U16_IS_TRAIL(rules->charAt(start))) { | 
846  | 0  |         ++start;  | 
847  | 0  |     }  | 
848  | 0  |     int32_t length = ruleIndex - start;  | 
849  | 0  |     rules->extract(start, length, parseError->preContext);  | 
850  | 0  |     parseError->preContext[length] = 0;  | 
851  |  |  | 
852  |  |     // starting from ruleIndex  | 
853  | 0  |     length = rules->length() - ruleIndex;  | 
854  | 0  |     if(length >= U_PARSE_CONTEXT_LEN) { | 
855  | 0  |         length = U_PARSE_CONTEXT_LEN - 1;  | 
856  | 0  |         if(U16_IS_LEAD(rules->charAt(ruleIndex + length - 1))) { | 
857  | 0  |             --length;  | 
858  | 0  |         }  | 
859  | 0  |     }  | 
860  | 0  |     rules->extract(ruleIndex, length, parseError->postContext);  | 
861  | 0  |     parseError->postContext[length] = 0;  | 
862  | 0  | }  | 
863  |  |  | 
864  |  | UBool  | 
865  | 0  | CollationRuleParser::isSyntaxChar(UChar32 c) { | 
866  | 0  |     return 0x21 <= c && c <= 0x7e &&  | 
867  | 0  |             (c <= 0x2f || (0x3a <= c && c <= 0x40) ||  | 
868  | 0  |             (0x5b <= c && c <= 0x60) || (0x7b <= c));  | 
869  | 0  | }  | 
870  |  |  | 
871  |  | int32_t  | 
872  | 0  | CollationRuleParser::skipWhiteSpace(int32_t i) const { | 
873  | 0  |     while(i < rules->length() && PatternProps::isWhiteSpace(rules->charAt(i))) { | 
874  | 0  |         ++i;  | 
875  | 0  |     }  | 
876  | 0  |     return i;  | 
877  | 0  | }  | 
878  |  |  | 
879  |  | U_NAMESPACE_END  | 
880  |  |  | 
881  |  | #endif  // !UCONFIG_NO_COLLATION  |