/src/icu/source/i18n/nfrule.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | ******************************************************************************  | 
5  |  | *   Copyright (C) 1997-2015, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | ******************************************************************************  | 
8  |  | *   file name:  nfrule.cpp  | 
9  |  | *   encoding:   UTF-8  | 
10  |  | *   tab size:   8 (not used)  | 
11  |  | *   indentation:4  | 
12  |  | *  | 
13  |  | * Modification history  | 
14  |  | * Date        Name      Comments  | 
15  |  | * 10/11/2001  Doug      Ported from ICU4J  | 
16  |  | */  | 
17  |  |  | 
18  |  | #include "nfrule.h"  | 
19  |  |  | 
20  |  | #if U_HAVE_RBNF  | 
21  |  |  | 
22  |  | #include "unicode/localpointer.h"  | 
23  |  | #include "unicode/rbnf.h"  | 
24  |  | #include "unicode/tblcoll.h"  | 
25  |  | #include "unicode/plurfmt.h"  | 
26  |  | #include "unicode/upluralrules.h"  | 
27  |  | #include "unicode/coleitr.h"  | 
28  |  | #include "unicode/uchar.h"  | 
29  |  | #include "nfrs.h"  | 
30  |  | #include "nfrlist.h"  | 
31  |  | #include "nfsubs.h"  | 
32  |  | #include "patternprops.h"  | 
33  |  | #include "putilimp.h"  | 
34  |  |  | 
35  |  | U_NAMESPACE_BEGIN  | 
36  |  |  | 
37  |  | NFRule::NFRule(const RuleBasedNumberFormat* _rbnf, const UnicodeString &_ruleText, UErrorCode &status)  | 
38  | 0  |   : baseValue((int32_t)0)  | 
39  | 0  |   , radix(10)  | 
40  | 0  |   , exponent(0)  | 
41  | 0  |   , decimalPoint(0)  | 
42  | 0  |   , fRuleText(_ruleText)  | 
43  |  |   , sub1(NULL)  | 
44  |  |   , sub2(NULL)  | 
45  | 0  |   , formatter(_rbnf)  | 
46  |  |   , rulePatternFormat(NULL)  | 
47  | 0  | { | 
48  | 0  |     if (!fRuleText.isEmpty()) { | 
49  | 0  |         parseRuleDescriptor(fRuleText, status);  | 
50  | 0  |     }  | 
51  | 0  | }  | 
52  |  |  | 
53  |  | NFRule::~NFRule()  | 
54  | 0  | { | 
55  | 0  |     if (sub1 != sub2) { | 
56  | 0  |         delete sub2;  | 
57  | 0  |         sub2 = NULL;  | 
58  | 0  |     }  | 
59  | 0  |     delete sub1;  | 
60  | 0  |     sub1 = NULL;  | 
61  | 0  |     delete rulePatternFormat;  | 
62  | 0  |     rulePatternFormat = NULL;  | 
63  | 0  | }  | 
64  |  |  | 
65  |  | static const UChar gLeftBracket = 0x005b;  | 
66  |  | static const UChar gRightBracket = 0x005d;  | 
67  |  | static const UChar gColon = 0x003a;  | 
68  |  | static const UChar gZero = 0x0030;  | 
69  |  | static const UChar gNine = 0x0039;  | 
70  |  | static const UChar gSpace = 0x0020;  | 
71  |  | static const UChar gSlash = 0x002f;  | 
72  |  | static const UChar gGreaterThan = 0x003e;  | 
73  |  | static const UChar gLessThan = 0x003c;  | 
74  |  | static const UChar gComma = 0x002c;  | 
75  |  | static const UChar gDot = 0x002e;  | 
76  |  | static const UChar gTick = 0x0027;  | 
77  |  | //static const UChar gMinus = 0x002d;  | 
78  |  | static const UChar gSemicolon = 0x003b;  | 
79  |  | static const UChar gX = 0x0078;  | 
80  |  |  | 
81  |  | static const UChar gMinusX[] =                  {0x2D, 0x78, 0};    /* "-x" */ | 
82  |  | static const UChar gInf[] =                     {0x49, 0x6E, 0x66, 0}; /* "Inf" */ | 
83  |  | static const UChar gNaN[] =                     {0x4E, 0x61, 0x4E, 0}; /* "NaN" */ | 
84  |  |  | 
85  |  | static const UChar gDollarOpenParenthesis[] =   {0x24, 0x28, 0}; /* "$(" */ | 
86  |  | static const UChar gClosedParenthesisDollar[] = {0x29, 0x24, 0}; /* ")$" */ | 
87  |  |  | 
88  |  | static const UChar gLessLess[] =                {0x3C, 0x3C, 0};    /* "<<" */ | 
89  |  | static const UChar gLessPercent[] =             {0x3C, 0x25, 0};    /* "<%" */ | 
90  |  | static const UChar gLessHash[] =                {0x3C, 0x23, 0};    /* "<#" */ | 
91  |  | static const UChar gLessZero[] =                {0x3C, 0x30, 0};    /* "<0" */ | 
92  |  | static const UChar gGreaterGreater[] =          {0x3E, 0x3E, 0};    /* ">>" */ | 
93  |  | static const UChar gGreaterPercent[] =          {0x3E, 0x25, 0};    /* ">%" */ | 
94  |  | static const UChar gGreaterHash[] =             {0x3E, 0x23, 0};    /* ">#" */ | 
95  |  | static const UChar gGreaterZero[] =             {0x3E, 0x30, 0};    /* ">0" */ | 
96  |  | static const UChar gEqualPercent[] =            {0x3D, 0x25, 0};    /* "=%" */ | 
97  |  | static const UChar gEqualHash[] =               {0x3D, 0x23, 0};    /* "=#" */ | 
98  |  | static const UChar gEqualZero[] =               {0x3D, 0x30, 0};    /* "=0" */ | 
99  |  | static const UChar gGreaterGreaterGreater[] =   {0x3E, 0x3E, 0x3E, 0}; /* ">>>" */ | 
100  |  |  | 
101  |  | static const UChar * const RULE_PREFIXES[] = { | 
102  |  |     gLessLess, gLessPercent, gLessHash, gLessZero,  | 
103  |  |     gGreaterGreater, gGreaterPercent,gGreaterHash, gGreaterZero,  | 
104  |  |     gEqualPercent, gEqualHash, gEqualZero, NULL  | 
105  |  | };  | 
106  |  |  | 
107  |  | void  | 
108  |  | NFRule::makeRules(UnicodeString& description,  | 
109  |  |                   NFRuleSet *owner,  | 
110  |  |                   const NFRule *predecessor,  | 
111  |  |                   const RuleBasedNumberFormat *rbnf,  | 
112  |  |                   NFRuleList& rules,  | 
113  |  |                   UErrorCode& status)  | 
114  | 0  | { | 
115  |  |     // we know we're making at least one rule, so go ahead and  | 
116  |  |     // new it up and initialize its basevalue and divisor  | 
117  |  |     // (this also strips the rule descriptor, if any, off the  | 
118  |  |     // description string)  | 
119  | 0  |     NFRule* rule1 = new NFRule(rbnf, description, status);  | 
120  |  |     /* test for NULL */  | 
121  | 0  |     if (rule1 == 0) { | 
122  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
123  | 0  |         return;  | 
124  | 0  |     }  | 
125  | 0  |     description = rule1->fRuleText;  | 
126  |  |  | 
127  |  |     // check the description to see whether there's text enclosed  | 
128  |  |     // in brackets  | 
129  | 0  |     int32_t brack1 = description.indexOf(gLeftBracket);  | 
130  | 0  |     int32_t brack2 = brack1 < 0 ? -1 : description.indexOf(gRightBracket);  | 
131  |  |  | 
132  |  |     // if the description doesn't contain a matched pair of brackets,  | 
133  |  |     // or if it's of a type that doesn't recognize bracketed text,  | 
134  |  |     // then leave the description alone, initialize the rule's  | 
135  |  |     // rule text and substitutions, and return that rule  | 
136  | 0  |     if (brack2 < 0 || brack1 > brack2  | 
137  | 0  |         || rule1->getType() == kProperFractionRule  | 
138  | 0  |         || rule1->getType() == kNegativeNumberRule  | 
139  | 0  |         || rule1->getType() == kInfinityRule  | 
140  | 0  |         || rule1->getType() == kNaNRule)  | 
141  | 0  |     { | 
142  | 0  |         rule1->extractSubstitutions(owner, description, predecessor, status);  | 
143  | 0  |     }  | 
144  | 0  |     else { | 
145  |  |         // if the description does contain a matched pair of brackets,  | 
146  |  |         // then it's really shorthand for two rules (with one exception)  | 
147  | 0  |         NFRule* rule2 = NULL;  | 
148  | 0  |         UnicodeString sbuf;  | 
149  |  |  | 
150  |  |         // we'll actually only split the rule into two rules if its  | 
151  |  |         // base value is an even multiple of its divisor (or it's one  | 
152  |  |         // of the special rules)  | 
153  | 0  |         if ((rule1->baseValue > 0  | 
154  | 0  |             && (rule1->baseValue % util64_pow(rule1->radix, rule1->exponent)) == 0)  | 
155  | 0  |             || rule1->getType() == kImproperFractionRule  | 
156  | 0  |             || rule1->getType() == kDefaultRule) { | 
157  |  |  | 
158  |  |             // if it passes that test, new up the second rule.  If the  | 
159  |  |             // rule set both rules will belong to is a fraction rule  | 
160  |  |             // set, they both have the same base value; otherwise,  | 
161  |  |             // increment the original rule's base value ("rule1" actually | 
162  |  |             // goes SECOND in the rule set's rule list)  | 
163  | 0  |             rule2 = new NFRule(rbnf, UnicodeString(), status);  | 
164  |  |             /* test for NULL */  | 
165  | 0  |             if (rule2 == 0) { | 
166  | 0  |                 status = U_MEMORY_ALLOCATION_ERROR;  | 
167  | 0  |                 return;  | 
168  | 0  |             }  | 
169  | 0  |             if (rule1->baseValue >= 0) { | 
170  | 0  |                 rule2->baseValue = rule1->baseValue;  | 
171  | 0  |                 if (!owner->isFractionRuleSet()) { | 
172  | 0  |                     ++rule1->baseValue;  | 
173  | 0  |                 }  | 
174  | 0  |             }  | 
175  |  |  | 
176  |  |             // if the description began with "x.x" and contains bracketed  | 
177  |  |             // text, it describes both the improper fraction rule and  | 
178  |  |             // the proper fraction rule  | 
179  | 0  |             else if (rule1->getType() == kImproperFractionRule) { | 
180  | 0  |                 rule2->setType(kProperFractionRule);  | 
181  | 0  |             }  | 
182  |  |  | 
183  |  |             // if the description began with "x.0" and contains bracketed  | 
184  |  |             // text, it describes both the default rule and the  | 
185  |  |             // improper fraction rule  | 
186  | 0  |             else if (rule1->getType() == kDefaultRule) { | 
187  | 0  |                 rule2->baseValue = rule1->baseValue;  | 
188  | 0  |                 rule1->setType(kImproperFractionRule);  | 
189  | 0  |             }  | 
190  |  |  | 
191  |  |             // both rules have the same radix and exponent (i.e., the  | 
192  |  |             // same divisor)  | 
193  | 0  |             rule2->radix = rule1->radix;  | 
194  | 0  |             rule2->exponent = rule1->exponent;  | 
195  |  |  | 
196  |  |             // rule2's rule text omits the stuff in brackets: initialize  | 
197  |  |             // its rule text and substitutions accordingly  | 
198  | 0  |             sbuf.append(description, 0, brack1);  | 
199  | 0  |             if (brack2 + 1 < description.length()) { | 
200  | 0  |                 sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);  | 
201  | 0  |             }  | 
202  | 0  |             rule2->extractSubstitutions(owner, sbuf, predecessor, status);  | 
203  | 0  |         }  | 
204  |  |  | 
205  |  |         // rule1's text includes the text in the brackets but omits  | 
206  |  |         // the brackets themselves: initialize _its_ rule text and  | 
207  |  |         // substitutions accordingly  | 
208  | 0  |         sbuf.setTo(description, 0, brack1);  | 
209  | 0  |         sbuf.append(description, brack1 + 1, brack2 - brack1 - 1);  | 
210  | 0  |         if (brack2 + 1 < description.length()) { | 
211  | 0  |             sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);  | 
212  | 0  |         }  | 
213  | 0  |         rule1->extractSubstitutions(owner, sbuf, predecessor, status);  | 
214  |  |  | 
215  |  |         // if we only have one rule, return it; if we have two, return  | 
216  |  |         // a two-element array containing them (notice that rule2 goes  | 
217  |  |         // BEFORE rule1 in the list: in all cases, rule2 OMITS the  | 
218  |  |         // material in the brackets and rule1 INCLUDES the material  | 
219  |  |         // in the brackets)  | 
220  | 0  |         if (rule2 != NULL) { | 
221  | 0  |             if (rule2->baseValue >= kNoBase) { | 
222  | 0  |                 rules.add(rule2);  | 
223  | 0  |             }  | 
224  | 0  |             else { | 
225  | 0  |                 owner->setNonNumericalRule(rule2);  | 
226  | 0  |             }  | 
227  | 0  |         }  | 
228  | 0  |     }  | 
229  | 0  |     if (rule1->baseValue >= kNoBase) { | 
230  | 0  |         rules.add(rule1);  | 
231  | 0  |     }  | 
232  | 0  |     else { | 
233  | 0  |         owner->setNonNumericalRule(rule1);  | 
234  | 0  |     }  | 
235  | 0  | }  | 
236  |  |  | 
237  |  | /**  | 
238  |  |  * This function parses the rule's rule descriptor (i.e., the base  | 
239  |  |  * value and/or other tokens that precede the rule's rule text  | 
240  |  |  * in the description) and sets the rule's base value, radix, and  | 
241  |  |  * exponent according to the descriptor.  (If the description doesn't  | 
242  |  |  * include a rule descriptor, then this function sets everything to  | 
243  |  |  * default values and the rule set sets the rule's real base value).  | 
244  |  |  * @param description The rule's description  | 
245  |  |  * @return If "description" included a rule descriptor, this is  | 
246  |  |  * "description" with the descriptor and any trailing whitespace  | 
247  |  |  * stripped off.  Otherwise; it's "descriptor" unchangd.  | 
248  |  |  */  | 
249  |  | void  | 
250  |  | NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)  | 
251  | 0  | { | 
252  |  |     // the description consists of a rule descriptor and a rule body,  | 
253  |  |     // separated by a colon.  The rule descriptor is optional.  If  | 
254  |  |     // it's omitted, just set the base value to 0.  | 
255  | 0  |     int32_t p = description.indexOf(gColon);  | 
256  | 0  |     if (p != -1) { | 
257  |  |         // copy the descriptor out into its own string and strip it,  | 
258  |  |         // along with any trailing whitespace, out of the original  | 
259  |  |         // description  | 
260  | 0  |         UnicodeString descriptor;  | 
261  | 0  |         descriptor.setTo(description, 0, p);  | 
262  |  | 
  | 
263  | 0  |         ++p;  | 
264  | 0  |         while (p < description.length() && PatternProps::isWhiteSpace(description.charAt(p))) { | 
265  | 0  |             ++p;  | 
266  | 0  |         }  | 
267  | 0  |         description.removeBetween(0, p);  | 
268  |  |  | 
269  |  |         // check first to see if the rule descriptor matches the token  | 
270  |  |         // for one of the special rules.  If it does, set the base  | 
271  |  |         // value to the correct identifier value  | 
272  | 0  |         int descriptorLength = descriptor.length();  | 
273  | 0  |         UChar firstChar = descriptor.charAt(0);  | 
274  | 0  |         UChar lastChar = descriptor.charAt(descriptorLength - 1);  | 
275  | 0  |         if (firstChar >= gZero && firstChar <= gNine && lastChar != gX) { | 
276  |  |             // if the rule descriptor begins with a digit, it's a descriptor  | 
277  |  |             // for a normal rule  | 
278  |  |             // since we don't have Long.parseLong, and this isn't much work anyway,  | 
279  |  |             // just build up the value as we encounter the digits.  | 
280  | 0  |             int64_t val = 0;  | 
281  | 0  |             p = 0;  | 
282  | 0  |             UChar c = gSpace;  | 
283  |  |  | 
284  |  |             // begin parsing the descriptor: copy digits  | 
285  |  |             // into "tempValue", skip periods, commas, and spaces,  | 
286  |  |             // stop on a slash or > sign (or at the end of the string),  | 
287  |  |             // and throw an exception on any other character  | 
288  | 0  |             int64_t ll_10 = 10;  | 
289  | 0  |             while (p < descriptorLength) { | 
290  | 0  |                 c = descriptor.charAt(p);  | 
291  | 0  |                 if (c >= gZero && c <= gNine) { | 
292  | 0  |                     val = val * ll_10 + (int32_t)(c - gZero);  | 
293  | 0  |                 }  | 
294  | 0  |                 else if (c == gSlash || c == gGreaterThan) { | 
295  | 0  |                     break;  | 
296  | 0  |                 }  | 
297  | 0  |                 else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { | 
298  | 0  |                 }  | 
299  | 0  |                 else { | 
300  |  |                     // throw new IllegalArgumentException("Illegal character in rule descriptor"); | 
301  | 0  |                     status = U_PARSE_ERROR;  | 
302  | 0  |                     return;  | 
303  | 0  |                 }  | 
304  | 0  |                 ++p;  | 
305  | 0  |             }  | 
306  |  |  | 
307  |  |             // we have the base value, so set it  | 
308  | 0  |             setBaseValue(val, status);  | 
309  |  |  | 
310  |  |             // if we stopped the previous loop on a slash, we're  | 
311  |  |             // now parsing the rule's radix.  Again, accumulate digits  | 
312  |  |             // in tempValue, skip punctuation, stop on a > mark, and  | 
313  |  |             // throw an exception on anything else  | 
314  | 0  |             if (c == gSlash) { | 
315  | 0  |                 val = 0;  | 
316  | 0  |                 ++p;  | 
317  | 0  |                 ll_10 = 10;  | 
318  | 0  |                 while (p < descriptorLength) { | 
319  | 0  |                     c = descriptor.charAt(p);  | 
320  | 0  |                     if (c >= gZero && c <= gNine) { | 
321  | 0  |                         val = val * ll_10 + (int32_t)(c - gZero);  | 
322  | 0  |                     }  | 
323  | 0  |                     else if (c == gGreaterThan) { | 
324  | 0  |                         break;  | 
325  | 0  |                     }  | 
326  | 0  |                     else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { | 
327  | 0  |                     }  | 
328  | 0  |                     else { | 
329  |  |                         // throw new IllegalArgumentException("Illegal character is rule descriptor"); | 
330  | 0  |                         status = U_PARSE_ERROR;  | 
331  | 0  |                         return;  | 
332  | 0  |                     }  | 
333  | 0  |                     ++p;  | 
334  | 0  |                 }  | 
335  |  |  | 
336  |  |                 // tempValue now contain's the rule's radix.  Set it  | 
337  |  |                 // accordingly, and recalculate the rule's exponent  | 
338  | 0  |                 radix = (int32_t)val;  | 
339  | 0  |                 if (radix == 0) { | 
340  |  |                     // throw new IllegalArgumentException("Rule can't have radix of 0"); | 
341  | 0  |                     status = U_PARSE_ERROR;  | 
342  | 0  |                 }  | 
343  |  | 
  | 
344  | 0  |                 exponent = expectedExponent();  | 
345  | 0  |             }  | 
346  |  |  | 
347  |  |             // if we stopped the previous loop on a > sign, then continue  | 
348  |  |             // for as long as we still see > signs.  For each one,  | 
349  |  |             // decrement the exponent (unless the exponent is already 0).  | 
350  |  |             // If we see another character before reaching the end of  | 
351  |  |             // the descriptor, that's also a syntax error.  | 
352  | 0  |             if (c == gGreaterThan) { | 
353  | 0  |                 while (p < descriptor.length()) { | 
354  | 0  |                     c = descriptor.charAt(p);  | 
355  | 0  |                     if (c == gGreaterThan && exponent > 0) { | 
356  | 0  |                         --exponent;  | 
357  | 0  |                     } else { | 
358  |  |                         // throw new IllegalArgumentException("Illegal character in rule descriptor"); | 
359  | 0  |                         status = U_PARSE_ERROR;  | 
360  | 0  |                         return;  | 
361  | 0  |                     }  | 
362  | 0  |                     ++p;  | 
363  | 0  |                 }  | 
364  | 0  |             }  | 
365  | 0  |         }  | 
366  | 0  |         else if (0 == descriptor.compare(gMinusX, 2)) { | 
367  | 0  |             setType(kNegativeNumberRule);  | 
368  | 0  |         }  | 
369  | 0  |         else if (descriptorLength == 3) { | 
370  | 0  |             if (firstChar == gZero && lastChar == gX) { | 
371  | 0  |                 setBaseValue(kProperFractionRule, status);  | 
372  | 0  |                 decimalPoint = descriptor.charAt(1);  | 
373  | 0  |             }  | 
374  | 0  |             else if (firstChar == gX && lastChar == gX) { | 
375  | 0  |                 setBaseValue(kImproperFractionRule, status);  | 
376  | 0  |                 decimalPoint = descriptor.charAt(1);  | 
377  | 0  |             }  | 
378  | 0  |             else if (firstChar == gX && lastChar == gZero) { | 
379  | 0  |                 setBaseValue(kDefaultRule, status);  | 
380  | 0  |                 decimalPoint = descriptor.charAt(1);  | 
381  | 0  |             }  | 
382  | 0  |             else if (descriptor.compare(gNaN, 3) == 0) { | 
383  | 0  |                 setBaseValue(kNaNRule, status);  | 
384  | 0  |             }  | 
385  | 0  |             else if (descriptor.compare(gInf, 3) == 0) { | 
386  | 0  |                 setBaseValue(kInfinityRule, status);  | 
387  | 0  |             }  | 
388  | 0  |         }  | 
389  | 0  |     }  | 
390  |  |     // else use the default base value for now.  | 
391  |  |  | 
392  |  |     // finally, if the rule body begins with an apostrophe, strip it off  | 
393  |  |     // (this is generally used to put whitespace at the beginning of  | 
394  |  |     // a rule's rule text)  | 
395  | 0  |     if (description.length() > 0 && description.charAt(0) == gTick) { | 
396  | 0  |         description.removeBetween(0, 1);  | 
397  | 0  |     }  | 
398  |  |  | 
399  |  |     // return the description with all the stuff we've just waded through  | 
400  |  |     // stripped off the front.  It now contains just the rule body.  | 
401  |  |     // return description;  | 
402  | 0  | }  | 
403  |  |  | 
404  |  | /**  | 
405  |  | * Searches the rule's rule text for the substitution tokens,  | 
406  |  | * creates the substitutions, and removes the substitution tokens  | 
407  |  | * from the rule's rule text.  | 
408  |  | * @param owner The rule set containing this rule  | 
409  |  | * @param predecessor The rule preseding this one in "owners" rule list  | 
410  |  | * @param ownersOwner The RuleBasedFormat that owns this rule  | 
411  |  | */  | 
412  |  | void  | 
413  |  | NFRule::extractSubstitutions(const NFRuleSet* ruleSet,  | 
414  |  |                              const UnicodeString &ruleText,  | 
415  |  |                              const NFRule* predecessor,  | 
416  |  |                              UErrorCode& status)  | 
417  | 0  | { | 
418  | 0  |     if (U_FAILURE(status)) { | 
419  | 0  |         return;  | 
420  | 0  |     }  | 
421  | 0  |     fRuleText = ruleText;  | 
422  | 0  |     sub1 = extractSubstitution(ruleSet, predecessor, status);  | 
423  | 0  |     if (sub1 == NULL) { | 
424  |  |         // Small optimization. There is no need to create a redundant NullSubstitution.  | 
425  | 0  |         sub2 = NULL;  | 
426  | 0  |     }  | 
427  | 0  |     else { | 
428  | 0  |         sub2 = extractSubstitution(ruleSet, predecessor, status);  | 
429  | 0  |     }  | 
430  | 0  |     int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0);  | 
431  | 0  |     int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1);  | 
432  | 0  |     if (pluralRuleEnd >= 0) { | 
433  | 0  |         int32_t endType = fRuleText.indexOf(gComma, pluralRuleStart);  | 
434  | 0  |         if (endType < 0) { | 
435  | 0  |             status = U_PARSE_ERROR;  | 
436  | 0  |             return;  | 
437  | 0  |         }  | 
438  | 0  |         UnicodeString type(fRuleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2));  | 
439  | 0  |         UPluralType pluralType;  | 
440  | 0  |         if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { | 
441  | 0  |             pluralType = UPLURAL_TYPE_CARDINAL;  | 
442  | 0  |         }  | 
443  | 0  |         else if (type.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) { | 
444  | 0  |             pluralType = UPLURAL_TYPE_ORDINAL;  | 
445  | 0  |         }  | 
446  | 0  |         else { | 
447  | 0  |             status = U_ILLEGAL_ARGUMENT_ERROR;  | 
448  | 0  |             return;  | 
449  | 0  |         }  | 
450  | 0  |         rulePatternFormat = formatter->createPluralFormat(pluralType,  | 
451  | 0  |                 fRuleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status);  | 
452  | 0  |     }  | 
453  | 0  | }  | 
454  |  |  | 
455  |  | /**  | 
456  |  | * Searches the rule's rule text for the first substitution token,  | 
457  |  | * creates a substitution based on it, and removes the token from  | 
458  |  | * the rule's rule text.  | 
459  |  | * @param owner The rule set containing this rule  | 
460  |  | * @param predecessor The rule preceding this one in the rule set's  | 
461  |  | * rule list  | 
462  |  | * @param ownersOwner The RuleBasedNumberFormat that owns this rule  | 
463  |  | * @return The newly-created substitution.  This is never null; if  | 
464  |  | * the rule text doesn't contain any substitution tokens, this will  | 
465  |  | * be a NullSubstitution.  | 
466  |  | */  | 
467  |  | NFSubstitution *  | 
468  |  | NFRule::extractSubstitution(const NFRuleSet* ruleSet,  | 
469  |  |                             const NFRule* predecessor,  | 
470  |  |                             UErrorCode& status)  | 
471  | 0  | { | 
472  | 0  |     NFSubstitution* result = NULL;  | 
473  |  |  | 
474  |  |     // search the rule's rule text for the first two characters of  | 
475  |  |     // a substitution token  | 
476  | 0  |     int32_t subStart = indexOfAnyRulePrefix();  | 
477  | 0  |     int32_t subEnd = subStart;  | 
478  |  |  | 
479  |  |     // if we didn't find one, create a null substitution positioned  | 
480  |  |     // at the end of the rule text  | 
481  | 0  |     if (subStart == -1) { | 
482  | 0  |         return NULL;  | 
483  | 0  |     }  | 
484  |  |  | 
485  |  |     // special-case the ">>>" token, since searching for the > at the  | 
486  |  |     // end will actually find the > in the middle  | 
487  | 0  |     if (fRuleText.indexOf(gGreaterGreaterGreater, 3, 0) == subStart) { | 
488  | 0  |         subEnd = subStart + 2;  | 
489  |  |  | 
490  |  |         // otherwise the substitution token ends with the same character  | 
491  |  |         // it began with  | 
492  | 0  |     } else { | 
493  | 0  |         UChar c = fRuleText.charAt(subStart);  | 
494  | 0  |         subEnd = fRuleText.indexOf(c, subStart + 1);  | 
495  |  |         // special case for '<%foo<<'  | 
496  | 0  |         if (c == gLessThan && subEnd != -1 && subEnd < fRuleText.length() - 1 && fRuleText.charAt(subEnd+1) == c) { | 
497  |  |             // ordinals use "=#,##0==%abbrev=" as their rule.  Notice that the '==' in the middle  | 
498  |  |             // occurs because of the juxtaposition of two different rules.  The check for '<' is a hack  | 
499  |  |             // to get around this.  Having the duplicate at the front would cause problems with  | 
500  |  |             // rules like "<<%" to format, say, percents...  | 
501  | 0  |             ++subEnd;  | 
502  | 0  |         }  | 
503  | 0  |    }  | 
504  |  |  | 
505  |  |     // if we don't find the end of the token (i.e., if we're on a single,  | 
506  |  |     // unmatched token character), create a null substitution positioned  | 
507  |  |     // at the end of the rule  | 
508  | 0  |     if (subEnd == -1) { | 
509  | 0  |         return NULL;  | 
510  | 0  |     }  | 
511  |  |  | 
512  |  |     // if we get here, we have a real substitution token (or at least  | 
513  |  |     // some text bounded by substitution token characters).  Use  | 
514  |  |     // makeSubstitution() to create the right kind of substitution  | 
515  | 0  |     UnicodeString subToken;  | 
516  | 0  |     subToken.setTo(fRuleText, subStart, subEnd + 1 - subStart);  | 
517  | 0  |     result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet,  | 
518  | 0  |         this->formatter, subToken, status);  | 
519  |  |  | 
520  |  |     // remove the substitution from the rule text  | 
521  | 0  |     fRuleText.removeBetween(subStart, subEnd+1);  | 
522  |  | 
  | 
523  | 0  |     return result;  | 
524  | 0  | }  | 
525  |  |  | 
526  |  | /**  | 
527  |  |  * Sets the rule's base value, and causes the radix and exponent  | 
528  |  |  * to be recalculated.  This is used during construction when we  | 
529  |  |  * don't know the rule's base value until after it's been  | 
530  |  |  * constructed.  It should be used at any other time.  | 
531  |  |  * @param The new base value for the rule.  | 
532  |  |  */  | 
533  |  | void  | 
534  |  | NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status)  | 
535  | 0  | { | 
536  |  |     // set the base value  | 
537  | 0  |     baseValue = newBaseValue;  | 
538  | 0  |     radix = 10;  | 
539  |  |  | 
540  |  |     // if this isn't a special rule, recalculate the radix and exponent  | 
541  |  |     // (the radix always defaults to 10; if it's supposed to be something  | 
542  |  |     // else, it's cleaned up by the caller and the exponent is  | 
543  |  |     // recalculated again-- the only function that does this is  | 
544  |  |     // NFRule.parseRuleDescriptor() )  | 
545  | 0  |     if (baseValue >= 1) { | 
546  | 0  |         exponent = expectedExponent();  | 
547  |  |  | 
548  |  |         // this function gets called on a fully-constructed rule whose  | 
549  |  |         // description didn't specify a base value.  This means it  | 
550  |  |         // has substitutions, and some substitutions hold on to copies  | 
551  |  |         // of the rule's divisor.  Fix their copies of the divisor.  | 
552  | 0  |         if (sub1 != NULL) { | 
553  | 0  |             sub1->setDivisor(radix, exponent, status);  | 
554  | 0  |         }  | 
555  | 0  |         if (sub2 != NULL) { | 
556  | 0  |             sub2->setDivisor(radix, exponent, status);  | 
557  | 0  |         }  | 
558  |  |  | 
559  |  |         // if this is a special rule, its radix and exponent are basically  | 
560  |  |         // ignored.  Set them to "safe" default values  | 
561  | 0  |     } else { | 
562  | 0  |         exponent = 0;  | 
563  | 0  |     }  | 
564  | 0  | }  | 
565  |  |  | 
566  |  | /**  | 
567  |  | * This calculates the rule's exponent based on its radix and base  | 
568  |  | * value.  This will be the highest power the radix can be raised to  | 
569  |  | * and still produce a result less than or equal to the base value.  | 
570  |  | */  | 
571  |  | int16_t  | 
572  |  | NFRule::expectedExponent() const  | 
573  | 0  | { | 
574  |  |     // since the log of 0, or the log base 0 of something, causes an  | 
575  |  |     // error, declare the exponent in these cases to be 0 (we also  | 
576  |  |     // deal with the special-rule identifiers here)  | 
577  | 0  |     if (radix == 0 || baseValue < 1) { | 
578  | 0  |         return 0;  | 
579  | 0  |     }  | 
580  |  |  | 
581  |  |     // we get rounding error in some cases-- for example, log 1000 / log 10  | 
582  |  |     // gives us 1.9999999996 instead of 2.  The extra logic here is to take  | 
583  |  |     // that into account  | 
584  | 0  |     int16_t tempResult = (int16_t)(uprv_log((double)baseValue) / uprv_log((double)radix));  | 
585  | 0  |     int64_t temp = util64_pow(radix, tempResult + 1);  | 
586  | 0  |     if (temp <= baseValue) { | 
587  | 0  |         tempResult += 1;  | 
588  | 0  |     }  | 
589  | 0  |     return tempResult;  | 
590  | 0  | }  | 
591  |  |  | 
592  |  | /**  | 
593  |  |  * Searches the rule's rule text for any of the specified strings.  | 
594  |  |  * @return The index of the first match in the rule's rule text  | 
595  |  |  * (i.e., the first substring in the rule's rule text that matches  | 
596  |  |  * _any_ of the strings in "strings").  If none of the strings in  | 
597  |  |  * "strings" is found in the rule's rule text, returns -1.  | 
598  |  |  */  | 
599  |  | int32_t  | 
600  |  | NFRule::indexOfAnyRulePrefix() const  | 
601  | 0  | { | 
602  | 0  |     int result = -1;  | 
603  | 0  |     for (int i = 0; RULE_PREFIXES[i]; i++) { | 
604  | 0  |         int32_t pos = fRuleText.indexOf(*RULE_PREFIXES[i]);  | 
605  | 0  |         if (pos != -1 && (result == -1 || pos < result)) { | 
606  | 0  |             result = pos;  | 
607  | 0  |         }  | 
608  | 0  |     }  | 
609  | 0  |     return result;  | 
610  | 0  | }  | 
611  |  |  | 
612  |  | //-----------------------------------------------------------------------  | 
613  |  | // boilerplate  | 
614  |  | //-----------------------------------------------------------------------  | 
615  |  |  | 
616  |  | static UBool  | 
617  |  | util_equalSubstitutions(const NFSubstitution* sub1, const NFSubstitution* sub2)  | 
618  | 0  | { | 
619  | 0  |     if (sub1) { | 
620  | 0  |         if (sub2) { | 
621  | 0  |             return *sub1 == *sub2;  | 
622  | 0  |         }  | 
623  | 0  |     } else if (!sub2) { | 
624  | 0  |         return TRUE;  | 
625  | 0  |     }  | 
626  | 0  |     return FALSE;  | 
627  | 0  | }  | 
628  |  |  | 
629  |  | /**  | 
630  |  | * Tests two rules for equality.  | 
631  |  | * @param that The rule to compare this one against  | 
632  |  | * @return True is the two rules are functionally equivalent  | 
633  |  | */  | 
634  |  | bool  | 
635  |  | NFRule::operator==(const NFRule& rhs) const  | 
636  | 0  | { | 
637  | 0  |     return baseValue == rhs.baseValue  | 
638  | 0  |         && radix == rhs.radix  | 
639  | 0  |         && exponent == rhs.exponent  | 
640  | 0  |         && fRuleText == rhs.fRuleText  | 
641  | 0  |         && util_equalSubstitutions(sub1, rhs.sub1)  | 
642  | 0  |         && util_equalSubstitutions(sub2, rhs.sub2);  | 
643  | 0  | }  | 
644  |  |  | 
/**
* NFRule::_appendRuleText() (defined below, after the util_append64()
* helper) appends a textual representation of the rule to its "result"
* argument.  This won't necessarily be the same as the description that
* this rule was created with, but it will produce the same result.
*/
651  |  | static void util_append64(UnicodeString& result, int64_t n)  | 
652  | 0  | { | 
653  | 0  |     UChar buffer[256];  | 
654  | 0  |     int32_t len = util64_tou(n, buffer, sizeof(buffer));  | 
655  | 0  |     UnicodeString temp(buffer, len);  | 
656  | 0  |     result.append(temp);  | 
657  | 0  | }  | 
658  |  |  | 
659  |  | void  | 
660  |  | NFRule::_appendRuleText(UnicodeString& result) const  | 
661  | 0  | { | 
662  | 0  |     switch (getType()) { | 
663  | 0  |     case kNegativeNumberRule: result.append(gMinusX, 2); break;  | 
664  | 0  |     case kImproperFractionRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break;  | 
665  | 0  |     case kProperFractionRule: result.append(gZero).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break;  | 
666  | 0  |     case kDefaultRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gZero); break;  | 
667  | 0  |     case kInfinityRule: result.append(gInf, 3); break;  | 
668  | 0  |     case kNaNRule: result.append(gNaN, 3); break;  | 
669  | 0  |     default:  | 
670  |  |         // for a normal rule, write out its base value, and if the radix is  | 
671  |  |         // something other than 10, write out the radix (with the preceding  | 
672  |  |         // slash, of course).  Then calculate the expected exponent and if  | 
673  |  |         // if isn't the same as the actual exponent, write an appropriate  | 
674  |  |         // number of > signs.  Finally, terminate the whole thing with  | 
675  |  |         // a colon.  | 
676  | 0  |         util_append64(result, baseValue);  | 
677  | 0  |         if (radix != 10) { | 
678  | 0  |             result.append(gSlash);  | 
679  | 0  |             util_append64(result, radix);  | 
680  | 0  |         }  | 
681  | 0  |         int numCarets = expectedExponent() - exponent;  | 
682  | 0  |         for (int i = 0; i < numCarets; i++) { | 
683  | 0  |             result.append(gGreaterThan);  | 
684  | 0  |         }  | 
685  | 0  |         break;  | 
686  | 0  |     }  | 
687  | 0  |     result.append(gColon);  | 
688  | 0  |     result.append(gSpace);  | 
689  |  |  | 
690  |  |     // if the rule text begins with a space, write an apostrophe  | 
691  |  |     // (whitespace after the rule descriptor is ignored; the  | 
692  |  |     // apostrophe is used to make the whitespace significant)  | 
693  | 0  |     if (fRuleText.charAt(0) == gSpace && (sub1 == NULL || sub1->getPos() != 0)) { | 
694  | 0  |         result.append(gTick);  | 
695  | 0  |     }  | 
696  |  |  | 
697  |  |     // now, write the rule's rule text, inserting appropriate  | 
698  |  |     // substitution tokens in the appropriate places  | 
699  | 0  |     UnicodeString ruleTextCopy;  | 
700  | 0  |     ruleTextCopy.setTo(fRuleText);  | 
701  |  | 
  | 
702  | 0  |     UnicodeString temp;  | 
703  | 0  |     if (sub2 != NULL) { | 
704  | 0  |         sub2->toString(temp);  | 
705  | 0  |         ruleTextCopy.insert(sub2->getPos(), temp);  | 
706  | 0  |     }  | 
707  | 0  |     if (sub1 != NULL) { | 
708  | 0  |         sub1->toString(temp);  | 
709  | 0  |         ruleTextCopy.insert(sub1->getPos(), temp);  | 
710  | 0  |     }  | 
711  |  | 
  | 
712  | 0  |     result.append(ruleTextCopy);  | 
713  |  |  | 
714  |  |     // and finally, top the whole thing off with a semicolon and  | 
715  |  |     // return the result  | 
716  | 0  |     result.append(gSemicolon);  | 
717  | 0  | }  | 
718  |  |  | 
719  |  | int64_t NFRule::getDivisor() const  | 
720  | 0  | { | 
721  | 0  |     return util64_pow(radix, exponent);  | 
722  | 0  | }  | 
723  |  |  | 
724  |  |  | 
725  |  | //-----------------------------------------------------------------------  | 
726  |  | // formatting  | 
727  |  | //-----------------------------------------------------------------------  | 
728  |  |  | 
729  |  | /**  | 
730  |  | * Formats the number, and inserts the resulting text into  | 
731  |  | * toInsertInto.  | 
732  |  | * @param number The number being formatted  | 
733  |  | * @param toInsertInto The string where the resultant text should  | 
734  |  | * be inserted  | 
735  |  | * @param pos The position in toInsertInto where the resultant text  | 
736  |  | * should be inserted  | 
737  |  | */  | 
738  |  | void  | 
739  |  | NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const  | 
740  | 0  | { | 
741  |  |     // first, insert the rule's rule text into toInsertInto at the  | 
742  |  |     // specified position, then insert the results of the substitutions  | 
743  |  |     // into the right places in toInsertInto (notice we do the  | 
744  |  |     // substitutions in reverse order so that the offsets don't get  | 
745  |  |     // messed up)  | 
746  | 0  |     int32_t pluralRuleStart = fRuleText.length();  | 
747  | 0  |     int32_t lengthOffset = 0;  | 
748  | 0  |     if (!rulePatternFormat) { | 
749  | 0  |         toInsertInto.insert(pos, fRuleText);  | 
750  | 0  |     }  | 
751  | 0  |     else { | 
752  | 0  |         pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0);  | 
753  | 0  |         int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart);  | 
754  | 0  |         int initialLength = toInsertInto.length();  | 
755  | 0  |         if (pluralRuleEnd < fRuleText.length() - 1) { | 
756  | 0  |             toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2));  | 
757  | 0  |         }  | 
758  | 0  |         toInsertInto.insert(pos,  | 
759  | 0  |             rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status));  | 
760  | 0  |         if (pluralRuleStart > 0) { | 
761  | 0  |             toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart));  | 
762  | 0  |         }  | 
763  | 0  |         lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength);  | 
764  | 0  |     }  | 
765  |  | 
  | 
766  | 0  |     if (sub2 != NULL) { | 
767  | 0  |         sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status);  | 
768  | 0  |     }  | 
769  | 0  |     if (sub1 != NULL) { | 
770  | 0  |         sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status);  | 
771  | 0  |     }  | 
772  | 0  | }  | 
773  |  |  | 
774  |  | /**  | 
775  |  | * Formats the number, and inserts the resulting text into  | 
776  |  | * toInsertInto.  | 
777  |  | * @param number The number being formatted  | 
778  |  | * @param toInsertInto The string where the resultant text should  | 
779  |  | * be inserted  | 
780  |  | * @param pos The position in toInsertInto where the resultant text  | 
781  |  | * should be inserted  | 
782  |  | */  | 
783  |  | void  | 
784  |  | NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const  | 
785  | 0  | { | 
786  |  |     // first, insert the rule's rule text into toInsertInto at the  | 
787  |  |     // specified position, then insert the results of the substitutions  | 
788  |  |     // into the right places in toInsertInto  | 
789  |  |     // [again, we have two copies of this routine that do the same thing  | 
790  |  |     // so that we don't sacrifice precision in a long by casting it  | 
791  |  |     // to a double]  | 
792  | 0  |     int32_t pluralRuleStart = fRuleText.length();  | 
793  | 0  |     int32_t lengthOffset = 0;  | 
794  | 0  |     if (!rulePatternFormat) { | 
795  | 0  |         toInsertInto.insert(pos, fRuleText);  | 
796  | 0  |     }  | 
797  | 0  |     else { | 
798  | 0  |         pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0);  | 
799  | 0  |         int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart);  | 
800  | 0  |         int initialLength = toInsertInto.length();  | 
801  | 0  |         if (pluralRuleEnd < fRuleText.length() - 1) { | 
802  | 0  |             toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2));  | 
803  | 0  |         }  | 
804  | 0  |         double pluralVal = number;  | 
805  | 0  |         if (0 <= pluralVal && pluralVal < 1) { | 
806  |  |             // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior.  | 
807  |  |             // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors.  | 
808  | 0  |             pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent));  | 
809  | 0  |         }  | 
810  | 0  |         else { | 
811  | 0  |             pluralVal = pluralVal / util64_pow(radix, exponent);  | 
812  | 0  |         }  | 
813  | 0  |         toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status));  | 
814  | 0  |         if (pluralRuleStart > 0) { | 
815  | 0  |             toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart));  | 
816  | 0  |         }  | 
817  | 0  |         lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength);  | 
818  | 0  |     }  | 
819  |  | 
  | 
820  | 0  |     if (sub2 != NULL) { | 
821  | 0  |         sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status);  | 
822  | 0  |     }  | 
823  | 0  |     if (sub1 != NULL) { | 
824  | 0  |         sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status);  | 
825  | 0  |     }  | 
826  | 0  | }  | 
827  |  |  | 
828  |  | /**  | 
829  |  | * Used by the owning rule set to determine whether to invoke the  | 
830  |  | * rollback rule (i.e., whether this rule or the one that precedes  | 
831  |  | * it in the rule set's list should be used to format the number)  | 
832  |  | * @param The number being formatted  | 
833  |  | * @return True if the rule set should use the rule that precedes  | 
834  |  | * this one in its list; false if it should use this rule  | 
835  |  | */  | 
836  |  | UBool  | 
837  |  | NFRule::shouldRollBack(int64_t number) const  | 
838  | 0  | { | 
839  |  |     // we roll back if the rule contains a modulus substitution,  | 
840  |  |     // the number being formatted is an even multiple of the rule's  | 
841  |  |     // divisor, and the rule's base value is NOT an even multiple  | 
842  |  |     // of its divisor  | 
843  |  |     // In other words, if the original description had  | 
844  |  |     //    100: << hundred[ >>];  | 
845  |  |     // that expands into  | 
846  |  |     //    100: << hundred;  | 
847  |  |     //    101: << hundred >>;  | 
848  |  |     // internally.  But when we're formatting 200, if we use the rule  | 
849  |  |     // at 101, which would normally apply, we get "two hundred zero".  | 
850  |  |     // To prevent this, we roll back and use the rule at 100 instead.  | 
851  |  |     // This is the logic that makes this happen: the rule at 101 has  | 
852  |  |     // a modulus substitution, its base value isn't an even multiple  | 
853  |  |     // of 100, and the value we're trying to format _is_ an even  | 
854  |  |     // multiple of 100.  This is called the "rollback rule."  | 
855  | 0  |     if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) { | 
856  | 0  |         int64_t re = util64_pow(radix, exponent);  | 
857  | 0  |         return (number % re) == 0 && (baseValue % re) != 0;  | 
858  | 0  |     }  | 
859  | 0  |     return FALSE;  | 
860  | 0  | }  | 
861  |  |  | 
862  |  | //-----------------------------------------------------------------------  | 
863  |  | // parsing  | 
864  |  | //-----------------------------------------------------------------------  | 
865  |  |  | 
/**
* Attempts to parse the string with this rule (see NFRule::doParse()
* below, after the RBNF_DEBUG helper).
* @param text The string being parsed
* @param parsePosition On entry, the value is ignored and assumed to
* be 0. On exit, this has been updated with the position of the first
* character not consumed by matching the text against this rule
* (if this rule doesn't match the text at all, the parse position
* is left unchanged (presumably at 0) and the result is 0).
* @param isFractionRule True if this rule is contained within a
* fraction rule set.  This is only used if the rule has no
* substitutions.
* @param upperBound Passed on to matchToDelimiter() for matching the
* substitutions.
* @param nonNumericalExecutedRuleMask Passed on to matchToDelimiter()
* for matching the substitutions.
* @param resVal Receives the result.  If this rule matched the text,
* this is the rule's base value combined appropriately with the results
* of parsing the substitutions.  If nothing matched, this is 0 and the
* parse position is left unchanged.  The infinity and NaN rules store
* infinity and NaN respectively.
* @return TRUE; the outcome of the parse is reported through resVal
* and parsePosition.
*/
884  |  | #ifdef RBNF_DEBUG  | 
885  |  | #include <stdio.h>  | 
886  |  |  | 
887  |  | static void dumpUS(FILE* f, const UnicodeString& us) { | 
888  |  |   int len = us.length();  | 
889  |  |   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];  | 
890  |  |   if (buf != NULL) { | 
891  |  |     us.extract(0, len, buf);  | 
892  |  |     buf[len] = 0;  | 
893  |  |     fprintf(f, "%s", buf);  | 
894  |  |     uprv_free(buf); //delete[] buf;  | 
895  |  |   }  | 
896  |  | }  | 
897  |  | #endif  | 
/*
 * Parses "text" against this rule and reports the outcome through
 * resVal and parsePosition.
 *
 * text                          The string being parsed; it is copied, never modified.
 * parsePosition                 Receives the index of the first character this rule did
 *                               not match, and/or an error index.
 * isFractionRule                When TRUE and the rule has no first substitution, a
 *                               successful result is replaced by its reciprocal.
 * upperBound                    Passed through to matchToDelimiter().
 * nonNumericalExecutedRuleMask  Passed through to matchToDelimiter().
 * resVal                        Receives the parsed value: long 0 on a prefix mismatch,
 *                               infinity/NaN for those rules, otherwise a double.
 *
 * Always returns TRUE.
 */
UBool
NFRule::doParse(const UnicodeString& text,
                ParsePosition& parsePosition,
                UBool isFractionRule,
                double upperBound,
                uint32_t nonNumericalExecutedRuleMask,
                Formattable& resVal) const
{
    // internally we operate on a copy of the string being parsed
    // (because we're going to change it) and use our own ParsePosition
    ParsePosition pp;
    UnicodeString workText(text);

    // a missing substitution is treated as sitting at the end of the rule text
    int32_t sub1Pos = sub1 != NULL ? sub1->getPos() : fRuleText.length();
    int32_t sub2Pos = sub2 != NULL ? sub2->getPos() : fRuleText.length();

    // check to see whether the text before the first substitution
    // matches the text at the beginning of the string being
    // parsed.  If it does, strip that off the front of workText;
    // otherwise, dump out with a mismatch
    UnicodeString prefix;
    prefix.setTo(fRuleText, 0, sub1Pos);

#ifdef RBNF_DEBUG
    fprintf(stderr, "doParse %p ", this);
    {
        UnicodeString rt;
        _appendRuleText(rt);
        dumpUS(stderr, rt);
    }

    fprintf(stderr, " text: '");
    dumpUS(stderr, text);
    fprintf(stderr, "' prefix: '");
    dumpUS(stderr, prefix);
#endif
    stripPrefix(workText, prefix, pp);
    // number of characters of "text" that stripPrefix() removed
    int32_t prefixLength = text.length() - workText.length();

#ifdef RBNF_DEBUG
    fprintf(stderr, "' pl: %d ppi: %d s1p: %d\n", prefixLength, pp.getIndex(), sub1Pos);
#endif

    // a non-empty prefix was expected but nothing was consumed: mismatch
    if (pp.getIndex() == 0 && sub1Pos != 0) {
        // report the error index (this was commented out in the original
        // because ParsePosition had no error index in 1.1.x; it was
        // restored for the ICU4C port)
        parsePosition.setErrorIndex(pp.getErrorIndex());
        resVal.setLong(0);
        return TRUE;
    }
    if (baseValue == kInfinityRule) {
        // If you match this, don't try to perform any calculations on it.
        parsePosition.setIndex(pp.getIndex());
        resVal.setDouble(uprv_getInfinity());
        return TRUE;
    }
    if (baseValue == kNaNRule) {
        // If you match this, don't try to perform any calculations on it.
        parsePosition.setIndex(pp.getIndex());
        resVal.setDouble(uprv_getNaN());
        return TRUE;
    }

    // this is the fun part.  The basic guts of the rule-matching
    // logic is matchToDelimiter(), which is called twice.  The first
    // time it searches the input string for the rule text BETWEEN
    // the substitutions and tries to match the intervening text
    // in the input string with the first substitution.  If that
    // succeeds, it then calls it again, this time to look for the
    // rule text after the second substitution and to match the
    // intervening input text against the second substitution.
    //
    // For example, say we have a rule that looks like this:
    //    first << middle >> last;
    // and input text that looks like this:
    //    first one middle two last
    // First we use stripPrefix() to match "first " in both places and
    // strip it off the front, leaving
    //    one middle two last
    // Then we use matchToDelimiter() to match " middle " and try to
    // match "one" against a substitution.  If it's successful, we now
    // have
    //    two last
    // We use matchToDelimiter() a second time to match " last" and
    // try to match "two" against a substitution.  If "two" matches
    // the substitution, we have a successful parse.
    //
    // Since it's possible in many cases to find multiple instances
    // of each of these pieces of rule text in the input string,
    // we need to try all the possible combinations of these
    // locations.  This prevents us from prematurely declaring a mismatch,
    // and makes sure we match as much input text as we can.
    int highWaterMark = 0;      // longest match so far, as an index into "text"
    double result = 0;          // value belonging to that longest match
    int start = 0;              // where the first matchToDelimiter() starts looking
    // special rules have non-positive base values; they contribute 0
    double tempBaseValue = (double)(baseValue <= 0 ? 0 : baseValue);

    UnicodeString temp;
    do {
        // our partial parse result starts out as this rule's base
        // value.  If it finds a successful match, matchToDelimiter()
        // will compose this in some way with what it gets back from
        // the substitution, giving us a new partial parse result
        pp.setIndex(0);

        // the delimiter is the rule text between the two substitutions
        temp.setTo(fRuleText, sub1Pos, sub2Pos - sub1Pos);
        double partialResult = matchToDelimiter(workText, start, tempBaseValue,
            temp, pp, sub1,
            nonNumericalExecutedRuleMask,
            upperBound);

        // if we got a successful match (or were trying to match a
        // null substitution), pp is now pointing at the first unmatched
        // character.  Take note of that, and try matchToDelimiter()
        // on the input text again
        if (pp.getIndex() != 0 || sub1 == NULL) {
            start = pp.getIndex();

            // the second match works on what is left after the first one
            UnicodeString workText2;
            workText2.setTo(workText, pp.getIndex(), workText.length() - pp.getIndex());
            ParsePosition pp2;

            // the second matchToDelimiter() will compose our previous
            // partial result with whatever it gets back from its
            // substitution if there's a successful match, giving us
            // a real result
            temp.setTo(fRuleText, sub2Pos, fRuleText.length() - sub2Pos);
            partialResult = matchToDelimiter(workText2, 0, partialResult,
                temp, pp2, sub2,
                nonNumericalExecutedRuleMask,
                upperBound);

            // if we got a successful match on this second
            // matchToDelimiter() call, update the high-water mark
            // and result (if necessary)
            if (pp2.getIndex() != 0 || sub2 == NULL) {
                if (prefixLength + pp.getIndex() + pp2.getIndex() > highWaterMark) {
                    highWaterMark = prefixLength + pp.getIndex() + pp2.getIndex();
                    result = partialResult;
                }
            }
            else {
                // second match failed: keep the furthest error index seen
                // (error-index reporting was restored for the ICU4C port)
                int32_t i_temp = pp2.getErrorIndex() + sub1Pos + pp.getIndex();
                if (i_temp> parsePosition.getErrorIndex()) {
                    parsePosition.setErrorIndex(i_temp);
                }
            }
        }
        else {
            // first match failed: keep the furthest error index seen
            // (error-index reporting was restored for the ICU4C port)
            int32_t i_temp = sub1Pos + pp.getErrorIndex();
            if (i_temp > parsePosition.getErrorIndex()) {
                parsePosition.setErrorIndex(i_temp);
            }
        }
        // keep trying to match things until the outer matchToDelimiter()
        // call fails to make a match (each time, it picks up where it
        // left off the previous time)
        // NOTE(review): "start" is assigned pp.getIndex() whenever the
        // first match succeeds, and pp.getIndex() is 0 whenever it fails,
        // so as written this condition looks like it cannot hold after
        // the first pass -- confirm whether retrying is still intended.
    } while (sub1Pos != sub2Pos
        && pp.getIndex() > 0
        && pp.getIndex() < workText.length()
        && pp.getIndex() != start);

    // update the caller's ParsePosition with our high-water mark
    // (i.e., it now points at the first character this function
    // didn't match-- the ParsePosition is therefore unchanged if
    // we didn't match anything)
    parsePosition.setIndex(highWaterMark);
    // something matched, so clear the error index
    // (error-index reporting was restored for the ICU4C port)
    if (highWaterMark > 0) {
        parsePosition.setErrorIndex(0);
    }

    // this is a hack for one unusual condition: Normally, whether this
    // rule belong to a fraction rule set or not is handled by its
    // substitutions.  But if that rule HAS NO substitutions, then
    // we have to account for it here.  By definition, if the matching
    // rule in a fraction rule set has no substitutions, its numerator
    // is 1, and so the result is the reciprocal of its base value.
    if (isFractionRule && highWaterMark > 0 && sub1 == NULL) {
        result = 1 / result;
    }

    resVal.setDouble(result);
    return TRUE; // ??? do we need to worry if it is a long or a double?
}
1088  |  |  | 
1089  |  | /**  | 
1090  |  | * This function is used by parse() to match the text being parsed  | 
1091  |  | * against a possible prefix string.  This function  | 
1092  |  | * matches characters from the beginning of the string being parsed  | 
1093  |  | * to characters from the prospective prefix.  If they match, pp is  | 
1094  |  | * updated to the first character not matched, and the result is  | 
1095  |  | * the unparsed part of the string.  If they don't match, the whole  | 
1096  |  | * string is returned, and pp is left unchanged.  | 
1097  |  | * @param text The string being parsed  | 
1098  |  | * @param prefix The text to match against  | 
1099  |  | * @param pp On entry, ignored and assumed to be 0.  On exit, points  | 
1100  |  | * to the first unmatched character (assuming the whole prefix matched),  | 
1101  |  | * or is unchanged (if the whole prefix didn't match).  | 
1102  |  | * @return If things match, this is the unparsed part of "text";  | 
1103  |  | * if they didn't match, this is "text".  | 
1104  |  | */  | 
1105  |  | void  | 
1106  |  | NFRule::stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const  | 
1107  | 0  | { | 
1108  |  |     // if the prefix text is empty, dump out without doing anything  | 
1109  | 0  |     if (prefix.length() != 0) { | 
1110  | 0  |       UErrorCode status = U_ZERO_ERROR;  | 
1111  |  |         // use prefixLength() to match the beginning of  | 
1112  |  |         // "text" against "prefix".  This function returns the  | 
1113  |  |         // number of characters from "text" that matched (or 0 if  | 
1114  |  |         // we didn't match the whole prefix)  | 
1115  | 0  |         int32_t pfl = prefixLength(text, prefix, status);  | 
1116  | 0  |         if (U_FAILURE(status)) { // Memory allocation error. | 
1117  | 0  |           return;  | 
1118  | 0  |         }  | 
1119  | 0  |         if (pfl != 0) { | 
1120  |  |             // if we got a successful match, update the parse position  | 
1121  |  |             // and strip the prefix off of "text"  | 
1122  | 0  |             pp.setIndex(pp.getIndex() + pfl);  | 
1123  | 0  |             text.remove(0, pfl);  | 
1124  | 0  |         }  | 
1125  | 0  |     }  | 
1126  | 0  | }  | 
1127  |  |  | 
1128  |  | /**  | 
1129  |  | * Used by parse() to match a substitution and any following text.  | 
1130  |  | * "text" is searched for instances of "delimiter".  For each instance  | 
1131  |  | * of delimiter, the intervening text is tested to see whether it  | 
1132  |  | * matches the substitution.  The longest match wins.  | 
1133  |  | * @param text The string being parsed  | 
1134  |  | * @param startPos The position in "text" where we should start looking  | 
1135  |  | * for "delimiter".  | 
1136  |  | * @param baseValue A partial parse result (often the rule's base value),  | 
1137  |  | * which is combined with the result from matching the substitution  | 
1138  |  | * @param delimiter The string to search "text" for.  | 
1139  |  | * @param pp Ignored and presumed to be 0 on entry.  If there's a match,  | 
1140  |  | * on exit this will point to the first unmatched character.  | 
1141  |  | * @param sub If we find "delimiter" in "text", this substitution is used  | 
1142  |  | * to match the text between the beginning of the string and the  | 
1143  |  | * position of "delimiter."  (If "delimiter" is the empty string, then  | 
1144  |  | * this function just matches against this substitution and updates  | 
1145  |  | * everything accordingly.)  | 
1146  |  | * @param upperBound When matching the substitution, it will only  | 
1147  |  | * consider rules with base values lower than this value.  | 
1148  |  | * @return If there's a match, this is the result of composing  | 
1149  |  | * baseValue with the result of matching the substitution.  Otherwise,  | 
1150  |  | * this is new Long(0).  It's never null.  If the result is an integer,  | 
1151  |  | * this will be an instance of Long; otherwise, it's an instance of  | 
1152  |  | * Double.  | 
1153  |  | *  | 
1154  |  | * !!! note {dlf} in point of fact, in the java code the caller always converts | 
1155  |  | * the result to a double, so we might as well return one.  | 
1156  |  | */  | 
1157  |  | double  | 
1158  |  | NFRule::matchToDelimiter(const UnicodeString& text,  | 
1159  |  |                          int32_t startPos,  | 
1160  |  |                          double _baseValue,  | 
1161  |  |                          const UnicodeString& delimiter,  | 
1162  |  |                          ParsePosition& pp,  | 
1163  |  |                          const NFSubstitution* sub,  | 
1164  |  |                          uint32_t nonNumericalExecutedRuleMask,  | 
1165  |  |                          double upperBound) const  | 
1166  | 0  | { | 
1167  | 0  |   UErrorCode status = U_ZERO_ERROR;  | 
1168  |  |     // if "delimiter" contains real (i.e., non-ignorable) text, search  | 
1169  |  |     // it for "delimiter" beginning at "start".  If that succeeds, then  | 
1170  |  |     // use "sub"'s doParse() method to match the text before the  | 
1171  |  |     // instance of "delimiter" we just found.  | 
1172  | 0  |     if (!allIgnorable(delimiter, status)) { | 
1173  | 0  |       if (U_FAILURE(status)) { //Memory allocation error. | 
1174  | 0  |         return 0;  | 
1175  | 0  |       }  | 
1176  | 0  |         ParsePosition tempPP;  | 
1177  | 0  |         Formattable result;  | 
1178  |  |  | 
1179  |  |         // use findText() to search for "delimiter".  It returns a two-  | 
1180  |  |         // element array: element 0 is the position of the match, and  | 
1181  |  |         // element 1 is the number of characters that matched  | 
1182  |  |         // "delimiter".  | 
1183  | 0  |         int32_t dLen;  | 
1184  | 0  |         int32_t dPos = findText(text, delimiter, startPos, &dLen);  | 
1185  |  |  | 
1186  |  |         // if findText() succeeded, isolate the text preceding the  | 
1187  |  |         // match, and use "sub" to match that text  | 
1188  | 0  |         while (dPos >= 0) { | 
1189  | 0  |             UnicodeString subText;  | 
1190  | 0  |             subText.setTo(text, 0, dPos);  | 
1191  | 0  |             if (subText.length() > 0) { | 
1192  | 0  |                 UBool success = sub->doParse(subText, tempPP, _baseValue, upperBound,  | 
1193  |  | #if UCONFIG_NO_COLLATION  | 
1194  |  |                     FALSE,  | 
1195  |  | #else  | 
1196  | 0  |                     formatter->isLenient(),  | 
1197  | 0  | #endif  | 
1198  | 0  |                     nonNumericalExecutedRuleMask,  | 
1199  | 0  |                     result);  | 
1200  |  |  | 
1201  |  |                 // if the substitution could match all the text up to  | 
1202  |  |                 // where we found "delimiter", then this function has  | 
1203  |  |                 // a successful match.  Bump the caller's parse position  | 
1204  |  |                 // to point to the first character after the text  | 
1205  |  |                 // that matches "delimiter", and return the result  | 
1206  |  |                 // we got from parsing the substitution.  | 
1207  | 0  |                 if (success && tempPP.getIndex() == dPos) { | 
1208  | 0  |                     pp.setIndex(dPos + dLen);  | 
1209  | 0  |                     return result.getDouble();  | 
1210  | 0  |                 }  | 
1211  | 0  |                 else { | 
1212  |  |                     // commented out because ParsePosition doesn't have error index in 1.1.x  | 
1213  |  |                     // restored for ICU4C port  | 
1214  | 0  |                     if (tempPP.getErrorIndex() > 0) { | 
1215  | 0  |                         pp.setErrorIndex(tempPP.getErrorIndex());  | 
1216  | 0  |                     } else { | 
1217  | 0  |                         pp.setErrorIndex(tempPP.getIndex());  | 
1218  | 0  |                     }  | 
1219  | 0  |                 }  | 
1220  | 0  |             }  | 
1221  |  |  | 
1222  |  |             // if we didn't match the substitution, search for another  | 
1223  |  |             // copy of "delimiter" in "text" and repeat the loop if  | 
1224  |  |             // we find it  | 
1225  | 0  |             tempPP.setIndex(0);  | 
1226  | 0  |             dPos = findText(text, delimiter, dPos + dLen, &dLen);  | 
1227  | 0  |         }  | 
1228  |  |         // if we make it here, this was an unsuccessful match, and we  | 
1229  |  |         // leave pp unchanged and return 0  | 
1230  | 0  |         pp.setIndex(0);  | 
1231  | 0  |         return 0;  | 
1232  |  |  | 
1233  |  |         // if "delimiter" is empty, or consists only of ignorable characters  | 
1234  |  |         // (i.e., is semantically empty), thwe we obviously can't search  | 
1235  |  |         // for "delimiter".  Instead, just use "sub" to parse as much of  | 
1236  |  |         // "text" as possible.  | 
1237  | 0  |     }  | 
1238  | 0  |     else if (sub == NULL) { | 
1239  | 0  |         return _baseValue;  | 
1240  | 0  |     }  | 
1241  | 0  |     else { | 
1242  | 0  |         ParsePosition tempPP;  | 
1243  | 0  |         Formattable result;  | 
1244  |  |  | 
1245  |  |         // try to match the whole string against the substitution  | 
1246  | 0  |         UBool success = sub->doParse(text, tempPP, _baseValue, upperBound,  | 
1247  |  | #if UCONFIG_NO_COLLATION  | 
1248  |  |             FALSE,  | 
1249  |  | #else  | 
1250  | 0  |             formatter->isLenient(),  | 
1251  | 0  | #endif  | 
1252  | 0  |             nonNumericalExecutedRuleMask,  | 
1253  | 0  |             result);  | 
1254  | 0  |         if (success && (tempPP.getIndex() != 0)) { | 
1255  |  |             // if there's a successful match (or it's a null  | 
1256  |  |             // substitution), update pp to point to the first  | 
1257  |  |             // character we didn't match, and pass the result from  | 
1258  |  |             // sub.doParse() on through to the caller  | 
1259  | 0  |             pp.setIndex(tempPP.getIndex());  | 
1260  | 0  |             return result.getDouble();  | 
1261  | 0  |         }  | 
1262  | 0  |         else { | 
1263  |  |             // commented out because ParsePosition doesn't have error index in 1.1.x  | 
1264  |  |             // restored for ICU4C port  | 
1265  | 0  |             pp.setErrorIndex(tempPP.getErrorIndex());  | 
1266  | 0  |         }  | 
1267  |  |  | 
1268  |  |         // and if we get to here, then nothing matched, so we return  | 
1269  |  |         // 0 and leave pp alone  | 
1270  | 0  |         return 0;  | 
1271  | 0  |     }  | 
1272  | 0  | }  | 
1273  |  |  | 
1274  |  | /**  | 
1275  |  | * Used by stripPrefix() to match characters.  If lenient parse mode  | 
1276  |  | * is off, this just calls startsWith().  If lenient parse mode is on,  | 
1277  |  | * this function uses CollationElementIterators to match characters in  | 
1278  |  | * the strings (only primary-order differences are significant in  | 
1279  |  | * determining whether there's a match).  | 
1280  |  | * @param str The string being tested  | 
1281  |  | * @param prefix The text we're hoping to see at the beginning  | 
1282  |  | * of "str"  | 
1283  |  | * @return If "prefix" is found at the beginning of "str", this  | 
1284  |  | * is the number of characters in "str" that were matched (this  | 
1285  |  | * isn't necessarily the same as the length of "prefix" when matching  | 
1286  |  | * text with a collator).  If there's no match, this is 0.  | 
1287  |  | */  | 
1288  |  | int32_t  | 
1289  |  | NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErrorCode& status) const  | 
1290  | 0  | { | 
1291  |  |     // if we're looking for an empty prefix, it obviously matches  | 
1292  |  |     // zero characters.  Just go ahead and return 0.  | 
1293  | 0  |     if (prefix.length() == 0) { | 
1294  | 0  |         return 0;  | 
1295  | 0  |     }  | 
1296  |  |  | 
1297  | 0  | #if !UCONFIG_NO_COLLATION  | 
1298  |  |     // go through all this grief if we're in lenient-parse mode  | 
1299  | 0  |     if (formatter->isLenient()) { | 
1300  |  |         // Check if non-lenient rule finds the text before call lenient parsing  | 
1301  | 0  |         if (str.startsWith(prefix)) { | 
1302  | 0  |             return prefix.length();  | 
1303  | 0  |         }  | 
1304  |  |         // get the formatter's collator and use it to create two  | 
1305  |  |         // collation element iterators, one over the target string  | 
1306  |  |         // and another over the prefix (right now, we'll throw an  | 
1307  |  |         // exception if the collator we get back from the formatter  | 
1308  |  |         // isn't a RuleBasedCollator, because RuleBasedCollator defines  | 
1309  |  |         // the CollationElementIterator protocol.  Hopefully, this  | 
1310  |  |         // will change someday.)  | 
1311  | 0  |         const RuleBasedCollator* collator = formatter->getCollator();  | 
1312  | 0  |         if (collator == NULL) { | 
1313  | 0  |             status = U_MEMORY_ALLOCATION_ERROR;  | 
1314  | 0  |             return 0;  | 
1315  | 0  |         }  | 
1316  | 0  |         LocalPointer<CollationElementIterator> strIter(collator->createCollationElementIterator(str));  | 
1317  | 0  |         LocalPointer<CollationElementIterator> prefixIter(collator->createCollationElementIterator(prefix));  | 
1318  |  |         // Check for memory allocation error.  | 
1319  | 0  |         if (strIter.isNull() || prefixIter.isNull()) { | 
1320  | 0  |             status = U_MEMORY_ALLOCATION_ERROR;  | 
1321  | 0  |             return 0;  | 
1322  | 0  |         }  | 
1323  |  |  | 
1324  | 0  |         UErrorCode err = U_ZERO_ERROR;  | 
1325  |  |  | 
1326  |  |         // The original code was problematic.  Consider this match:  | 
1327  |  |         // prefix = "fifty-"  | 
1328  |  |         // string = " fifty-7"  | 
1329  |  |         // The intent is to match string up to the '7', by matching 'fifty-' at position 1  | 
1330  |  |         // in the string.  Unfortunately, we were getting a match, and then computing where  | 
1331  |  |         // the match terminated by rematching the string.  The rematch code was using as an  | 
1332  |  |         // initial guess the substring of string between 0 and prefix.length.  Because of  | 
1333  |  |         // the leading space and trailing hyphen (both ignorable) this was succeeding, leaving  | 
1334  |  |         // the position before the hyphen in the string.  Recursing down, we then parsed the  | 
1335  |  |         // remaining string '-7' as numeric.  The resulting number turned out as 43 (50 - 7).  | 
1336  |  |         // This was not pretty, especially since the string "fifty-7" parsed just fine.  | 
1337  |  |         //  | 
1338  |  |         // We have newer APIs now, so we can use calls on the iterator to determine what we  | 
1339  |  |         // matched up to.  If we terminate because we hit the last element in the string,  | 
1340  |  |         // our match terminates at this length.  If we terminate because we hit the last element  | 
1341  |  |         // in the target, our match terminates at one before the element iterator position.  | 
1342  |  |  | 
1343  |  |         // match collation elements between the strings  | 
1344  | 0  |         int32_t oStr = strIter->next(err);  | 
1345  | 0  |         int32_t oPrefix = prefixIter->next(err);  | 
1346  |  | 
  | 
1347  | 0  |         while (oPrefix != CollationElementIterator::NULLORDER) { | 
1348  |  |             // skip over ignorable characters in the target string  | 
1349  | 0  |             while (CollationElementIterator::primaryOrder(oStr) == 0  | 
1350  | 0  |                 && oStr != CollationElementIterator::NULLORDER) { | 
1351  | 0  |                 oStr = strIter->next(err);  | 
1352  | 0  |             }  | 
1353  |  |  | 
1354  |  |             // skip over ignorable characters in the prefix  | 
1355  | 0  |             while (CollationElementIterator::primaryOrder(oPrefix) == 0  | 
1356  | 0  |                 && oPrefix != CollationElementIterator::NULLORDER) { | 
1357  | 0  |                 oPrefix = prefixIter->next(err);  | 
1358  | 0  |             }  | 
1359  |  |  | 
1360  |  |             // dlf: move this above following test, if we consume the  | 
1361  |  |             // entire target, aren't we ok even if the source was also  | 
1362  |  |             // entirely consumed?  | 
1363  |  |  | 
1364  |  |             // if skipping over ignorables brought to the end of  | 
1365  |  |             // the prefix, we DID match: drop out of the loop  | 
1366  | 0  |             if (oPrefix == CollationElementIterator::NULLORDER) { | 
1367  | 0  |                 break;  | 
1368  | 0  |             }  | 
1369  |  |  | 
1370  |  |             // if skipping over ignorables brought us to the end  | 
1371  |  |             // of the target string, we didn't match and return 0  | 
1372  | 0  |             if (oStr == CollationElementIterator::NULLORDER) { | 
1373  | 0  |                 return 0;  | 
1374  | 0  |             }  | 
1375  |  |  | 
1376  |  |             // match collation elements from the two strings  | 
1377  |  |             // (considering only primary differences).  If we  | 
1378  |  |             // get a mismatch, dump out and return 0  | 
1379  | 0  |             if (CollationElementIterator::primaryOrder(oStr)  | 
1380  | 0  |                 != CollationElementIterator::primaryOrder(oPrefix)) { | 
1381  | 0  |                 return 0;  | 
1382  |  |  | 
1383  |  |                 // otherwise, advance to the next character in each string  | 
1384  |  |                 // and loop (we drop out of the loop when we exhaust  | 
1385  |  |                 // collation elements in the prefix)  | 
1386  | 0  |             } else { | 
1387  | 0  |                 oStr = strIter->next(err);  | 
1388  | 0  |                 oPrefix = prefixIter->next(err);  | 
1389  | 0  |             }  | 
1390  | 0  |         }  | 
1391  |  |  | 
1392  | 0  |         int32_t result = strIter->getOffset();  | 
1393  | 0  |         if (oStr != CollationElementIterator::NULLORDER) { | 
1394  | 0  |             --result; // back over character that we don't want to consume;  | 
1395  | 0  |         }  | 
1396  |  | 
  | 
1397  |  | #ifdef RBNF_DEBUG  | 
1398  |  |         fprintf(stderr, "prefix length: %d\n", result);  | 
1399  |  | #endif  | 
1400  | 0  |         return result;  | 
1401  |  | #if 0  | 
1402  |  |         //----------------------------------------------------------------  | 
1403  |  |         // JDK 1.2-specific API call  | 
1404  |  |         // return strIter.getOffset();  | 
1405  |  |         //----------------------------------------------------------------  | 
1406  |  |         // JDK 1.1 HACK (take out for 1.2-specific code)  | 
1407  |  |  | 
1408  |  |         // if we make it to here, we have a successful match.  Now we  | 
1409  |  |         // have to find out HOW MANY characters from the target string  | 
1410  |  |         // matched the prefix (there isn't necessarily a one-to-one  | 
1411  |  |         // mapping between collation elements and characters).  | 
1412  |  |         // In JDK 1.2, there's a simple getOffset() call we can use.  | 
1413  |  |         // In JDK 1.1, on the other hand, we have to go through some  | 
1414  |  |         // ugly contortions.  First, use the collator to compare the  | 
1415  |  |         // same number of characters from the prefix and target string.  | 
1416  |  |         // If they're equal, we're done.  | 
1417  |  |         collator->setStrength(Collator::PRIMARY);  | 
1418  |  |         if (str.length() >= prefix.length()) { | 
1419  |  |             UnicodeString temp;  | 
1420  |  |             temp.setTo(str, 0, prefix.length());  | 
1421  |  |             if (collator->equals(temp, prefix)) { | 
1422  |  | #ifdef RBNF_DEBUG  | 
1423  |  |                 fprintf(stderr, "returning: %d\n", prefix.length());  | 
1424  |  | #endif  | 
1425  |  |                 return prefix.length();  | 
1426  |  |             }  | 
1427  |  |         }  | 
1428  |  |  | 
1429  |  |         // if they're not equal, then we have to compare successively  | 
1430  |  |         // larger and larger substrings of the target string until we  | 
1431  |  |         // get to one that matches the prefix.  At that point, we know  | 
1432  |  |         // how many characters matched the prefix, and we can return.  | 
1433  |  |         int32_t p = 1;  | 
1434  |  |         while (p <= str.length()) { | 
1435  |  |             UnicodeString temp;  | 
1436  |  |             temp.setTo(str, 0, p);  | 
1437  |  |             if (collator->equals(temp, prefix)) { | 
1438  |  |                 return p;  | 
1439  |  |             } else { | 
1440  |  |                 ++p;  | 
1441  |  |             }  | 
1442  |  |         }  | 
1443  |  |  | 
1444  |  |         // SHOULD NEVER GET HERE!!!  | 
1445  |  |         return 0;  | 
1446  |  |         //----------------------------------------------------------------  | 
1447  |  | #endif  | 
1448  |  |  | 
1449  |  |         // If lenient parsing is turned off, forget all that crap above.  | 
1450  |  |         // Just use String.startsWith() and be done with it.  | 
1451  | 0  |   } else  | 
1452  | 0  | #endif  | 
1453  | 0  |   { | 
1454  | 0  |       if (str.startsWith(prefix)) { | 
1455  | 0  |           return prefix.length();  | 
1456  | 0  |       } else { | 
1457  | 0  |           return 0;  | 
1458  | 0  |       }  | 
1459  | 0  |   }  | 
1460  | 0  | }  | 
1461  |  |  | 
1462  |  | /**  | 
1463  |  | * Searches a string for another string.  If lenient parsing is off,  | 
1464  |  | * this just calls indexOf().  If lenient parsing is on, this function  | 
1465  |  | * uses CollationElementIterator to match characters, and only  | 
1466  |  | * primary-order differences are significant in determining whether  | 
1467  |  | * there's a match.  | 
1468  |  | * @param str The string to search  | 
1469  |  | * @param key The string to search "str" for  | 
1470  |  | * @param startingAt The index into "str" where the search is to  | 
1471  |  | * begin  | 
1472  |  | * @return A two-element array of ints.  Element 0 is the position  | 
1473  |  | * of the match, or -1 if there was no match.  Element 1 is the  | 
1474  |  | * number of characters in "str" that matched (which isn't necessarily  | 
1475  |  | * the same as the length of "key")  | 
1476  |  | */  | 
1477  |  | int32_t  | 
1478  |  | NFRule::findText(const UnicodeString& str,  | 
1479  |  |                  const UnicodeString& key,  | 
1480  |  |                  int32_t startingAt,  | 
1481  |  |                  int32_t* length) const  | 
1482  | 0  | { | 
1483  | 0  |     if (rulePatternFormat) { | 
1484  | 0  |         Formattable result;  | 
1485  | 0  |         FieldPosition position(UNUM_INTEGER_FIELD);  | 
1486  | 0  |         position.setBeginIndex(startingAt);  | 
1487  | 0  |         rulePatternFormat->parseType(str, this, result, position);  | 
1488  | 0  |         int start = position.getBeginIndex();  | 
1489  | 0  |         if (start >= 0) { | 
1490  | 0  |             int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0);  | 
1491  | 0  |             int32_t pluralRuleSuffix = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2;  | 
1492  | 0  |             int32_t matchLen = position.getEndIndex() - start;  | 
1493  | 0  |             UnicodeString prefix(fRuleText.tempSubString(0, pluralRuleStart));  | 
1494  | 0  |             UnicodeString suffix(fRuleText.tempSubString(pluralRuleSuffix));  | 
1495  | 0  |             if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0  | 
1496  | 0  |                     && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0)  | 
1497  | 0  |             { | 
1498  | 0  |                 *length = matchLen + prefix.length() + suffix.length();  | 
1499  | 0  |                 return start - prefix.length();  | 
1500  | 0  |             }  | 
1501  | 0  |         }  | 
1502  | 0  |         *length = 0;  | 
1503  | 0  |         return -1;  | 
1504  | 0  |     }  | 
1505  | 0  |     if (!formatter->isLenient()) { | 
1506  |  |         // if lenient parsing is turned off, this is easy: just call  | 
1507  |  |         // String.indexOf() and we're done  | 
1508  | 0  |         *length = key.length();  | 
1509  | 0  |         return str.indexOf(key, startingAt);  | 
1510  | 0  |     }  | 
1511  | 0  |     else { | 
1512  |  |         // Check if non-lenient rule finds the text before call lenient parsing  | 
1513  | 0  |         *length = key.length();  | 
1514  | 0  |         int32_t pos = str.indexOf(key, startingAt);  | 
1515  | 0  |         if(pos >= 0) { | 
1516  | 0  |             return pos;  | 
1517  | 0  |         } else { | 
1518  |  |             // but if lenient parsing is turned ON, we've got some work ahead of us  | 
1519  | 0  |             return findTextLenient(str, key, startingAt, length);  | 
1520  | 0  |         }  | 
1521  | 0  |     }  | 
1522  | 0  | }  | 
1523  |  |  | 
1524  |  | int32_t  | 
1525  |  | NFRule::findTextLenient(const UnicodeString& str,  | 
1526  |  |                  const UnicodeString& key,  | 
1527  |  |                  int32_t startingAt,  | 
1528  |  |                  int32_t* length) const  | 
1529  | 0  | { | 
1530  |  |     //----------------------------------------------------------------  | 
1531  |  |     // JDK 1.1 HACK (take out of 1.2-specific code)  | 
1532  |  |  | 
1533  |  |     // in JDK 1.2, CollationElementIterator provides us with an  | 
1534  |  |     // API to map between character offsets and collation elements  | 
1535  |  |     // and we can do this by marching through the string comparing  | 
1536  |  |     // collation elements.  We can't do that in JDK 1.1.  Instead,  | 
1537  |  |     // we have to go through this horrible slow mess:  | 
1538  | 0  |     int32_t p = startingAt;  | 
1539  | 0  |     int32_t keyLen = 0;  | 
1540  |  |  | 
1541  |  |     // basically just isolate smaller and smaller substrings of  | 
1542  |  |     // the target string (each running to the end of the string,  | 
1543  |  |     // and with the first one running from startingAt to the end)  | 
1544  |  |     // and then use prefixLength() to see if the search key is at  | 
1545  |  |     // the beginning of each substring.  This is excruciatingly  | 
1546  |  |     // slow, but it will locate the key and tell use how long the  | 
1547  |  |     // matching text was.  | 
1548  | 0  |     UnicodeString temp;  | 
1549  | 0  |     UErrorCode status = U_ZERO_ERROR;  | 
1550  | 0  |     while (p < str.length() && keyLen == 0) { | 
1551  | 0  |         temp.setTo(str, p, str.length() - p);  | 
1552  | 0  |         keyLen = prefixLength(temp, key, status);  | 
1553  | 0  |         if (U_FAILURE(status)) { | 
1554  | 0  |             break;  | 
1555  | 0  |         }  | 
1556  | 0  |         if (keyLen != 0) { | 
1557  | 0  |             *length = keyLen;  | 
1558  | 0  |             return p;  | 
1559  | 0  |         }  | 
1560  | 0  |         ++p;  | 
1561  | 0  |     }  | 
1562  |  |     // if we make it to here, we didn't find it.  Return -1 for the  | 
1563  |  |     // location.  The length should be ignored, but set it to 0,  | 
1564  |  |     // which should be "safe"  | 
1565  | 0  |     *length = 0;  | 
1566  | 0  |     return -1;  | 
1567  | 0  | }  | 
1568  |  |  | 
1569  |  | /**  | 
1570  |  | * Checks to see whether a string consists entirely of ignorable  | 
1571  |  | * characters.  | 
1572  |  | * @param str The string to test.  | 
1573  |  | * @return true if the string is empty of consists entirely of  | 
1574  |  | * characters that the number formatter's collator says are  | 
1575  |  | * ignorable at the primary-order level.  false otherwise.  | 
1576  |  | */  | 
1577  |  | UBool  | 
1578  |  | NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const  | 
1579  | 0  | { | 
1580  |  |     // if the string is empty, we can just return true  | 
1581  | 0  |     if (str.length() == 0) { | 
1582  | 0  |         return TRUE;  | 
1583  | 0  |     }  | 
1584  |  |  | 
1585  | 0  | #if !UCONFIG_NO_COLLATION  | 
1586  |  |     // if lenient parsing is turned on, walk through the string with  | 
1587  |  |     // a collation element iterator and make sure each collation  | 
1588  |  |     // element is 0 (ignorable) at the primary level  | 
1589  | 0  |     if (formatter->isLenient()) { | 
1590  | 0  |         const RuleBasedCollator* collator = formatter->getCollator();  | 
1591  | 0  |         if (collator == NULL) { | 
1592  | 0  |             status = U_MEMORY_ALLOCATION_ERROR;  | 
1593  | 0  |             return FALSE;  | 
1594  | 0  |         }  | 
1595  | 0  |         LocalPointer<CollationElementIterator> iter(collator->createCollationElementIterator(str));  | 
1596  |  |  | 
1597  |  |         // Memory allocation error check.  | 
1598  | 0  |         if (iter.isNull()) { | 
1599  | 0  |             status = U_MEMORY_ALLOCATION_ERROR;  | 
1600  | 0  |             return FALSE;  | 
1601  | 0  |         }  | 
1602  |  |  | 
1603  | 0  |         UErrorCode err = U_ZERO_ERROR;  | 
1604  | 0  |         int32_t o = iter->next(err);  | 
1605  | 0  |         while (o != CollationElementIterator::NULLORDER  | 
1606  | 0  |             && CollationElementIterator::primaryOrder(o) == 0) { | 
1607  | 0  |             o = iter->next(err);  | 
1608  | 0  |         }  | 
1609  |  | 
  | 
1610  | 0  |         return o == CollationElementIterator::NULLORDER;  | 
1611  | 0  |     }  | 
1612  | 0  | #endif  | 
1613  |  |  | 
1614  |  |     // if lenient parsing is turned off, there is no such thing as  | 
1615  |  |     // an ignorable character: return true only if the string is empty  | 
1616  | 0  |     return FALSE;  | 
1617  | 0  | }  | 
1618  |  |  | 
1619  |  | void  | 
1620  | 0  | NFRule::setDecimalFormatSymbols(const DecimalFormatSymbols& newSymbols, UErrorCode& status) { | 
1621  | 0  |     if (sub1 != NULL) { | 
1622  | 0  |         sub1->setDecimalFormatSymbols(newSymbols, status);  | 
1623  | 0  |     }  | 
1624  | 0  |     if (sub2 != NULL) { | 
1625  | 0  |         sub2->setDecimalFormatSymbols(newSymbols, status);  | 
1626  | 0  |     }  | 
1627  | 0  | }  | 
1628  |  |  | 
1629  |  | U_NAMESPACE_END  | 
1630  |  |  | 
1631  |  | /* U_HAVE_RBNF */  | 
1632  |  | #endif  |