/src/icu/icu4c/source/i18n/nfrs.cpp
Line | Count | Source |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ****************************************************************************** |
5 | | * Copyright (C) 1997-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ****************************************************************************** |
8 | | * file name: nfrs.cpp |
9 | | * encoding: UTF-8 |
10 | | * tab size: 8 (not used) |
11 | | * indentation:4 |
12 | | * |
13 | | * Modification history |
14 | | * Date Name Comments |
15 | | * 10/11/2001 Doug Ported from ICU4J |
16 | | */ |
17 | | |
18 | | #include "nfrs.h" |
19 | | |
20 | | #if U_HAVE_RBNF |
21 | | |
22 | | #include "unicode/uchar.h" |
23 | | #include "nfrule.h" |
24 | | #include "nfrlist.h" |
25 | | #include "patternprops.h" |
26 | | #include "putilimp.h" |
27 | | |
28 | | #ifdef RBNF_DEBUG |
29 | | #include "cmemory.h" |
30 | | #endif |
31 | | |
// Slot indices into NFRuleSet::nonNumericalRules. Each slot holds at most one
// "special" rule; the trailing comment on each entry shows the rule's pattern.
enum {
    /** -x */
    NEGATIVE_RULE_INDEX = 0,
    /** x.x */
    IMPROPER_FRACTION_RULE_INDEX = 1,
    /** 0.x */
    PROPER_FRACTION_RULE_INDEX = 2,
    /** x.0 */
    DEFAULT_RULE_INDEX = 3,
    /** Inf */
    INFINITY_RULE_INDEX = 4,
    /** NaN */
    NAN_RULE_INDEX = 5,
    /** Number of slots; used as the loop bound when walking nonNumericalRules. */
    NON_NUMERICAL_RULE_LENGTH = 6
};
47 | | |
48 | | U_NAMESPACE_BEGIN |
49 | | |
50 | | #if 0 |
// NOTE(review): this variant is compiled out by the surrounding `#if 0`.
// As written it would not compile (`x.abs()` is called on a plain int64_t),
// and the loop below is Euclid's algorithm, so the value returned is the
// greatest common divisor of x and y rather than their least common multiple.
//
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        do {
            // keep the larger operand in x before reducing it
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            // x = x mod y
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}
75 | | |
76 | | #else |
/**
 * Calculates the least common multiple of x and y.
 *
 * If either operand is zero the result is zero. This keeps the function
 * total: without the guard the gcd would be zero and the final division
 * would trap.
 *
 * The gcd is computed on unsigned magnitudes, so negative operands terminate
 * as well; the sign of the result is the sign of x * y.
 *
 * No overflow detection is performed on the final multiplication.
 *
 * @param x first operand
 * @param y second operand
 * @return lcm(x, y), or 0 if x == 0 or y == 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    if (x == 0 || y == 0) {
        return 0;
    }

    // Work on magnitudes in unsigned arithmetic so that even the most
    // negative int64_t value can be represented.
    uint64_t a = x < 0 ? 0 - static_cast<uint64_t>(x) : static_cast<uint64_t>(x);
    uint64_t b = y < 0 ? 0 - static_cast<uint64_t>(y) : static_cast<uint64_t>(y);

    // Euclid's algorithm: on exit `a` holds gcd(|x|, |y|), which is >= 1
    // because both operands are non-zero.
    while (b != 0) {
        const uint64_t remainder = a % b;
        a = b;
        b = remainder;
    }
    const int64_t gcd = static_cast<int64_t>(a);

    // x * y == gcd(x, y) * lcm(x, y); divide first to keep the
    // intermediate value as small as possible.
    return x / gcd * y;
}
119 | | #endif |
120 | | |
// Single UTF-16 code units used when scanning and emitting rule-set text.
static const char16_t gPercent = 0x0025;    /* '%' - first character of a rule set name */
static const char16_t gColon = 0x003a;      /* ':' - ends the rule set name */
static const char16_t gSemicolon = 0x003b;  /* ';' - separates rules */
static const char16_t gLineFeed = 0x000a;   /* '\n' - appended after each rule by appendRules() */

// NUL-terminated prefix tested against the rule set name in the constructor.
static const char16_t gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// NUL-terminated suffix; a name ending in it has fIsParseable cleared and the suffix removed.
static const char16_t gNoparse[] =
{
    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
}; /* "@noparse" */
135 | | |
136 | | NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) |
137 | 86.9k | : rules(0) |
138 | 86.9k | , owner(_owner) |
139 | 86.9k | , fractionRules() |
140 | 86.9k | { |
141 | 608k | for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
142 | 521k | nonNumericalRules[i] = nullptr; |
143 | 521k | } |
144 | | |
145 | 86.9k | if (U_FAILURE(status)) { |
146 | 0 | return; |
147 | 0 | } |
148 | | |
149 | 86.9k | UnicodeString& description = descriptions[index]; // !!! make sure index is valid |
150 | | |
151 | 86.9k | if (description.isEmpty()) { |
152 | | // throw new IllegalArgumentException("Empty rule set description"); |
153 | 40 | status = U_PARSE_ERROR; |
154 | 40 | return; |
155 | 40 | } |
156 | | |
157 | | // if the description begins with a rule set name (the rule set |
158 | | // name can be omitted in formatter descriptions that consist |
159 | | // of only one rule set), copy it out into our "name" member |
160 | | // and delete it from the description |
161 | 86.8k | if (description.charAt(0) == gPercent) { |
162 | 75.9k | int32_t pos = description.indexOf(gColon); |
163 | | // if there are no name or the name is "%". |
164 | 75.9k | if (pos < 2) { |
165 | | // throw new IllegalArgumentException("Rule set name doesn't end in colon"); |
166 | 97 | status = U_PARSE_ERROR; |
167 | 97 | return; |
168 | 75.9k | } else { |
169 | 75.9k | name.setTo(description, 0, pos); |
170 | 78.8k | while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { |
171 | 2.93k | } |
172 | 75.9k | description.remove(0, pos); |
173 | 75.9k | } |
174 | 75.9k | } else { |
175 | 10.8k | name.setTo(UNICODE_STRING_SIMPLE("%default")); |
176 | 10.8k | } |
177 | | |
178 | 86.7k | if (description.isEmpty()) { |
179 | | // throw new IllegalArgumentException("Empty rule set description"); |
180 | 6 | status = U_PARSE_ERROR; |
181 | 6 | return; |
182 | 6 | } |
183 | | |
184 | 86.7k | fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; |
185 | | |
186 | 86.7k | if (name.endsWith(gNoparse, 8)) { |
187 | 0 | fIsParseable = false; |
188 | 0 | name.truncate(name.length() - 8); // remove the @noparse from the name |
189 | 0 | } |
190 | | |
191 | | // all of the other members of NFRuleSet are initialized |
192 | | // by parseRules() |
193 | 86.7k | } |
194 | | |
195 | | void |
196 | | NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) |
197 | 84.9k | { |
198 | | // start by creating a Vector whose elements are Strings containing |
199 | | // the descriptions of the rules (one rule per element). The rules |
200 | | // are separated by semicolons (there's no escape facility: ALL |
201 | | // semicolons are rule delimiters) |
202 | | |
203 | 84.9k | if (U_FAILURE(status)) { |
204 | 0 | return; |
205 | 0 | } |
206 | | |
207 | | // ensure we are starting with an empty rule list |
208 | 84.9k | rules.deleteAll(); |
209 | | |
210 | | // dlf - the original code kept a separate description array for no reason, |
211 | | // so I got rid of it. The loop was too complex so I simplified it. |
212 | | |
213 | 84.9k | UnicodeString currentDescription; |
214 | 84.9k | int32_t oldP = 0; |
215 | 1.32M | while (oldP < description.length()) { |
216 | 1.24M | int32_t p = description.indexOf(gSemicolon, oldP); |
217 | 1.24M | if (p == -1) { |
218 | 10.0k | p = description.length(); |
219 | 10.0k | } |
220 | 1.24M | currentDescription.setTo(description, oldP, p - oldP); |
221 | 1.24M | NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); |
222 | 1.24M | if (U_FAILURE(status)) { |
223 | 1.09k | return; |
224 | 1.09k | } |
225 | 1.24M | oldP = p + 1; |
226 | 1.24M | } |
227 | | |
228 | | // for rules that didn't specify a base value, their base values |
229 | | // were initialized to 0. Make another pass through the list and |
230 | | // set all those rules' base values. We also remove any special |
231 | | // rules from the list and put them into their own member variables |
232 | 83.9k | int64_t defaultBaseValue = 0; |
233 | | |
234 | | // (this isn't a for loop because we might be deleting items from |
235 | | // the vector-- we want to make sure we only increment i when |
236 | | // we _didn't_ delete anything from the vector) |
237 | 83.9k | int32_t rulesSize = rules.size(); |
238 | 1.51M | for (int32_t i = 0; i < rulesSize; i++) { |
239 | 1.43M | NFRule* rule = rules[i]; |
240 | 1.43M | int64_t baseValue = rule->getBaseValue(); |
241 | | |
242 | 1.43M | if (baseValue == 0) { |
243 | | // if the rule's base value is 0, fill in a default |
244 | | // base value (this will be 1 plus the preceding |
245 | | // rule's base value for regular rule sets, and the |
246 | | // same as the preceding rule's base value in fraction |
247 | | // rule sets) |
248 | 283k | rule->setBaseValue(defaultBaseValue, status); |
249 | 283k | if (U_FAILURE(status)) { |
250 | 0 | return; |
251 | 0 | } |
252 | 283k | } |
253 | 1.14M | else { |
254 | | // if it's a regular rule that already knows its base value, |
255 | | // check to make sure the rules are in order, and update |
256 | | // the default base value for the next rule |
257 | 1.14M | if (baseValue < defaultBaseValue) { |
258 | | // throw new IllegalArgumentException("Rules are not in order"); |
259 | 65 | status = U_PARSE_ERROR; |
260 | 65 | return; |
261 | 65 | } |
262 | 1.14M | defaultBaseValue = baseValue; |
263 | 1.14M | } |
264 | 1.43M | if (!fIsFractionRuleSet) { |
265 | 1.42M | ++defaultBaseValue; |
266 | 1.42M | } |
267 | 1.43M | } |
268 | 83.9k | } |
269 | | |
270 | | /** |
271 | | * Set one of the non-numerical rules. |
272 | | * @param rule The rule to set. |
273 | | */ |
274 | 81.5k | void NFRuleSet::setNonNumericalRule(NFRule *rule) { |
275 | 81.5k | switch (rule->getBaseValue()) { |
276 | 47.2k | case NFRule::kNegativeNumberRule: |
277 | 47.2k | delete nonNumericalRules[NEGATIVE_RULE_INDEX]; |
278 | 47.2k | nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; |
279 | 47.2k | return; |
280 | 27.3k | case NFRule::kImproperFractionRule: |
281 | 27.3k | setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true); |
282 | 27.3k | return; |
283 | 1.71k | case NFRule::kProperFractionRule: |
284 | 1.71k | setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true); |
285 | 1.71k | return; |
286 | 5.26k | case NFRule::kDefaultRule: |
287 | 5.26k | setBestFractionRule(DEFAULT_RULE_INDEX, rule, true); |
288 | 5.26k | return; |
289 | 0 | case NFRule::kInfinityRule: |
290 | 0 | delete nonNumericalRules[INFINITY_RULE_INDEX]; |
291 | 0 | nonNumericalRules[INFINITY_RULE_INDEX] = rule; |
292 | 0 | return; |
293 | 0 | case NFRule::kNaNRule: |
294 | 0 | delete nonNumericalRules[NAN_RULE_INDEX]; |
295 | 0 | nonNumericalRules[NAN_RULE_INDEX] = rule; |
296 | 0 | return; |
297 | 0 | case NFRule::kNoBase: |
298 | 0 | case NFRule::kOtherRule: |
299 | 0 | default: |
300 | | // If we do not remember the rule inside the object. |
301 | | // delete it here to prevent memory leak. |
302 | 0 | delete rule; |
303 | 0 | return; |
304 | 81.5k | } |
305 | 81.5k | } |
306 | | |
307 | | /** |
308 | | * Determine the best fraction rule to use. Rules matching the decimal point from |
309 | | * DecimalFormatSymbols become the main set of rules to use. |
310 | | * @param originalIndex The index into nonNumericalRules |
311 | | * @param newRule The new rule to consider |
312 | | * @param rememberRule Should the new rule be added to fractionRules. |
313 | | */ |
314 | 34.3k | void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { |
315 | 34.3k | if (rememberRule) { |
316 | 34.3k | fractionRules.add(newRule); |
317 | 34.3k | } |
318 | 34.3k | NFRule *bestResult = nonNumericalRules[originalIndex]; |
319 | 34.3k | if (bestResult == nullptr) { |
320 | 20.7k | nonNumericalRules[originalIndex] = newRule; |
321 | 20.7k | } |
322 | 13.6k | else { |
323 | | // We have more than one. Which one is better? |
324 | 13.6k | const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); |
325 | 13.6k | if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) |
326 | 13.6k | == newRule->getDecimalPoint()) |
327 | 195 | { |
328 | 195 | nonNumericalRules[originalIndex] = newRule; |
329 | 195 | } |
330 | | // else leave it alone |
331 | 13.6k | } |
332 | 34.3k | } |
333 | | |
334 | | NFRuleSet::~NFRuleSet() |
335 | 86.8k | { |
336 | 607k | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
337 | 521k | if (i != IMPROPER_FRACTION_RULE_INDEX |
338 | 434k | && i != PROPER_FRACTION_RULE_INDEX |
339 | 347k | && i != DEFAULT_RULE_INDEX) |
340 | 260k | { |
341 | 260k | delete nonNumericalRules[i]; |
342 | 260k | } |
343 | | // else it will be deleted via NFRuleList fractionRules |
344 | 521k | } |
345 | 86.8k | } |
346 | | |
347 | | static UBool |
348 | | util_equalRules(const NFRule* rule1, const NFRule* rule2) |
349 | 0 | { |
350 | 0 | if (rule1) { |
351 | 0 | if (rule2) { |
352 | 0 | return *rule1 == *rule2; |
353 | 0 | } |
354 | 0 | } else if (!rule2) { |
355 | 0 | return true; |
356 | 0 | } |
357 | 0 | return false; |
358 | 0 | } |
359 | | |
360 | | bool |
361 | | NFRuleSet::operator==(const NFRuleSet& rhs) const |
362 | 0 | { |
363 | 0 | if (rules.size() == rhs.rules.size() && |
364 | 0 | fIsFractionRuleSet == rhs.fIsFractionRuleSet && |
365 | 0 | name == rhs.name) { |
366 | | |
367 | | // ...then compare the non-numerical rule lists... |
368 | 0 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
369 | 0 | if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { |
370 | 0 | return false; |
371 | 0 | } |
372 | 0 | } |
373 | | |
374 | | // ...then compare the rule lists... |
375 | 0 | for (uint32_t i = 0; i < rules.size(); ++i) { |
376 | 0 | if (*rules[i] != *rhs.rules[i]) { |
377 | 0 | return false; |
378 | 0 | } |
379 | 0 | } |
380 | 0 | return true; |
381 | 0 | } |
382 | 0 | return false; |
383 | 0 | } |
384 | | |
385 | | void |
386 | 0 | NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { |
387 | 0 | for (uint32_t i = 0; i < rules.size(); ++i) { |
388 | 0 | rules[i]->setDecimalFormatSymbols(newSymbols, status); |
389 | 0 | } |
390 | | // Switch the fraction rules to mirror the DecimalFormatSymbols. |
391 | 0 | for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) { |
392 | 0 | if (nonNumericalRules[nonNumericalIdx]) { |
393 | 0 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
394 | 0 | NFRule *fractionRule = fractionRules[fIdx]; |
395 | 0 | if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { |
396 | 0 | setBestFractionRule(nonNumericalIdx, fractionRule, false); |
397 | 0 | } |
398 | 0 | } |
399 | 0 | } |
400 | 0 | } |
401 | |
|
402 | 0 | for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { |
403 | 0 | NFRule *rule = nonNumericalRules[nnrIdx]; |
404 | 0 | if (rule) { |
405 | 0 | rule->setDecimalFormatSymbols(newSymbols, status); |
406 | 0 | } |
407 | 0 | } |
408 | 0 | } |
409 | | |
410 | 7.76M | #define RECURSION_LIMIT 64 |
411 | | |
412 | | void |
413 | | NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
414 | 929 | { |
415 | 929 | if (recursionCount >= RECURSION_LIMIT) { |
416 | | // stop recursion |
417 | 0 | status = U_INVALID_STATE_ERROR; |
418 | 0 | return; |
419 | 0 | } |
420 | 929 | const NFRule *rule = findNormalRule(number); |
421 | 929 | if (rule) { // else error, but can't report it |
422 | 929 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
423 | 929 | } |
424 | 929 | } |
425 | | |
426 | | void |
427 | | NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
428 | 0 | { |
429 | 0 | if (recursionCount >= RECURSION_LIMIT) { |
430 | | // stop recursion |
431 | 0 | status = U_INVALID_STATE_ERROR; |
432 | 0 | return; |
433 | 0 | } |
434 | 0 | const NFRule *rule = findDoubleRule(number); |
435 | 0 | if (rule) { // else error, but can't report it |
436 | 0 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
437 | 0 | } |
438 | 0 | } |
439 | | |
440 | | const NFRule* |
441 | | NFRuleSet::findDoubleRule(double number) const |
442 | 0 | { |
443 | | // if this is a fraction rule set, use findFractionRuleSetRule() |
444 | 0 | if (isFractionRuleSet()) { |
445 | 0 | return findFractionRuleSetRule(number); |
446 | 0 | } |
447 | | |
448 | 0 | if (uprv_isNaN(number)) { |
449 | 0 | const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; |
450 | 0 | if (!rule) { |
451 | 0 | rule = owner->getDefaultNaNRule(); |
452 | 0 | } |
453 | 0 | return rule; |
454 | 0 | } |
455 | | |
456 | | // if the number is negative, return the negative number rule |
457 | | // (if there isn't a negative-number rule, we pretend it's a |
458 | | // positive number) |
459 | 0 | if (number < 0) { |
460 | 0 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
461 | 0 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
462 | 0 | } else { |
463 | 0 | number = -number; |
464 | 0 | } |
465 | 0 | } |
466 | | |
467 | 0 | if (uprv_isInfinite(number)) { |
468 | 0 | const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; |
469 | 0 | if (!rule) { |
470 | 0 | rule = owner->getDefaultInfinityRule(); |
471 | 0 | } |
472 | 0 | return rule; |
473 | 0 | } |
474 | | |
475 | | // if the number isn't an integer, we use one of the fraction rules... |
476 | 0 | if (number != uprv_floor(number)) { |
477 | | // if the number is between 0 and 1, return the proper |
478 | | // fraction rule |
479 | 0 | if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { |
480 | 0 | return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; |
481 | 0 | } |
482 | | // otherwise, return the improper fraction rule |
483 | 0 | else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { |
484 | 0 | return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; |
485 | 0 | } |
486 | 0 | } |
487 | | |
488 | | // if there's a default rule, use it to format the number |
489 | 0 | if (nonNumericalRules[DEFAULT_RULE_INDEX]) { |
490 | 0 | return nonNumericalRules[DEFAULT_RULE_INDEX]; |
491 | 0 | } |
492 | | |
493 | | // and if we haven't yet returned a rule, use findNormalRule() |
494 | | // to find the applicable rule |
495 | 0 | int64_t r = util64_fromDouble(number + 0.5); |
496 | 0 | return findNormalRule(r); |
497 | 0 | } |
498 | | |
499 | | const NFRule * |
500 | | NFRuleSet::findNormalRule(int64_t number) const |
501 | 929 | { |
502 | | // if this is a fraction rule set, use findFractionRuleSetRule() |
503 | | // to find the rule (we should only go into this clause if the |
504 | | // value is 0) |
505 | 929 | if (fIsFractionRuleSet) { |
506 | 0 | return findFractionRuleSetRule(static_cast<double>(number)); |
507 | 0 | } |
508 | | |
509 | | // if the number is negative, return the negative-number rule |
510 | | // (if there isn't one, pretend the number is positive) |
511 | 929 | if (number < 0) { |
512 | 0 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
513 | 0 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
514 | 0 | } else { |
515 | 0 | number = -number; |
516 | 0 | } |
517 | 0 | } |
518 | | |
519 | | // we have to repeat the preceding two checks, even though we |
520 | | // do them in findRule(), because the version of format() that |
521 | | // takes a long bypasses findRule() and goes straight to this |
522 | | // function. This function does skip the fraction rules since |
523 | | // we know the value is an integer (it also skips the default |
524 | | // rule, since it's considered a fraction rule. Skipping the |
525 | | // default rule in this function is also how we avoid infinite |
526 | | // recursion) |
527 | | |
528 | | // {dlf} unfortunately this fails if there are no rules except |
529 | | // special rules. If there are no rules, use the default rule. |
530 | | |
531 | | // binary-search the rule list for the applicable rule |
532 | | // (a rule is used for all values from its base value to |
533 | | // the next rule's base value) |
534 | 929 | int32_t hi = rules.size(); |
535 | 929 | if (hi > 0) { |
536 | 929 | int32_t lo = 0; |
537 | | |
538 | 5.67k | while (lo < hi) { |
539 | 5.35k | int32_t mid = (lo + hi) / 2; |
540 | 5.35k | if (rules[mid]->getBaseValue() == number) { |
541 | 605 | return rules[mid]; |
542 | 605 | } |
543 | 4.74k | else if (rules[mid]->getBaseValue() > number) { |
544 | 3.32k | hi = mid; |
545 | 3.32k | } |
546 | 1.42k | else { |
547 | 1.42k | lo = mid + 1; |
548 | 1.42k | } |
549 | 5.35k | } |
550 | 324 | if (hi == 0) { // bad rule set, minimum base > 0 |
551 | 0 | return nullptr; // want to throw exception here |
552 | 0 | } |
553 | | |
554 | 324 | NFRule *result = rules[hi - 1]; |
555 | | |
556 | | // use shouldRollBack() to see whether we need to invoke the |
557 | | // rollback rule (see shouldRollBack()'s documentation for |
558 | | // an explanation of the rollback rule). If we do, roll back |
559 | | // one rule and return that one instead of the one we'd normally |
560 | | // return |
561 | 324 | if (result->shouldRollBack(number)) { |
562 | 0 | if (hi == 1) { // bad rule set, no prior rule to rollback to from this base |
563 | 0 | return nullptr; |
564 | 0 | } |
565 | 0 | result = rules[hi - 2]; |
566 | 0 | } |
567 | 324 | return result; |
568 | 324 | } |
569 | | // else use the default rule |
570 | 0 | return nonNumericalRules[DEFAULT_RULE_INDEX]; |
571 | 929 | } |
572 | | |
573 | | /** |
574 | | * If this rule is a fraction rule set, this function is used by |
575 | | * findRule() to select the most appropriate rule for formatting |
576 | | * the number. Basically, the base value of each rule in the rule |
577 | | * set is treated as the denominator of a fraction. Whichever |
578 | | * denominator can produce the fraction closest in value to the |
579 | | * number passed in is the result. If there's a tie, the earlier |
580 | | * one in the list wins. (If there are two rules in a row with the |
581 | | * same base value, the first one is used when the numerator of the |
582 | | * fraction would be 1, and the second rule is used the rest of the |
583 | | * time. |
584 | | * @param number The number being formatted (which will always be |
585 | | * a number between 0 and 1) |
586 | | * @return The rule to use to format this number |
587 | | */ |
588 | | const NFRule* |
589 | | NFRuleSet::findFractionRuleSetRule(double number) const |
590 | 0 | { |
591 | | // the obvious way to do this (multiply the value being formatted |
592 | | // by each rule's base value until you get an integral result) |
593 | | // doesn't work because of rounding error. This method is more |
594 | | // accurate |
595 | | |
596 | | // find the least common multiple of the rules' base values |
597 | | // and multiply this by the number being formatted. This is |
598 | | // all the precision we need, and we can do all of the rest |
599 | | // of the math using integer arithmetic |
600 | 0 | int64_t leastCommonMultiple = rules[0]->getBaseValue(); |
601 | 0 | if (leastCommonMultiple == 0) { |
602 | 0 | return nullptr; |
603 | 0 | } |
604 | 0 | int64_t numerator; |
605 | 0 | { |
606 | 0 | for (uint32_t i = 1; i < rules.size(); ++i) { |
607 | 0 | leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); |
608 | 0 | } |
609 | 0 | numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5); |
610 | 0 | } |
611 | | // for each rule, do the following... |
612 | 0 | int64_t tempDifference; |
613 | 0 | int64_t difference = util64_fromDouble(uprv_maxMantissa()); |
614 | 0 | int32_t winner = 0; |
615 | 0 | for (uint32_t i = 0; i < rules.size(); ++i) { |
616 | | // "numerator" is the numerator of the fraction if the |
617 | | // denominator is the LCD. The numerator if the rule's |
618 | | // base value is the denominator is "numerator" times the |
619 | | // base value divided bythe LCD. Here we check to see if |
620 | | // that's an integer, and if not, how close it is to being |
621 | | // an integer. |
622 | 0 | tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; |
623 | | |
624 | | |
625 | | // normalize the result of the above calculation: we want |
626 | | // the numerator's distance from the CLOSEST multiple |
627 | | // of the LCD |
628 | 0 | if (leastCommonMultiple - tempDifference < tempDifference) { |
629 | 0 | tempDifference = leastCommonMultiple - tempDifference; |
630 | 0 | } |
631 | | |
632 | | // if this is as close as we've come, keep track of how close |
633 | | // that is, and the line number of the rule that did it. If |
634 | | // we've scored a direct hit, we don't have to look at any more |
635 | | // rules |
636 | 0 | if (tempDifference < difference) { |
637 | 0 | difference = tempDifference; |
638 | 0 | winner = i; |
639 | 0 | if (difference == 0) { |
640 | 0 | break; |
641 | 0 | } |
642 | 0 | } |
643 | 0 | } |
644 | | |
645 | | // if we have two successive rules that both have the winning base |
646 | | // value, then the first one (the one we found above) is used if |
647 | | // the numerator of the fraction is 1 and the second one is used if |
648 | | // the numerator of the fraction is anything else (this lets us |
649 | | // do things like "one third"/"two thirds" without having to define |
650 | | // a whole bunch of extra rule sets) |
651 | 0 | if (static_cast<unsigned>(winner + 1) < rules.size() && |
652 | 0 | rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { |
653 | 0 | double n = static_cast<double>(rules[winner]->getBaseValue()) * number; |
654 | 0 | if (n < 0.5 || n >= 2) { |
655 | 0 | ++winner; |
656 | 0 | } |
657 | 0 | } |
658 | | |
659 | | // finally, return the winning rule |
660 | 0 | return rules[winner]; |
661 | 0 | } |
662 | | |
/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and reports
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param pos The initial position is ignored and assumed
 * to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @param nonNumericalExecutedRuleMask Bit i is set when the special rule
 * in slot i of nonNumericalRules has already been tried; such rules are
 * not tried again.
 * @param recursionCount The current nesting depth.  Parsing stops once
 * it reaches RECURSION_LIMIT.
 * @param result Receives the numerical result of parsing this string.
 * This will be the matching rule's base value, composed appropriately with
 * the results of matching any of its substitutions.  It holds an
 * integral value if the result is integral; otherwise it holds a double.
 * It is always set: if nothing matched the input string at all,
 * it is set to the long value 0, and the parse position is
 * left unchanged.
 * @return false if the recursion limit was reached or the text is empty;
 * true otherwise.
 */
684 | | #ifdef RBNF_DEBUG |
685 | | #include <stdio.h> |
686 | | |
687 | | static void dumpUS(FILE* f, const UnicodeString& us) { |
688 | | int len = us.length(); |
689 | | char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; |
690 | | if (buf != nullptr) { |
691 | | us.extract(0, len, buf); |
692 | | buf[len] = 0; |
693 | | fprintf(f, "%s", buf); |
694 | | uprv_free(buf); //delete[] buf; |
695 | | } |
696 | | } |
697 | | #endif |
698 | | |
699 | | UBool |
700 | | NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const |
701 | 7.76M | { |
702 | | // try matching each rule in the rule set against the text being |
703 | | // parsed. Whichever one matches the most characters is the one |
704 | | // that determines the value we return. |
705 | | |
706 | 7.76M | result.setLong(0); |
707 | | |
708 | | // dump out if we've reached the recursion limit |
709 | 7.76M | if (recursionCount >= RECURSION_LIMIT) { |
710 | | // stop recursion |
711 | 107k | return false; |
712 | 107k | } |
713 | | |
714 | | // dump out if there's no text to parse |
715 | 7.65M | if (text.length() == 0) { |
716 | 16.5k | return 0; |
717 | 16.5k | } |
718 | | |
719 | 7.63M | ParsePosition highWaterMark; |
720 | 7.63M | ParsePosition workingPos = pos; |
721 | | |
722 | | #ifdef RBNF_DEBUG |
723 | | fprintf(stderr, "<nfrs> %x '", this); |
724 | | dumpUS(stderr, name); |
725 | | fprintf(stderr, "' text '"); |
726 | | dumpUS(stderr, text); |
727 | | fprintf(stderr, "'\n"); |
728 | | fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); |
729 | | #endif |
730 | | // Try each of the negative rules, fraction rules, infinity rules and NaN rules |
731 | 53.4M | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
732 | 45.8M | if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { |
733 | | // Mark this rule as being executed so that we don't try to execute it again. |
734 | 2.39k | nonNumericalExecutedRuleMask |= 1 << i; |
735 | | |
736 | 2.39k | Formattable tempResult; |
737 | 2.39k | UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult); |
738 | 2.39k | if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { |
739 | 245 | result = tempResult; |
740 | 245 | highWaterMark = workingPos; |
741 | 245 | } |
742 | 2.39k | workingPos = pos; |
743 | 2.39k | } |
744 | 45.8M | } |
745 | | #ifdef RBNF_DEBUG |
746 | | fprintf(stderr, "<nfrs> continue other with text '"); |
747 | | dumpUS(stderr, text); |
748 | | fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); |
749 | | #endif |
750 | | |
751 | | // finally, go through the regular rules one at a time. We start |
752 | | // at the end of the list because we want to try matching the most |
753 | | // sigificant rule first (this helps ensure that we parse |
754 | | // "five thousand three hundred six" as |
755 | | // "(five thousand) (three hundred) (six)" rather than |
756 | | // "((five thousand three) hundred) (six)"). Skip rules whose |
757 | | // base values are higher than the upper bound (again, this helps |
758 | | // limit ambiguity by making sure the rules that match a rule's |
759 | | // are less significant than the rule containing the substitutions)/ |
760 | 7.63M | { |
761 | 7.63M | int64_t ub = util64_fromDouble(upperBound); |
762 | | #ifdef RBNF_DEBUG |
763 | | { |
764 | | char ubstr[64]; |
765 | | util64_toa(ub, ubstr, 64); |
766 | | char ubstrhex[64]; |
767 | | util64_toa(ub, ubstrhex, 64, 16); |
768 | | fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); |
769 | | } |
770 | | #endif |
771 | 292M | for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { |
772 | 284M | if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { |
773 | 269M | continue; |
774 | 269M | } |
775 | 15.2M | Formattable tempResult; |
776 | 15.2M | UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult); |
777 | 15.2M | if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
778 | 637k | result = tempResult; |
779 | 637k | highWaterMark = workingPos; |
780 | 637k | } |
781 | 15.2M | workingPos = pos; |
782 | 15.2M | } |
783 | 7.63M | } |
784 | | #ifdef RBNF_DEBUG |
785 | | fprintf(stderr, "<nfrs> exit\n"); |
786 | | #endif |
787 | | // finally, update the parse position we were passed to point to the |
788 | | // first character we didn't use, and return the result that |
789 | | // corresponds to that string of characters |
790 | 7.63M | pos = highWaterMark; |
791 | | |
792 | 7.63M | return 1; |
793 | 7.65M | } |
794 | | |
795 | | void |
796 | | NFRuleSet::appendRules(UnicodeString& result) const |
797 | 0 | { |
798 | 0 | uint32_t i; |
799 | | |
800 | | // the rule set name goes first... |
801 | 0 | result.append(name); |
802 | 0 | result.append(gColon); |
803 | 0 | result.append(gLineFeed); |
804 | | |
805 | | // followed by the regular rules... |
806 | 0 | for (i = 0; i < rules.size(); i++) { |
807 | 0 | rules[i]->_appendRuleText(result); |
808 | 0 | result.append(gLineFeed); |
809 | 0 | } |
810 | | |
811 | | // followed by the special rules (if they exist) |
812 | 0 | for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
813 | 0 | NFRule *rule = nonNumericalRules[i]; |
814 | 0 | if (nonNumericalRules[i]) { |
815 | 0 | if (rule->getBaseValue() == NFRule::kImproperFractionRule |
816 | 0 | || rule->getBaseValue() == NFRule::kProperFractionRule |
817 | 0 | || rule->getBaseValue() == NFRule::kDefaultRule) |
818 | 0 | { |
819 | 0 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
820 | 0 | NFRule *fractionRule = fractionRules[fIdx]; |
821 | 0 | if (fractionRule->getBaseValue() == rule->getBaseValue()) { |
822 | 0 | fractionRule->_appendRuleText(result); |
823 | 0 | result.append(gLineFeed); |
824 | 0 | } |
825 | 0 | } |
826 | 0 | } |
827 | 0 | else { |
828 | 0 | rule->_appendRuleText(result); |
829 | 0 | result.append(gLineFeed); |
830 | 0 | } |
831 | 0 | } |
832 | 0 | } |
833 | 0 | } |
834 | | |
835 | | // utility functions |
836 | | |
837 | 7.64M | int64_t util64_fromDouble(double d) { |
838 | 7.64M | int64_t result = 0; |
839 | 7.64M | if (!uprv_isNaN(d)) { |
840 | 7.64M | double mant = uprv_maxMantissa(); |
841 | 7.64M | if (d < -mant) { |
842 | 0 | d = -mant; |
843 | 7.64M | } else if (d > mant) { |
844 | 993 | d = mant; |
845 | 993 | } |
846 | 7.64M | UBool neg = d < 0; |
847 | 7.64M | if (neg) { |
848 | 0 | d = -d; |
849 | 0 | } |
850 | 7.64M | result = static_cast<int64_t>(uprv_floor(d)); |
851 | 7.64M | if (neg) { |
852 | 0 | result = -result; |
853 | 0 | } |
854 | 7.64M | } |
855 | 7.64M | return result; |
856 | 7.64M | } |
857 | | |
/**
 * Raises `base` to `exponent` using binary (square-and-multiply)
 * exponentiation in unsigned 64-bit arithmetic.
 *
 * A base of 0 always yields 0, including for an exponent of 0. Results that
 * do not fit in 64 bits wrap modulo 2^64.
 *
 * @param base      Value to raise.
 * @param exponent  Power to raise it to.
 * @return          base ** exponent (mod 2^64), or 0 when base is 0.
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent) {
    if (base == 0) {
        return 0;
    }

    uint64_t product = 1;
    uint64_t square = base;
    uint32_t bits = exponent;
    while (bits != 0) {
        // Fold in the current power of the base when its bit is set.
        if ((bits & 1u) != 0) {
            product *= square;
        }
        bits >>= 1;
        // Only square when another bit remains to be examined.
        if (bits != 0) {
            square *= square;
        }
    }
    return product;
}
876 | | |
// Output digits for radices 2 through 36, indexed by digit value. The entries
// are given as numeric code points rather than character literals.
static const uint8_t asciiDigits[] = {
    // digit values 0-9: U+0030 .. U+0039
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    // digit values 10-35: U+0061 .. U+007A
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u, 0x68u, 0x69u, 0x6au,
    0x6bu, 0x6cu, 0x6du, 0x6eu, 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u,
    0x75u, 0x76u, 0x77u, 0x78u, 0x79u, 0x7au,
};

// U+002D, written as the sign of negative values in char16_t output.
static const char16_t kUMinus = 0x002D;
886 | | |
887 | | #ifdef RBNF_DEBUG |
// Sign character recognized and emitted by the narrow-char debug helpers.
static const char kMinus = '-';

// Lookup table for the 128 code points below 0x80. An entry of 0 marks a
// character that is not a digit; otherwise the high bit is set and the low
// seven bits hold the digit value (0-35). Upper- and lower-case letters map
// to the same values.
static const uint8_t digitInfo[] = {
    // 0x00 - 0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x30 - 0x3f: digits 0-9
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
    // 0x40 - 0x4f: upper-case letters, values 10-24
    0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    // 0x50 - 0x5f: upper-case letters, values 25-35
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
    // 0x60 - 0x6f: lower-case letters, values 10-24
    0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    // 0x70 - 0x7f: lower-case letters, values 25-35
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
};
908 | | |
909 | | int64_t util64_atoi(const char* str, uint32_t radix) |
910 | | { |
911 | | if (radix > 36) { |
912 | | radix = 36; |
913 | | } else if (radix < 2) { |
914 | | radix = 2; |
915 | | } |
916 | | int64_t lradix = radix; |
917 | | |
918 | | int neg = 0; |
919 | | if (*str == kMinus) { |
920 | | ++str; |
921 | | neg = 1; |
922 | | } |
923 | | int64_t result = 0; |
924 | | uint8_t b; |
925 | | while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { |
926 | | result *= lradix; |
927 | | result += (int32_t)b; |
928 | | } |
929 | | if (neg) { |
930 | | result = -result; |
931 | | } |
932 | | return result; |
933 | | } |
934 | | |
935 | | int64_t util64_utoi(const char16_t* str, uint32_t radix) |
936 | | { |
937 | | if (radix > 36) { |
938 | | radix = 36; |
939 | | } else if (radix < 2) { |
940 | | radix = 2; |
941 | | } |
942 | | int64_t lradix = radix; |
943 | | |
944 | | int neg = 0; |
945 | | if (*str == kUMinus) { |
946 | | ++str; |
947 | | neg = 1; |
948 | | } |
949 | | int64_t result = 0; |
950 | | char16_t c; |
951 | | uint8_t b; |
952 | | while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { |
953 | | result *= lradix; |
954 | | result += (int32_t)b; |
955 | | } |
956 | | if (neg) { |
957 | | result = -result; |
958 | | } |
959 | | return result; |
960 | | } |
961 | | |
962 | | uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) |
963 | | { |
964 | | if (radix > 36) { |
965 | | radix = 36; |
966 | | } else if (radix < 2) { |
967 | | radix = 2; |
968 | | } |
969 | | int64_t base = radix; |
970 | | |
971 | | char* p = buf; |
972 | | if (len && (w < 0) && (radix == 10) && !raw) { |
973 | | w = -w; |
974 | | *p++ = kMinus; |
975 | | --len; |
976 | | } else if (len && (w == 0)) { |
977 | | *p++ = (char)raw ? 0 : asciiDigits[0]; |
978 | | --len; |
979 | | } |
980 | | |
981 | | while (len && w != 0) { |
982 | | int64_t n = w / base; |
983 | | int64_t m = n * base; |
984 | | int32_t d = (int32_t)(w-m); |
985 | | *p++ = raw ? (char)d : asciiDigits[d]; |
986 | | w = n; |
987 | | --len; |
988 | | } |
989 | | if (len) { |
990 | | *p = 0; // null terminate if room for caller convenience |
991 | | } |
992 | | |
993 | | len = p - buf; |
994 | | if (*buf == kMinus) { |
995 | | ++buf; |
996 | | } |
997 | | while (--p > buf) { |
998 | | char c = *p; |
999 | | *p = *buf; |
1000 | | *buf = c; |
1001 | | ++buf; |
1002 | | } |
1003 | | |
1004 | | return len; |
1005 | | } |
1006 | | #endif |
1007 | | |
1008 | | uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw) |
1009 | 0 | { |
1010 | 0 | if (radix > 36) { |
1011 | 0 | radix = 36; |
1012 | 0 | } else if (radix < 2) { |
1013 | 0 | radix = 2; |
1014 | 0 | } |
1015 | 0 | int64_t base = radix; |
1016 | |
|
1017 | 0 | char16_t* p = buf; |
1018 | 0 | if (len && (w < 0) && (radix == 10) && !raw) { |
1019 | 0 | w = -w; |
1020 | 0 | *p++ = kUMinus; |
1021 | 0 | --len; |
1022 | 0 | } else if (len && (w == 0)) { |
1023 | 0 | *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0]; |
1024 | 0 | --len; |
1025 | 0 | } |
1026 | |
|
1027 | 0 | while (len && (w != 0)) { |
1028 | 0 | int64_t n = w / base; |
1029 | 0 | int64_t m = n * base; |
1030 | 0 | int32_t d = static_cast<int32_t>(w - m); |
1031 | 0 | *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]); |
1032 | 0 | w = n; |
1033 | 0 | --len; |
1034 | 0 | } |
1035 | 0 | if (len) { |
1036 | 0 | *p = 0; // null terminate if room for caller convenience |
1037 | 0 | } |
1038 | |
|
1039 | 0 | len = static_cast<uint32_t>(p - buf); |
1040 | 0 | if (*buf == kUMinus) { |
1041 | 0 | ++buf; |
1042 | 0 | } |
1043 | 0 | while (--p > buf) { |
1044 | 0 | char16_t c = *p; |
1045 | 0 | *p = *buf; |
1046 | 0 | *buf = c; |
1047 | 0 | ++buf; |
1048 | 0 | } |
1049 | |
|
1050 | 0 | return len; |
1051 | 0 | } |
1052 | | |
1053 | | |
1054 | | U_NAMESPACE_END |
1055 | | |
1056 | | /* U_HAVE_RBNF */ |
1057 | | #endif |