Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/nfrs.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*   Copyright (C) 1997-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
******************************************************************************
8
*   file name:  nfrs.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
* Modification history
14
* Date        Name      Comments
15
* 10/11/2001  Doug      Ported from ICU4J
16
*/
17
18
#include "nfrs.h"
19
20
#if U_HAVE_RBNF
21
22
#include "unicode/uchar.h"
23
#include "nfrule.h"
24
#include "nfrlist.h"
25
#include "patternprops.h"
26
#include "putilimp.h"
27
28
#ifdef RBNF_DEBUG
29
#include "cmemory.h"
30
#endif
31
32
enum {
33
    /** -x */
34
    NEGATIVE_RULE_INDEX = 0,
35
    /** x.x */
36
    IMPROPER_FRACTION_RULE_INDEX = 1,
37
    /** 0.x */
38
    PROPER_FRACTION_RULE_INDEX = 2,
39
    /** x.0 */
40
    DEFAULT_RULE_INDEX = 3,
41
    /** Inf */
42
    INFINITY_RULE_INDEX = 4,
43
    /** NaN */
44
    NAN_RULE_INDEX = 5,
45
    NON_NUMERICAL_RULE_LENGTH = 6
46
};
47
48
U_NAMESPACE_BEGIN
49
50
#if 0
51
// euclid's algorithm works with doubles
52
// note, doubles only get us up to one quadrillion or so, which
53
// isn't as much range as we get with longs.  We probably still
54
// want either 64-bit math, or BigInteger.
55
56
static int64_t
57
util_lcm(int64_t x, int64_t y)
58
{
59
    x.abs();
60
    y.abs();
61
62
    if (x == 0 || y == 0) {
63
        return 0;
64
    } else {
65
        do {
66
            if (x < y) {
67
                int64_t t = x; x = y; y = t;
68
            }
69
            x -= y * (x/y);
70
        } while (x != 0);
71
72
        return y;
73
    }
74
}
75
76
#else
77
/**
78
 * Calculates the least common multiple of x and y.
79
 */
80
static int64_t
81
util_lcm(int64_t x, int64_t y)
82
0
{
83
    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84
    // vol. 2, 1st ed., pp. 298-299
85
0
    int64_t x1 = x;
86
0
    int64_t y1 = y;
87
88
0
    int p2 = 0;
89
0
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90
0
        ++p2;
91
0
        x1 >>= 1;
92
0
        y1 >>= 1;
93
0
    }
94
95
0
    int64_t t;
96
0
    if ((x1 & 1) == 1) {
97
0
        t = -y1;
98
0
    } else {
99
0
        t = x1;
100
0
    }
101
102
0
    while (t != 0) {
103
0
        while ((t & 1) == 0) {
104
0
            t = t >> 1;
105
0
        }
106
0
        if (t > 0) {
107
0
            x1 = t;
108
0
        } else {
109
0
            y1 = -t;
110
0
        }
111
0
        t = x1 - y1;
112
0
    }
113
114
0
    int64_t gcd = x1 << p2;
115
116
    // x * y == gcd(x, y) * lcm(x, y)
117
0
    return x / gcd * y;
118
0
}
119
#endif
120
121
static const UChar gPercent = 0x0025;
122
static const UChar gColon = 0x003a;
123
static const UChar gSemicolon = 0x003b;
124
static const UChar gLineFeed = 0x000a;
125
126
static const UChar gPercentPercent[] =
127
{
128
    0x25, 0x25, 0
129
}; /* "%%" */
130
131
static const UChar gNoparse[] =
132
{
133
    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134
}; /* "@noparse" */
135
136
NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137
0
  : name()
138
0
  , rules(0)
139
0
  , owner(_owner)
140
0
  , fractionRules()
141
0
  , fIsFractionRuleSet(FALSE)
142
0
  , fIsPublic(FALSE)
143
0
  , fIsParseable(TRUE)
144
0
{
145
0
    for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146
0
        nonNumericalRules[i] = NULL;
147
0
    }
148
149
0
    if (U_FAILURE(status)) {
150
0
        return;
151
0
    }
152
153
0
    UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154
155
0
    if (description.length() == 0) {
156
        // throw new IllegalArgumentException("Empty rule set description");
157
0
        status = U_PARSE_ERROR;
158
0
        return;
159
0
    }
160
161
    // if the description begins with a rule set name (the rule set
162
    // name can be omitted in formatter descriptions that consist
163
    // of only one rule set), copy it out into our "name" member
164
    // and delete it from the description
165
0
    if (description.charAt(0) == gPercent) {
166
0
        int32_t pos = description.indexOf(gColon);
167
0
        if (pos == -1) {
168
            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169
0
            status = U_PARSE_ERROR;
170
0
        } else {
171
0
            name.setTo(description, 0, pos);
172
0
            while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173
0
            }
174
0
            description.remove(0, pos);
175
0
        }
176
0
    } else {
177
0
        name.setTo(UNICODE_STRING_SIMPLE("%default"));
178
0
    }
179
180
0
    if (description.length() == 0) {
181
        // throw new IllegalArgumentException("Empty rule set description");
182
0
        status = U_PARSE_ERROR;
183
0
    }
184
185
0
    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186
187
0
    if ( name.endsWith(gNoparse,8) ) {
188
0
        fIsParseable = FALSE;
189
0
        name.truncate(name.length()-8); // remove the @noparse from the name
190
0
    }
191
192
    // all of the other members of NFRuleSet are initialized
193
    // by parseRules()
194
0
}
195
196
void
197
NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198
0
{
199
    // start by creating a Vector whose elements are Strings containing
200
    // the descriptions of the rules (one rule per element).  The rules
201
    // are separated by semicolons (there's no escape facility: ALL
202
    // semicolons are rule delimiters)
203
204
0
    if (U_FAILURE(status)) {
205
0
        return;
206
0
    }
207
208
    // ensure we are starting with an empty rule list
209
0
    rules.deleteAll();
210
211
    // dlf - the original code kept a separate description array for no reason,
212
    // so I got rid of it.  The loop was too complex so I simplified it.
213
214
0
    UnicodeString currentDescription;
215
0
    int32_t oldP = 0;
216
0
    while (oldP < description.length()) {
217
0
        int32_t p = description.indexOf(gSemicolon, oldP);
218
0
        if (p == -1) {
219
0
            p = description.length();
220
0
        }
221
0
        currentDescription.setTo(description, oldP, p - oldP);
222
0
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223
0
        oldP = p + 1;
224
0
    }
225
226
    // for rules that didn't specify a base value, their base values
227
    // were initialized to 0.  Make another pass through the list and
228
    // set all those rules' base values.  We also remove any special
229
    // rules from the list and put them into their own member variables
230
0
    int64_t defaultBaseValue = 0;
231
232
    // (this isn't a for loop because we might be deleting items from
233
    // the vector-- we want to make sure we only increment i when
234
    // we _didn't_ delete anything from the vector)
235
0
    int32_t rulesSize = rules.size();
236
0
    for (int32_t i = 0; i < rulesSize; i++) {
237
0
        NFRule* rule = rules[i];
238
0
        int64_t baseValue = rule->getBaseValue();
239
240
0
        if (baseValue == 0) {
241
            // if the rule's base value is 0, fill in a default
242
            // base value (this will be 1 plus the preceding
243
            // rule's base value for regular rule sets, and the
244
            // same as the preceding rule's base value in fraction
245
            // rule sets)
246
0
            rule->setBaseValue(defaultBaseValue, status);
247
0
        }
248
0
        else {
249
            // if it's a regular rule that already knows its base value,
250
            // check to make sure the rules are in order, and update
251
            // the default base value for the next rule
252
0
            if (baseValue < defaultBaseValue) {
253
                // throw new IllegalArgumentException("Rules are not in order");
254
0
                status = U_PARSE_ERROR;
255
0
                return;
256
0
            }
257
0
            defaultBaseValue = baseValue;
258
0
        }
259
0
        if (!fIsFractionRuleSet) {
260
0
            ++defaultBaseValue;
261
0
        }
262
0
    }
263
0
}
264
265
/**
266
 * Set one of the non-numerical rules.
267
 * @param rule The rule to set.
268
 */
269
0
void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270
0
    int64_t baseValue = rule->getBaseValue();
271
0
    if (baseValue == NFRule::kNegativeNumberRule) {
272
0
        delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273
0
        nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274
0
    }
275
0
    else if (baseValue == NFRule::kImproperFractionRule) {
276
0
        setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277
0
    }
278
0
    else if (baseValue == NFRule::kProperFractionRule) {
279
0
        setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280
0
    }
281
0
    else if (baseValue == NFRule::kDefaultRule) {
282
0
        setBestFractionRule(DEFAULT_RULE_INDEX, rule, TRUE);
283
0
    }
284
0
    else if (baseValue == NFRule::kInfinityRule) {
285
0
        delete nonNumericalRules[INFINITY_RULE_INDEX];
286
0
        nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287
0
    }
288
0
    else if (baseValue == NFRule::kNaNRule) {
289
0
        delete nonNumericalRules[NAN_RULE_INDEX];
290
0
        nonNumericalRules[NAN_RULE_INDEX] = rule;
291
0
    }
292
0
}
293
294
/**
295
 * Determine the best fraction rule to use. Rules matching the decimal point from
296
 * DecimalFormatSymbols become the main set of rules to use.
297
 * @param originalIndex The index into nonNumericalRules
298
 * @param newRule The new rule to consider
299
 * @param rememberRule Should the new rule be added to fractionRules.
300
 */
301
0
void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302
0
    if (rememberRule) {
303
0
        fractionRules.add(newRule);
304
0
    }
305
0
    NFRule *bestResult = nonNumericalRules[originalIndex];
306
0
    if (bestResult == NULL) {
307
0
        nonNumericalRules[originalIndex] = newRule;
308
0
    }
309
0
    else {
310
        // We have more than one. Which one is better?
311
0
        const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312
0
        if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
313
0
            == newRule->getDecimalPoint())
314
0
        {
315
0
            nonNumericalRules[originalIndex] = newRule;
316
0
        }
317
        // else leave it alone
318
0
    }
319
0
}
320
321
NFRuleSet::~NFRuleSet()
322
0
{
323
0
    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324
0
        if (i != IMPROPER_FRACTION_RULE_INDEX
325
0
            && i != PROPER_FRACTION_RULE_INDEX
326
0
            && i != DEFAULT_RULE_INDEX)
327
0
        {
328
0
            delete nonNumericalRules[i];
329
0
        }
330
        // else it will be deleted via NFRuleList fractionRules
331
0
    }
332
0
}
333
334
static UBool
335
util_equalRules(const NFRule* rule1, const NFRule* rule2)
336
0
{
337
0
    if (rule1) {
338
0
        if (rule2) {
339
0
            return *rule1 == *rule2;
340
0
        }
341
0
    } else if (!rule2) {
342
0
        return TRUE;
343
0
    }
344
0
    return FALSE;
345
0
}
346
347
bool
348
NFRuleSet::operator==(const NFRuleSet& rhs) const
349
0
{
350
0
    if (rules.size() == rhs.rules.size() &&
351
0
        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
352
0
        name == rhs.name) {
353
354
        // ...then compare the non-numerical rule lists...
355
0
        for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356
0
            if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357
0
                return FALSE;
358
0
            }
359
0
        }
360
361
        // ...then compare the rule lists...
362
0
        for (uint32_t i = 0; i < rules.size(); ++i) {
363
0
            if (*rules[i] != *rhs.rules[i]) {
364
0
                return FALSE;
365
0
            }
366
0
        }
367
0
        return TRUE;
368
0
    }
369
0
    return FALSE;
370
0
}
371
372
void
373
0
NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
375
0
        rules[i]->setDecimalFormatSymbols(newSymbols, status);
376
0
    }
377
    // Switch the fraction rules to mirror the DecimalFormatSymbols.
378
0
    for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
379
0
        if (nonNumericalRules[nonNumericalIdx]) {
380
0
            for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
381
0
                NFRule *fractionRule = fractionRules[fIdx];
382
0
                if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383
0
                    setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384
0
                }
385
0
            }
386
0
        }
387
0
    }
388
389
0
    for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390
0
        NFRule *rule = nonNumericalRules[nnrIdx];
391
0
        if (rule) {
392
0
            rule->setDecimalFormatSymbols(newSymbols, status);
393
0
        }
394
0
    }
395
0
}
396
397
0
#define RECURSION_LIMIT 64
398
399
void
400
NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
401
0
{
402
0
    if (recursionCount >= RECURSION_LIMIT) {
403
        // stop recursion
404
0
        status = U_INVALID_STATE_ERROR;
405
0
        return;
406
0
    }
407
0
    const NFRule *rule = findNormalRule(number);
408
0
    if (rule) { // else error, but can't report it
409
0
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
410
0
    }
411
0
}
412
413
void
414
NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
415
0
{
416
0
    if (recursionCount >= RECURSION_LIMIT) {
417
        // stop recursion
418
0
        status = U_INVALID_STATE_ERROR;
419
0
        return;
420
0
    }
421
0
    const NFRule *rule = findDoubleRule(number);
422
0
    if (rule) { // else error, but can't report it
423
0
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
424
0
    }
425
0
}
426
427
const NFRule*
428
NFRuleSet::findDoubleRule(double number) const
429
0
{
430
    // if this is a fraction rule set, use findFractionRuleSetRule()
431
0
    if (isFractionRuleSet()) {
432
0
        return findFractionRuleSetRule(number);
433
0
    }
434
435
0
    if (uprv_isNaN(number)) {
436
0
        const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437
0
        if (!rule) {
438
0
            rule = owner->getDefaultNaNRule();
439
0
        }
440
0
        return rule;
441
0
    }
442
443
    // if the number is negative, return the negative number rule
444
    // (if there isn't a negative-number rule, we pretend it's a
445
    // positive number)
446
0
    if (number < 0) {
447
0
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448
0
            return  nonNumericalRules[NEGATIVE_RULE_INDEX];
449
0
        } else {
450
0
            number = -number;
451
0
        }
452
0
    }
453
454
0
    if (uprv_isInfinite(number)) {
455
0
        const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456
0
        if (!rule) {
457
0
            rule = owner->getDefaultInfinityRule();
458
0
        }
459
0
        return rule;
460
0
    }
461
462
    // if the number isn't an integer, we use one of the fraction rules...
463
0
    if (number != uprv_floor(number)) {
464
        // if the number is between 0 and 1, return the proper
465
        // fraction rule
466
0
        if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467
0
            return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
468
0
        }
469
        // otherwise, return the improper fraction rule
470
0
        else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471
0
            return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
472
0
        }
473
0
    }
474
475
    // if there's a default rule, use it to format the number
476
0
    if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
477
0
        return nonNumericalRules[DEFAULT_RULE_INDEX];
478
0
    }
479
480
    // and if we haven't yet returned a rule, use findNormalRule()
481
    // to find the applicable rule
482
0
    int64_t r = util64_fromDouble(number + 0.5);
483
0
    return findNormalRule(r);
484
0
}
485
486
const NFRule *
487
NFRuleSet::findNormalRule(int64_t number) const
488
0
{
489
    // if this is a fraction rule set, use findFractionRuleSetRule()
490
    // to find the rule (we should only go into this clause if the
491
    // value is 0)
492
0
    if (fIsFractionRuleSet) {
493
0
        return findFractionRuleSetRule((double)number);
494
0
    }
495
496
    // if the number is negative, return the negative-number rule
497
    // (if there isn't one, pretend the number is positive)
498
0
    if (number < 0) {
499
0
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500
0
            return nonNumericalRules[NEGATIVE_RULE_INDEX];
501
0
        } else {
502
0
            number = -number;
503
0
        }
504
0
    }
505
506
    // we have to repeat the preceding two checks, even though we
507
    // do them in findRule(), because the version of format() that
508
    // takes a long bypasses findRule() and goes straight to this
509
    // function.  This function does skip the fraction rules since
510
    // we know the value is an integer (it also skips the default
511
    // rule, since it's considered a fraction rule.  Skipping the
512
    // default rule in this function is also how we avoid infinite
513
    // recursion)
514
515
    // {dlf} unfortunately this fails if there are no rules except
516
    // special rules.  If there are no rules, use the default rule.
517
518
    // binary-search the rule list for the applicable rule
519
    // (a rule is used for all values from its base value to
520
    // the next rule's base value)
521
0
    int32_t hi = rules.size();
522
0
    if (hi > 0) {
523
0
        int32_t lo = 0;
524
525
0
        while (lo < hi) {
526
0
            int32_t mid = (lo + hi) / 2;
527
0
            if (rules[mid]->getBaseValue() == number) {
528
0
                return rules[mid];
529
0
            }
530
0
            else if (rules[mid]->getBaseValue() > number) {
531
0
                hi = mid;
532
0
            }
533
0
            else {
534
0
                lo = mid + 1;
535
0
            }
536
0
        }
537
0
        if (hi == 0) { // bad rule set, minimum base > 0
538
0
            return NULL; // want to throw exception here
539
0
        }
540
541
0
        NFRule *result = rules[hi - 1];
542
543
        // use shouldRollBack() to see whether we need to invoke the
544
        // rollback rule (see shouldRollBack()'s documentation for
545
        // an explanation of the rollback rule).  If we do, roll back
546
        // one rule and return that one instead of the one we'd normally
547
        // return
548
0
        if (result->shouldRollBack(number)) {
549
0
            if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
550
0
                return NULL;
551
0
            }
552
0
            result = rules[hi - 2];
553
0
        }
554
0
        return result;
555
0
    }
556
    // else use the default rule
557
0
    return nonNumericalRules[DEFAULT_RULE_INDEX];
558
0
}
559
560
/**
561
 * If this rule is a fraction rule set, this function is used by
562
 * findRule() to select the most appropriate rule for formatting
563
 * the number.  Basically, the base value of each rule in the rule
564
 * set is treated as the denominator of a fraction.  Whichever
565
 * denominator can produce the fraction closest in value to the
566
 * number passed in is the result.  If there's a tie, the earlier
567
 * one in the list wins.  (If there are two rules in a row with the
568
 * same base value, the first one is used when the numerator of the
569
 * fraction would be 1, and the second rule is used the rest of the
570
 * time.
571
 * @param number The number being formatted (which will always be
572
 * a number between 0 and 1)
573
 * @return The rule to use to format this number
574
 */
575
const NFRule*
576
NFRuleSet::findFractionRuleSetRule(double number) const
577
0
{
578
    // the obvious way to do this (multiply the value being formatted
579
    // by each rule's base value until you get an integral result)
580
    // doesn't work because of rounding error.  This method is more
581
    // accurate
582
583
    // find the least common multiple of the rules' base values
584
    // and multiply this by the number being formatted.  This is
585
    // all the precision we need, and we can do all of the rest
586
    // of the math using integer arithmetic
587
0
    int64_t leastCommonMultiple = rules[0]->getBaseValue();
588
0
    int64_t numerator;
589
0
    {
590
0
        for (uint32_t i = 1; i < rules.size(); ++i) {
591
0
            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
592
0
        }
593
0
        numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
594
0
    }
595
    // for each rule, do the following...
596
0
    int64_t tempDifference;
597
0
    int64_t difference = util64_fromDouble(uprv_maxMantissa());
598
0
    int32_t winner = 0;
599
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
600
        // "numerator" is the numerator of the fraction if the
601
        // denominator is the LCD.  The numerator if the rule's
602
        // base value is the denominator is "numerator" times the
603
        // base value divided bythe LCD.  Here we check to see if
604
        // that's an integer, and if not, how close it is to being
605
        // an integer.
606
0
        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
607
608
609
        // normalize the result of the above calculation: we want
610
        // the numerator's distance from the CLOSEST multiple
611
        // of the LCD
612
0
        if (leastCommonMultiple - tempDifference < tempDifference) {
613
0
            tempDifference = leastCommonMultiple - tempDifference;
614
0
        }
615
616
        // if this is as close as we've come, keep track of how close
617
        // that is, and the line number of the rule that did it.  If
618
        // we've scored a direct hit, we don't have to look at any more
619
        // rules
620
0
        if (tempDifference < difference) {
621
0
            difference = tempDifference;
622
0
            winner = i;
623
0
            if (difference == 0) {
624
0
                break;
625
0
            }
626
0
        }
627
0
    }
628
629
    // if we have two successive rules that both have the winning base
630
    // value, then the first one (the one we found above) is used if
631
    // the numerator of the fraction is 1 and the second one is used if
632
    // the numerator of the fraction is anything else (this lets us
633
    // do things like "one third"/"two thirds" without having to define
634
    // a whole bunch of extra rule sets)
635
0
    if ((unsigned)(winner + 1) < rules.size() &&
636
0
        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
637
0
        double n = ((double)rules[winner]->getBaseValue()) * number;
638
0
        if (n < 0.5 || n >= 2) {
639
0
            ++winner;
640
0
        }
641
0
    }
642
643
    // finally, return the winning rule
644
0
    return rules[winner];
645
0
}
646
647
/**
648
 * Parses a string.  Matches the string to be parsed against each
649
 * of its rules (with a base value less than upperBound) and returns
650
 * the value produced by the rule that matched the most characters
651
 * in the source string.
652
 * @param text The string to parse
653
 * @param parsePosition The initial position is ignored and assumed
654
 * to be 0.  On exit, this object has been updated to point to the
655
 * first character position this rule set didn't consume.
656
 * @param upperBound Limits the rules that can be allowed to match.
657
 * Only rules whose base values are strictly less than upperBound
658
 * are considered.
659
 * @return The numerical result of parsing this string.  This will
660
 * be the matching rule's base value, composed appropriately with
661
 * the results of matching any of its substitutions.  The object
662
 * will be an instance of Long if it's an integral value; otherwise,
663
 * it will be an instance of Double.  This function always returns
664
 * a valid object: If nothing matched the input string at all,
665
 * this function returns new Long(0), and the parse position is
666
 * left unchanged.
667
 */
668
#ifdef RBNF_DEBUG
669
#include <stdio.h>
670
671
static void dumpUS(FILE* f, const UnicodeString& us) {
672
  int len = us.length();
673
  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
674
  if (buf != NULL) {
675
    us.extract(0, len, buf);
676
    buf[len] = 0;
677
    fprintf(f, "%s", buf);
678
    uprv_free(buf); //delete[] buf;
679
  }
680
}
681
#endif
682
683
UBool
684
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
685
0
{
686
    // try matching each rule in the rule set against the text being
687
    // parsed.  Whichever one matches the most characters is the one
688
    // that determines the value we return.
689
690
0
    result.setLong(0);
691
692
    // dump out if there's no text to parse
693
0
    if (text.length() == 0) {
694
0
        return 0;
695
0
    }
696
697
0
    ParsePosition highWaterMark;
698
0
    ParsePosition workingPos = pos;
699
700
#ifdef RBNF_DEBUG
701
    fprintf(stderr, "<nfrs> %x '", this);
702
    dumpUS(stderr, name);
703
    fprintf(stderr, "' text '");
704
    dumpUS(stderr, text);
705
    fprintf(stderr, "'\n");
706
    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
707
#endif
708
    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
709
0
    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
710
0
        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
711
            // Mark this rule as being executed so that we don't try to execute it again.
712
0
            nonNumericalExecutedRuleMask |= 1 << i;
713
714
0
            Formattable tempResult;
715
0
            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
716
0
            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
717
0
                result = tempResult;
718
0
                highWaterMark = workingPos;
719
0
            }
720
0
            workingPos = pos;
721
0
        }
722
0
    }
723
#ifdef RBNF_DEBUG
724
    fprintf(stderr, "<nfrs> continue other with text '");
725
    dumpUS(stderr, text);
726
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
727
#endif
728
729
    // finally, go through the regular rules one at a time.  We start
730
    // at the end of the list because we want to try matching the most
731
    // sigificant rule first (this helps ensure that we parse
732
    // "five thousand three hundred six" as
733
    // "(five thousand) (three hundred) (six)" rather than
734
    // "((five thousand three) hundred) (six)").  Skip rules whose
735
    // base values are higher than the upper bound (again, this helps
736
    // limit ambiguity by making sure the rules that match a rule's
737
    // are less significant than the rule containing the substitutions)/
738
0
    {
739
0
        int64_t ub = util64_fromDouble(upperBound);
740
#ifdef RBNF_DEBUG
741
        {
742
            char ubstr[64];
743
            util64_toa(ub, ubstr, 64);
744
            char ubstrhex[64];
745
            util64_toa(ub, ubstrhex, 64, 16);
746
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
747
        }
748
#endif
749
0
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
750
0
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
751
0
                continue;
752
0
            }
753
0
            Formattable tempResult;
754
0
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
755
0
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
756
0
                result = tempResult;
757
0
                highWaterMark = workingPos;
758
0
            }
759
0
            workingPos = pos;
760
0
        }
761
0
    }
762
#ifdef RBNF_DEBUG
763
    fprintf(stderr, "<nfrs> exit\n");
764
#endif
765
    // finally, update the parse position we were passed to point to the
766
    // first character we didn't use, and return the result that
767
    // corresponds to that string of characters
768
0
    pos = highWaterMark;
769
770
0
    return 1;
771
0
}
772
773
void
774
NFRuleSet::appendRules(UnicodeString& result) const
775
0
{
776
0
    uint32_t i;
777
778
    // the rule set name goes first...
779
0
    result.append(name);
780
0
    result.append(gColon);
781
0
    result.append(gLineFeed);
782
783
    // followed by the regular rules...
784
0
    for (i = 0; i < rules.size(); i++) {
785
0
        rules[i]->_appendRuleText(result);
786
0
        result.append(gLineFeed);
787
0
    }
788
789
    // followed by the special rules (if they exist)
790
0
    for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
791
0
        NFRule *rule = nonNumericalRules[i];
792
0
        if (nonNumericalRules[i]) {
793
0
            if (rule->getBaseValue() == NFRule::kImproperFractionRule
794
0
                || rule->getBaseValue() == NFRule::kProperFractionRule
795
0
                || rule->getBaseValue() == NFRule::kDefaultRule)
796
0
            {
797
0
                for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
798
0
                    NFRule *fractionRule = fractionRules[fIdx];
799
0
                    if (fractionRule->getBaseValue() == rule->getBaseValue()) {
800
0
                        fractionRule->_appendRuleText(result);
801
0
                        result.append(gLineFeed);
802
0
                    }
803
0
                }
804
0
            }
805
0
            else {
806
0
                rule->_appendRuleText(result);
807
0
                result.append(gLineFeed);
808
0
            }
809
0
        }
810
0
    }
811
0
}
812
813
// utility functions
814
815
0
int64_t util64_fromDouble(double d) {
816
0
    int64_t result = 0;
817
0
    if (!uprv_isNaN(d)) {
818
0
        double mant = uprv_maxMantissa();
819
0
        if (d < -mant) {
820
0
            d = -mant;
821
0
        } else if (d > mant) {
822
0
            d = mant;
823
0
        }
824
0
        UBool neg = d < 0; 
825
0
        if (neg) {
826
0
            d = -d;
827
0
        }
828
0
        result = (int64_t)uprv_floor(d);
829
0
        if (neg) {
830
0
            result = -result;
831
0
        }
832
0
    }
833
0
    return result;
834
0
}
835
836
0
uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
837
0
    if (base == 0) {
838
0
        return 0;
839
0
    }
840
0
    uint64_t result = 1;
841
0
    uint64_t pow = base;
842
0
    while (true) {
843
0
        if ((exponent & 1) == 1) {
844
0
            result *= pow;
845
0
        }
846
0
        exponent >>= 1;
847
0
        if (exponent == 0) {
848
0
            break;
849
0
        }
850
0
        pow *= pow;
851
0
    }
852
0
    return result;
853
0
}
854
855
static const uint8_t asciiDigits[] = { 
856
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
857
    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
858
    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
859
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
860
    0x77u, 0x78u, 0x79u, 0x7au,  
861
};
862
863
static const UChar kUMinus = (UChar)0x002d;
864
865
#ifdef RBNF_DEBUG
866
static const char kMinus = '-';
867
868
static const uint8_t digitInfo[] = {
869
        0,     0,     0,     0,     0,     0,     0,     0,
870
        0,     0,     0,     0,     0,     0,     0,     0,
871
        0,     0,     0,     0,     0,     0,     0,     0,
872
        0,     0,     0,     0,     0,     0,     0,     0,
873
        0,     0,     0,     0,     0,     0,     0,     0,
874
        0,     0,     0,     0,     0,     0,     0,     0,
875
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
876
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
877
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
878
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
879
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
880
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
881
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
882
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
883
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
884
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
885
};
886
887
int64_t util64_atoi(const char* str, uint32_t radix)
888
{
889
    if (radix > 36) {
890
        radix = 36;
891
    } else if (radix < 2) {
892
        radix = 2;
893
    }
894
    int64_t lradix = radix;
895
896
    int neg = 0;
897
    if (*str == kMinus) {
898
        ++str;
899
        neg = 1;
900
    }
901
    int64_t result = 0;
902
    uint8_t b;
903
    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
904
        result *= lradix;
905
        result += (int32_t)b;
906
    }
907
    if (neg) {
908
        result = -result;
909
    }
910
    return result;
911
}
912
913
int64_t util64_utoi(const UChar* str, uint32_t radix)
914
{
915
    if (radix > 36) {
916
        radix = 36;
917
    } else if (radix < 2) {
918
        radix = 2;
919
    }
920
    int64_t lradix = radix;
921
922
    int neg = 0;
923
    if (*str == kUMinus) {
924
        ++str;
925
        neg = 1;
926
    }
927
    int64_t result = 0;
928
    UChar c;
929
    uint8_t b;
930
    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
931
        result *= lradix;
932
        result += (int32_t)b;
933
    }
934
    if (neg) {
935
        result = -result;
936
    }
937
    return result;
938
}
939
940
uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
941
{    
942
    if (radix > 36) {
943
        radix = 36;
944
    } else if (radix < 2) {
945
        radix = 2;
946
    }
947
    int64_t base = radix;
948
949
    char* p = buf;
950
    if (len && (w < 0) && (radix == 10) && !raw) {
951
        w = -w;
952
        *p++ = kMinus;
953
        --len;
954
    } else if (len && (w == 0)) {
955
        *p++ = (char)raw ? 0 : asciiDigits[0];
956
        --len;
957
    }
958
959
    while (len && w != 0) {
960
        int64_t n = w / base;
961
        int64_t m = n * base;
962
        int32_t d = (int32_t)(w-m);
963
        *p++ = raw ? (char)d : asciiDigits[d];
964
        w = n;
965
        --len;
966
    }
967
    if (len) {
968
        *p = 0; // null terminate if room for caller convenience
969
    }
970
971
    len = p - buf;
972
    if (*buf == kMinus) {
973
        ++buf;
974
    }
975
    while (--p > buf) {
976
        char c = *p;
977
        *p = *buf;
978
        *buf = c;
979
        ++buf;
980
    }
981
982
    return len;
983
}
984
#endif
985
986
uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
987
0
{    
988
0
    if (radix > 36) {
989
0
        radix = 36;
990
0
    } else if (radix < 2) {
991
0
        radix = 2;
992
0
    }
993
0
    int64_t base = radix;
994
995
0
    UChar* p = buf;
996
0
    if (len && (w < 0) && (radix == 10) && !raw) {
997
0
        w = -w;
998
0
        *p++ = kUMinus;
999
0
        --len;
1000
0
    } else if (len && (w == 0)) {
1001
0
        *p++ = (UChar)raw ? 0 : asciiDigits[0];
1002
0
        --len;
1003
0
    }
1004
1005
0
    while (len && (w != 0)) {
1006
0
        int64_t n = w / base;
1007
0
        int64_t m = n * base;
1008
0
        int32_t d = (int32_t)(w-m);
1009
0
        *p++ = (UChar)(raw ? d : asciiDigits[d]);
1010
0
        w = n;
1011
0
        --len;
1012
0
    }
1013
0
    if (len) {
1014
0
        *p = 0; // null terminate if room for caller convenience
1015
0
    }
1016
1017
0
    len = (uint32_t)(p - buf);
1018
0
    if (*buf == kUMinus) {
1019
0
        ++buf;
1020
0
    }
1021
0
    while (--p > buf) {
1022
0
        UChar c = *p;
1023
0
        *p = *buf;
1024
0
        *buf = c;
1025
0
        ++buf;
1026
0
    }
1027
1028
0
    return len;
1029
0
}
1030
1031
1032
U_NAMESPACE_END
1033
1034
/* U_HAVE_RBNF */
1035
#endif