Coverage Report

Created: 2025-12-07 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/nfrs.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*   Copyright (C) 1997-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
******************************************************************************
8
*   file name:  nfrs.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
* Modification history
14
* Date        Name      Comments
15
* 10/11/2001  Doug      Ported from ICU4J
16
*/
17
18
#include "nfrs.h"
19
20
#if U_HAVE_RBNF
21
22
#include "unicode/uchar.h"
23
#include "nfrule.h"
24
#include "nfrlist.h"
25
#include "patternprops.h"
26
#include "putilimp.h"
27
28
#ifdef RBNF_DEBUG
29
#include "cmemory.h"
30
#endif
31
32
// Slots of the nonNumericalRules array, one per kind of special rule.
// The numeric values are also used as bit positions in the
// "already executed" mask that NFRuleSet::parse() maintains.
enum {
    NEGATIVE_RULE_INDEX,            // 0: "-x"  negative-number rule
    IMPROPER_FRACTION_RULE_INDEX,   // 1: "x.x" improper-fraction rule
    PROPER_FRACTION_RULE_INDEX,     // 2: "0.x" proper-fraction rule
    DEFAULT_RULE_INDEX,             // 3: "x.0" default rule
    INFINITY_RULE_INDEX,            // 4: "Inf" infinity rule
    NAN_RULE_INDEX,                 // 5: "NaN" not-a-number rule
    NON_NUMERICAL_RULE_LENGTH       // 6: number of slots (not a slot itself)
};
47
48
U_NAMESPACE_BEGIN
49
50
#if 0
51
// Alternative implementation built on Euclid's algorithm.  It is only
// compiled when the enclosing "#if 0" is switched on; the binary-gcd
// version in the "#else" branch is the one normally used.

/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with Euclid's algorithm on the
 * magnitudes of the arguments, then lcm = |x| / gcd * |y|.
 *
 * @param x first value
 * @param y second value
 * @return the (non-negative) least common multiple, or 0 if either
 *         argument is 0.  The result is not checked for overflow.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    if (x == 0 || y == 0) {
        return 0;
    }

    // work on magnitudes so the remainder loop below always terminates
    const int64_t absX = x < 0 ? -x : x;
    const int64_t absY = y < 0 ? -y : y;

    int64_t gcd = absX;
    int64_t rest = absY;
    while (rest != 0) {
        const int64_t remainder = gcd % rest;
        gcd = rest;
        rest = remainder;
    }

    // x * y == gcd(x, y) * lcm(x, y); divide first to keep the
    // intermediate value small
    return absX / gcd * absY;
}
75
76
#else
77
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is computed with the binary gcd algorithm
 * and the result is x / gcd * y.
 *
 * @param x first value; expected to be non-negative
 * @param y second value; expected to be non-negative
 * @return the least common multiple, or 0 if either argument is 0.
 *         The result is not checked for overflow.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // lcm(0, n) is defined as 0 here.  Without this guard the
    // power-of-two loop below never terminates for x == y == 0, and
    // x == 0 with y != 0 yields gcd == 0 and a division by zero.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip the power of two that both values share
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // at least one of x1, y1 is odd now
    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // remaining factors of two cannot be part of the gcd
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        // the sign of t records which operand it replaces
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
119
#endif
120
121
// Single UTF-16 code units used when reading and writing rule set text.
static const char16_t gPercent   = 0x25; /* "%"  */
static const char16_t gColon     = 0x3A; /* ":"  */
static const char16_t gSemicolon = 0x3B; /* ";"  */
static const char16_t gLineFeed  = 0x0A; /* "\n" */

// NUL-terminated UTF-16 strings matched against rule set names.
static const char16_t gPercentPercent[] = { 0x25, 0x25, 0 }; /* "%%" */

static const char16_t gNoparse[] = {
    0x40,                                       /* "@"       */
    0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65,   /* "noparse" */
    0
}; /* "@noparse" */
135
136
// Builds a rule set from descriptions[index].
//
// An optional leading "%name:" is split off into the name member and
// removed (together with following white space) from the description;
// without it the set is called "%default".  The rules themselves are not
// parsed here; that is done by parseRules().
//
// On error status is set to U_PARSE_ERROR and construction stops early:
//   - the description is empty (before or after removing the name),
//   - the description starts with '%' but no ':' is found at index 2 or later.
NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
  : rules(0)
  , owner(_owner)
  , fractionRules()
{
    // no special rules are known yet
    for (int32_t slot = NON_NUMERICAL_RULE_LENGTH; slot-- > 0;) {
        nonNumericalRules[slot] = nullptr;
    }

    if (U_FAILURE(status)) {
        return;
    }

    // NOTE: index is not range-checked here; the caller must pass a valid one
    UnicodeString& description = descriptions[index];
    if (description.isEmpty()) {
        status = U_PARSE_ERROR;
        return;
    }

    if (description.charAt(0) != gPercent) {
        // a formatter consisting of a single rule set may omit the name
        name.setTo(UNICODE_STRING_SIMPLE("%default"));
    } else {
        const int32_t colonIndex = description.indexOf(gColon);
        if (colonIndex < 2) {
            // no colon at all, or the name would be just "%"
            status = U_PARSE_ERROR;
            return;
        }
        name.setTo(description, 0, colonIndex);

        // drop the name, the colon and any white space after the colon
        int32_t bodyStart = colonIndex + 1;
        while (bodyStart < description.length()
               && PatternProps::isWhiteSpace(description.charAt(bodyStart))) {
            ++bodyStart;
        }
        description.remove(0, bodyStart);
    }

    // nothing but a name was supplied
    if (description.isEmpty()) {
        status = U_PARSE_ERROR;
        return;
    }

    // a set is public unless its name begins with "%%"
    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;

    // a trailing "@noparse" marks the set as format-only; it is not part
    // of the name proper
    if (name.endsWith(gNoparse, 8)) {
        fIsParseable = false;
        name.truncate(name.length() - 8);
    }
}
194
195
void
196
NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
197
84.9k
{
198
    // start by creating a Vector whose elements are Strings containing
199
    // the descriptions of the rules (one rule per element).  The rules
200
    // are separated by semicolons (there's no escape facility: ALL
201
    // semicolons are rule delimiters)
202
203
84.9k
    if (U_FAILURE(status)) {
204
0
        return;
205
0
    }
206
207
    // ensure we are starting with an empty rule list
208
84.9k
    rules.deleteAll();
209
210
    // dlf - the original code kept a separate description array for no reason,
211
    // so I got rid of it.  The loop was too complex so I simplified it.
212
213
84.9k
    UnicodeString currentDescription;
214
84.9k
    int32_t oldP = 0;
215
1.32M
    while (oldP < description.length()) {
216
1.24M
        int32_t p = description.indexOf(gSemicolon, oldP);
217
1.24M
        if (p == -1) {
218
10.0k
            p = description.length();
219
10.0k
        }
220
1.24M
        currentDescription.setTo(description, oldP, p - oldP);
221
1.24M
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
222
1.24M
        if (U_FAILURE(status)) {
223
1.09k
            return;
224
1.09k
        }
225
1.24M
        oldP = p + 1;
226
1.24M
    }
227
228
    // for rules that didn't specify a base value, their base values
229
    // were initialized to 0.  Make another pass through the list and
230
    // set all those rules' base values.  We also remove any special
231
    // rules from the list and put them into their own member variables
232
83.9k
    int64_t defaultBaseValue = 0;
233
234
    // (this isn't a for loop because we might be deleting items from
235
    // the vector-- we want to make sure we only increment i when
236
    // we _didn't_ delete anything from the vector)
237
83.9k
    int32_t rulesSize = rules.size();
238
1.51M
    for (int32_t i = 0; i < rulesSize; i++) {
239
1.43M
        NFRule* rule = rules[i];
240
1.43M
        int64_t baseValue = rule->getBaseValue();
241
242
1.43M
        if (baseValue == 0) {
243
            // if the rule's base value is 0, fill in a default
244
            // base value (this will be 1 plus the preceding
245
            // rule's base value for regular rule sets, and the
246
            // same as the preceding rule's base value in fraction
247
            // rule sets)
248
283k
            rule->setBaseValue(defaultBaseValue, status);
249
283k
            if (U_FAILURE(status)) {
250
0
                return;
251
0
            }
252
283k
        }
253
1.14M
        else {
254
            // if it's a regular rule that already knows its base value,
255
            // check to make sure the rules are in order, and update
256
            // the default base value for the next rule
257
1.14M
            if (baseValue < defaultBaseValue) {
258
                // throw new IllegalArgumentException("Rules are not in order");
259
65
                status = U_PARSE_ERROR;
260
65
                return;
261
65
            }
262
1.14M
            defaultBaseValue = baseValue;
263
1.14M
        }
264
1.43M
        if (!fIsFractionRuleSet) {
265
1.42M
            ++defaultBaseValue;
266
1.42M
        }
267
1.43M
    }
268
83.9k
}
269
270
/**
271
 * Set one of the non-numerical rules.
272
 * @param rule The rule to set.
273
 */
274
81.5k
void NFRuleSet::setNonNumericalRule(NFRule *rule) {
275
81.5k
    switch (rule->getBaseValue()) {
276
47.2k
        case NFRule::kNegativeNumberRule:
277
47.2k
            delete nonNumericalRules[NEGATIVE_RULE_INDEX];
278
47.2k
            nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
279
47.2k
            return;
280
27.3k
        case NFRule::kImproperFractionRule:
281
27.3k
            setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
282
27.3k
            return;
283
1.71k
        case NFRule::kProperFractionRule:
284
1.71k
            setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
285
1.71k
            return;
286
5.26k
        case NFRule::kDefaultRule:
287
5.26k
            setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
288
5.26k
            return;
289
0
        case NFRule::kInfinityRule:
290
0
            delete nonNumericalRules[INFINITY_RULE_INDEX];
291
0
            nonNumericalRules[INFINITY_RULE_INDEX] = rule;
292
0
            return;
293
0
        case NFRule::kNaNRule:
294
0
            delete nonNumericalRules[NAN_RULE_INDEX];
295
0
            nonNumericalRules[NAN_RULE_INDEX] = rule;
296
0
            return;
297
0
        case NFRule::kNoBase:
298
0
        case NFRule::kOtherRule:
299
0
        default:
300
            // If we do not remember the rule inside the object.
301
            // delete it here to prevent memory leak.
302
0
            delete rule;
303
0
            return;
304
81.5k
    }
305
81.5k
}
306
307
/**
308
 * Determine the best fraction rule to use. Rules matching the decimal point from
309
 * DecimalFormatSymbols become the main set of rules to use.
310
 * @param originalIndex The index into nonNumericalRules
311
 * @param newRule The new rule to consider
312
 * @param rememberRule Should the new rule be added to fractionRules.
313
 */
314
34.3k
void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
315
34.3k
    if (rememberRule) {
316
34.3k
        fractionRules.add(newRule);
317
34.3k
    }
318
34.3k
    NFRule *bestResult = nonNumericalRules[originalIndex];
319
34.3k
    if (bestResult == nullptr) {
320
20.7k
        nonNumericalRules[originalIndex] = newRule;
321
20.7k
    }
322
13.6k
    else {
323
        // We have more than one. Which one is better?
324
13.6k
        const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
325
13.6k
        if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
326
13.6k
            == newRule->getDecimalPoint())
327
195
        {
328
195
            nonNumericalRules[originalIndex] = newRule;
329
195
        }
330
        // else leave it alone
331
13.6k
    }
332
34.3k
}
333
334
// Deletes the special rules stored directly in nonNumericalRules.
// The improper-fraction, proper-fraction and default slots are skipped:
// those rules are also held by fractionRules (see setBestFractionRule())
// and are expected to be released through that list.
NFRuleSet::~NFRuleSet()
{
    static const int32_t directlyOwnedSlots[] = {
        NEGATIVE_RULE_INDEX,
        INFINITY_RULE_INDEX,
        NAN_RULE_INDEX
    };

    for (int32_t slot : directlyOwnedSlots) {
        delete nonNumericalRules[slot];
    }
}
346
347
// Null-tolerant rule comparison: two null pointers are equal, a null and
// a non-null pointer are not, and two non-null rules are compared by value.
static UBool
util_equalRules(const NFRule* rule1, const NFRule* rule2)
{
    if (rule1 == nullptr || rule2 == nullptr) {
        return rule1 == nullptr && rule2 == nullptr;
    }
    return *rule1 == *rule2;
}
359
360
bool
361
NFRuleSet::operator==(const NFRuleSet& rhs) const
362
0
{
363
0
    if (rules.size() == rhs.rules.size() &&
364
0
        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
365
0
        name == rhs.name) {
366
367
        // ...then compare the non-numerical rule lists...
368
0
        for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
369
0
            if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
370
0
                return false;
371
0
            }
372
0
        }
373
374
        // ...then compare the rule lists...
375
0
        for (uint32_t i = 0; i < rules.size(); ++i) {
376
0
            if (*rules[i] != *rhs.rules[i]) {
377
0
                return false;
378
0
            }
379
0
        }
380
0
        return true;
381
0
    }
382
0
    return false;
383
0
}
384
385
void
386
0
NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
387
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
388
0
        rules[i]->setDecimalFormatSymbols(newSymbols, status);
389
0
    }
390
    // Switch the fraction rules to mirror the DecimalFormatSymbols.
391
0
    for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
392
0
        if (nonNumericalRules[nonNumericalIdx]) {
393
0
            for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
394
0
                NFRule *fractionRule = fractionRules[fIdx];
395
0
                if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
396
0
                    setBestFractionRule(nonNumericalIdx, fractionRule, false);
397
0
                }
398
0
            }
399
0
        }
400
0
    }
401
402
0
    for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
403
0
        NFRule *rule = nonNumericalRules[nnrIdx];
404
0
        if (rule) {
405
0
            rule->setDecimalFormatSymbols(newSymbols, status);
406
0
        }
407
0
    }
408
0
}
409
410
7.76M
#define RECURSION_LIMIT 64
411
412
void
413
NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
414
929
{
415
929
    if (recursionCount >= RECURSION_LIMIT) {
416
        // stop recursion
417
0
        status = U_INVALID_STATE_ERROR;
418
0
        return;
419
0
    }
420
929
    const NFRule *rule = findNormalRule(number);
421
929
    if (rule) { // else error, but can't report it
422
929
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
423
929
    }
424
929
}
425
426
void
427
NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
428
0
{
429
0
    if (recursionCount >= RECURSION_LIMIT) {
430
        // stop recursion
431
0
        status = U_INVALID_STATE_ERROR;
432
0
        return;
433
0
    }
434
0
    const NFRule *rule = findDoubleRule(number);
435
0
    if (rule) { // else error, but can't report it
436
0
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
437
0
    }
438
0
}
439
440
// Selects the rule used to format a double.  Checked in this order:
// fraction rule set, NaN, negative number, infinity, fractional value,
// default rule, and finally the regular rules via findNormalRule().
const NFRule*
NFRuleSet::findDoubleRule(double number) const
{
    // fraction rule sets have their own selection logic
    if (isFractionRuleSet()) {
        return findFractionRuleSetRule(number);
    }

    if (uprv_isNaN(number)) {
        // fall back to the owner's NaN rule if this set has none
        if (const NFRule *nanRule = nonNumericalRules[NAN_RULE_INDEX]) {
            return nanRule;
        }
        return owner->getDefaultNaNRule();
    }

    if (number < 0) {
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
            return nonNumericalRules[NEGATIVE_RULE_INDEX];
        }
        // no negative-number rule: pretend the number is positive
        number = -number;
    }

    if (uprv_isInfinite(number)) {
        // fall back to the owner's infinity rule if this set has none
        if (const NFRule *infinityRule = nonNumericalRules[INFINITY_RULE_INDEX]) {
            return infinityRule;
        }
        return owner->getDefaultInfinityRule();
    }

    if (number != uprv_floor(number)) {
        // not an integer: values below 1 prefer the proper-fraction rule,
        // everything else (or a missing proper-fraction rule) uses the
        // improper-fraction rule if there is one
        const NFRule *fractionRule =
            (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX])
                ? nonNumericalRules[PROPER_FRACTION_RULE_INDEX]
                : nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
        if (fractionRule) {
            return fractionRule;
        }
    }

    // a default rule, when present, takes precedence over the regular rules
    if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
        return nonNumericalRules[DEFAULT_RULE_INDEX];
    }

    // otherwise round to an integer and search the regular rules
    return findNormalRule(util64_fromDouble(number + 0.5));
}
498
499
// Selects the rule used to format an integer.
//
// Fraction rule sets delegate to findFractionRuleSetRule().  A negative
// number uses the negative-number rule if there is one and is otherwise
// treated as positive.  The regular rules are then binary-searched for
// the last rule whose base value does not exceed the number; if that
// rule's shouldRollBack() says so, the rule before it is used instead.
// With no regular rules at all the default rule slot is returned.
//
// Returns nullptr when the rule set offers no usable rule (the smallest
// base value is above the number, or a rollback has no earlier rule).
// The fraction rules are deliberately not consulted here.
const NFRule *
NFRuleSet::findNormalRule(int64_t number) const
{
    if (fIsFractionRuleSet) {
        return findFractionRuleSetRule(static_cast<double>(number));
    }

    if (number < 0) {
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
            return nonNumericalRules[NEGATIVE_RULE_INDEX];
        }
        // no negative-number rule: pretend the number is positive
        number = -number;
    }

    int32_t upper = rules.size();
    if (upper <= 0) {
        // only special rules exist: use the default rule
        return nonNumericalRules[DEFAULT_RULE_INDEX];
    }

    // binary search: a rule applies from its own base value up to the
    // next rule's base value
    int32_t lower = 0;
    while (lower < upper) {
        const int32_t middle = (lower + upper) / 2;
        const int64_t middleBase = rules[middle]->getBaseValue();
        if (middleBase == number) {
            return rules[middle];
        }
        if (middleBase > number) {
            upper = middle;
        } else {
            lower = middle + 1;
        }
    }

    if (upper == 0) {
        // bad rule set: even the first rule's base value is too large
        return nullptr;
    }

    NFRule *candidate = rules[upper - 1];
    if (!candidate->shouldRollBack(number)) {
        return candidate;
    }

    // rollback requested: use the preceding rule, if there is one
    if (upper == 1) {
        return nullptr;
    }
    return rules[upper - 2];
}
572
573
/**
574
 * If this rule is a fraction rule set, this function is used by
575
 * findRule() to select the most appropriate rule for formatting
576
 * the number.  Basically, the base value of each rule in the rule
577
 * set is treated as the denominator of a fraction.  Whichever
578
 * denominator can produce the fraction closest in value to the
579
 * number passed in is the result.  If there's a tie, the earlier
580
 * one in the list wins.  (If there are two rules in a row with the
581
 * same base value, the first one is used when the numerator of the
582
 * fraction would be 1, and the second rule is used the rest of the
583
 * time.
584
 * @param number The number being formatted (which will always be
585
 * a number between 0 and 1)
586
 * @return The rule to use to format this number
587
 */
588
const NFRule*
589
NFRuleSet::findFractionRuleSetRule(double number) const
590
0
{
591
    // the obvious way to do this (multiply the value being formatted
592
    // by each rule's base value until you get an integral result)
593
    // doesn't work because of rounding error.  This method is more
594
    // accurate
595
596
    // find the least common multiple of the rules' base values
597
    // and multiply this by the number being formatted.  This is
598
    // all the precision we need, and we can do all of the rest
599
    // of the math using integer arithmetic
600
0
    int64_t leastCommonMultiple = rules[0]->getBaseValue();
601
0
    if (leastCommonMultiple == 0) {
602
0
        return nullptr;
603
0
    }
604
0
    int64_t numerator;
605
0
    {
606
0
        for (uint32_t i = 1; i < rules.size(); ++i) {
607
0
            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
608
0
        }
609
0
        numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5);
610
0
    }
611
    // for each rule, do the following...
612
0
    int64_t tempDifference;
613
0
    int64_t difference = util64_fromDouble(uprv_maxMantissa());
614
0
    int32_t winner = 0;
615
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
616
        // "numerator" is the numerator of the fraction if the
617
        // denominator is the LCD.  The numerator if the rule's
618
        // base value is the denominator is "numerator" times the
619
        // base value divided bythe LCD.  Here we check to see if
620
        // that's an integer, and if not, how close it is to being
621
        // an integer.
622
0
        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
623
624
625
        // normalize the result of the above calculation: we want
626
        // the numerator's distance from the CLOSEST multiple
627
        // of the LCD
628
0
        if (leastCommonMultiple - tempDifference < tempDifference) {
629
0
            tempDifference = leastCommonMultiple - tempDifference;
630
0
        }
631
632
        // if this is as close as we've come, keep track of how close
633
        // that is, and the line number of the rule that did it.  If
634
        // we've scored a direct hit, we don't have to look at any more
635
        // rules
636
0
        if (tempDifference < difference) {
637
0
            difference = tempDifference;
638
0
            winner = i;
639
0
            if (difference == 0) {
640
0
                break;
641
0
            }
642
0
        }
643
0
    }
644
645
    // if we have two successive rules that both have the winning base
646
    // value, then the first one (the one we found above) is used if
647
    // the numerator of the fraction is 1 and the second one is used if
648
    // the numerator of the fraction is anything else (this lets us
649
    // do things like "one third"/"two thirds" without having to define
650
    // a whole bunch of extra rule sets)
651
0
    if (static_cast<unsigned>(winner + 1) < rules.size() &&
652
0
        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
653
0
        double n = static_cast<double>(rules[winner]->getBaseValue()) * number;
654
0
        if (n < 0.5 || n >= 2) {
655
0
            ++winner;
656
0
        }
657
0
    }
658
659
    // finally, return the winning rule
660
0
    return rules[winner];
661
0
}
662
663
/**
664
 * Parses a string.  Matches the string to be parsed against each
665
 * of its rules (with a base value less than upperBound) and returns
666
 * the value produced by the rule that matched the most characters
667
 * in the source string.
668
 * @param text The string to parse
669
 * @param parsePosition The initial position is ignored and assumed
670
 * to be 0.  On exit, this object has been updated to point to the
671
 * first character position this rule set didn't consume.
672
 * @param upperBound Limits the rules that can be allowed to match.
673
 * Only rules whose base values are strictly less than upperBound
674
 * are considered.
675
 * @return The numerical result of parsing this string.  This will
676
 * be the matching rule's base value, composed appropriately with
677
 * the results of matching any of its substitutions.  The object
678
 * will be an instance of Long if it's an integral value; otherwise,
679
 * it will be an instance of Double.  This function always returns
680
 * a valid object: If nothing matched the input string at all,
681
 * this function returns new Long(0), and the parse position is
682
 * left unchanged.
683
 */
684
#ifdef RBNF_DEBUG
685
#include <stdio.h>
686
687
static void dumpUS(FILE* f, const UnicodeString& us) {
688
  int len = us.length();
689
  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
690
  if (buf != nullptr) {
691
    us.extract(0, len, buf);
692
    buf[len] = 0;
693
    fprintf(f, "%s", buf);
694
    uprv_free(buf); //delete[] buf;
695
  }
696
}
697
#endif
698
699
// Parses text by trying the rules of this set and keeping the result of
// the rule that consumed the most characters.
//
// text          The string to parse.
// pos           On entry, the position handed to every rule attempt.  On
//               exit (when true is returned) it is set to the position
//               reached by the best match.
// upperBound    Regular rules whose base value is greater than or equal
//               to this bound (converted with util64_fromDouble()) are
//               skipped, except in fraction rule sets.
// nonNumericalExecutedRuleMask
//               Bit i set means the special rule in slot i has already
//               been tried higher up and must not be tried again.
// recursionCount
//               Current nesting depth; parsing gives up at RECURSION_LIMIT.
// result        Always reset to a long 0 first; receives the value of
//               the best match, if any.
//
// Returns false if the recursion limit has been reached or text is empty,
// true otherwise (also when no rule matched).
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if we've reached the recursion limit
    if (recursionCount >= RECURSION_LIMIT) {
        // stop recursion
        return false;
    }

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return false;
    }

    ParsePosition highWaterMark;        // furthest position any rule has reached
    ParsePosition workingPos = pos;     // scratch position, reset after every attempt

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %p '", static_cast<const void*>(this));
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, "  parse negative: %d\n", nonNumericalRules[NEGATIVE_RULE_INDEX] != nullptr);
#endif
    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
            // Mark this rule as being executed so that we don't try to execute it again.
            nonNumericalExecutedRuleMask |= 1 << i;

            Formattable tempResult;
            UBool success = nonNumericalRules[i]->doParse(text, workingPos, false, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are not below the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing them).
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        // stop early once some rule has consumed the whole text
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return true;
}
794
795
void
796
NFRuleSet::appendRules(UnicodeString& result) const
797
0
{
798
0
    uint32_t i;
799
800
    // the rule set name goes first...
801
0
    result.append(name);
802
0
    result.append(gColon);
803
0
    result.append(gLineFeed);
804
805
    // followed by the regular rules...
806
0
    for (i = 0; i < rules.size(); i++) {
807
0
        rules[i]->_appendRuleText(result);
808
0
        result.append(gLineFeed);
809
0
    }
810
811
    // followed by the special rules (if they exist)
812
0
    for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
813
0
        NFRule *rule = nonNumericalRules[i];
814
0
        if (nonNumericalRules[i]) {
815
0
            if (rule->getBaseValue() == NFRule::kImproperFractionRule
816
0
                || rule->getBaseValue() == NFRule::kProperFractionRule
817
0
                || rule->getBaseValue() == NFRule::kDefaultRule)
818
0
            {
819
0
                for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
820
0
                    NFRule *fractionRule = fractionRules[fIdx];
821
0
                    if (fractionRule->getBaseValue() == rule->getBaseValue()) {
822
0
                        fractionRule->_appendRuleText(result);
823
0
                        result.append(gLineFeed);
824
0
                    }
825
0
                }
826
0
            }
827
0
            else {
828
0
                rule->_appendRuleText(result);
829
0
                result.append(gLineFeed);
830
0
            }
831
0
        }
832
0
    }
833
0
}
834
835
// utility functions
836
837
7.64M
int64_t util64_fromDouble(double d) {
838
7.64M
    int64_t result = 0;
839
7.64M
    if (!uprv_isNaN(d)) {
840
7.64M
        double mant = uprv_maxMantissa();
841
7.64M
        if (d < -mant) {
842
0
            d = -mant;
843
7.64M
        } else if (d > mant) {
844
993
            d = mant;
845
993
        }
846
7.64M
        UBool neg = d < 0; 
847
7.64M
        if (neg) {
848
0
            d = -d;
849
0
        }
850
7.64M
        result = static_cast<int64_t>(uprv_floor(d));
851
7.64M
        if (neg) {
852
0
            result = -result;
853
0
        }
854
7.64M
    }
855
7.64M
    return result;
856
7.64M
}
857
858
2.34M
/**
 * Raises `base` to the power `exponent` using binary exponentiation.
 *
 * A zero base returns 0 for every exponent, including 0. Any other base
 * raised to exponent 0 returns 1. The arithmetic is unsigned, so results
 * that do not fit in 64 bits wrap modulo 2^64.
 *
 * @param base      Base of the power.
 * @param exponent  Exponent of the power.
 * @return          base ** exponent, reduced modulo 2^64.
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
    if (base == 0) {
        return 0;
    }

    uint64_t product = 1;
    uint64_t factor = base;   // base^(2^k) for the bit currently examined
    for (uint32_t bits = exponent; bits != 0; ) {
        if ((bits & 1u) != 0) {
            product *= factor;
        }
        bits >>= 1;
        // Only square while higher bits remain.
        if (bits != 0) {
            factor *= factor;
        }
    }
    return product;
}
876
877
// Code units used to print a digit value 0..35: the ASCII codes of '0'..'9'
// followed by 'a'..'z'. They are spelled as numbers rather than character
// literals, so the values do not depend on the compiler's character set.
static const uint8_t asciiDigits[] = {
    // digit values 0..9
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    // digit values 10..22
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u, 0x68u, 0x69u, 0x6au,
    0x6bu, 0x6cu, 0x6du,
    // digit values 23..35
    0x6eu, 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 0x77u,
    0x78u, 0x79u, 0x7au,
};

// UTF-16 code unit for the minus sign, U+002D.
static const char16_t kUMinus = char16_t{0x002d};
886
887
#ifdef RBNF_DEBUG
888
// Minus sign in the narrow execution character set.
static const char kMinus{'-'};

// Lookup table indexed by a character code in 0x00..0x7f (ASCII layout).
// An entry of 0 means "not a digit". Otherwise the entry is 0x80 | value,
// where value is 0..9 for '0'..'9' and 10..35 for letters in either case.
// The 0x80 flag keeps the entry for digit value 0 distinguishable from 0.
static const uint8_t digitInfo[] = {
    // 0x00..0x2f: no digits
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    // 0x30..0x3f: '0'..'9', then six non-digits
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    // 0x40..0x5f: one non-digit, 'A'..'Z', then five non-digits
    0x00u, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    // 0x60..0x7f: one non-digit, 'a'..'z', then five non-digits
    0x00u, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
};
908
909
int64_t util64_atoi(const char* str, uint32_t radix)
910
{
911
    if (radix > 36) {
912
        radix = 36;
913
    } else if (radix < 2) {
914
        radix = 2;
915
    }
916
    int64_t lradix = radix;
917
918
    int neg = 0;
919
    if (*str == kMinus) {
920
        ++str;
921
        neg = 1;
922
    }
923
    int64_t result = 0;
924
    uint8_t b;
925
    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
926
        result *= lradix;
927
        result += (int32_t)b;
928
    }
929
    if (neg) {
930
        result = -result;
931
    }
932
    return result;
933
}
934
935
int64_t util64_utoi(const char16_t* str, uint32_t radix)
936
{
937
    if (radix > 36) {
938
        radix = 36;
939
    } else if (radix < 2) {
940
        radix = 2;
941
    }
942
    int64_t lradix = radix;
943
944
    int neg = 0;
945
    if (*str == kUMinus) {
946
        ++str;
947
        neg = 1;
948
    }
949
    int64_t result = 0;
950
    char16_t c;
951
    uint8_t b;
952
    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
953
        result *= lradix;
954
        result += (int32_t)b;
955
    }
956
    if (neg) {
957
        result = -result;
958
    }
959
    return result;
960
}
961
962
uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
963
{    
964
    if (radix > 36) {
965
        radix = 36;
966
    } else if (radix < 2) {
967
        radix = 2;
968
    }
969
    int64_t base = radix;
970
971
    char* p = buf;
972
    if (len && (w < 0) && (radix == 10) && !raw) {
973
        w = -w;
974
        *p++ = kMinus;
975
        --len;
976
    } else if (len && (w == 0)) {
977
        *p++ = (char)raw ? 0 : asciiDigits[0];
978
        --len;
979
    }
980
981
    while (len && w != 0) {
982
        int64_t n = w / base;
983
        int64_t m = n * base;
984
        int32_t d = (int32_t)(w-m);
985
        *p++ = raw ? (char)d : asciiDigits[d];
986
        w = n;
987
        --len;
988
    }
989
    if (len) {
990
        *p = 0; // null terminate if room for caller convenience
991
    }
992
993
    len = p - buf;
994
    if (*buf == kMinus) {
995
        ++buf;
996
    }
997
    while (--p > buf) {
998
        char c = *p;
999
        *p = *buf;
1000
        *buf = c;
1001
        ++buf;
1002
    }
1003
1004
    return len;
1005
}
1006
#endif
1007
1008
uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
1009
0
{    
1010
0
    if (radix > 36) {
1011
0
        radix = 36;
1012
0
    } else if (radix < 2) {
1013
0
        radix = 2;
1014
0
    }
1015
0
    int64_t base = radix;
1016
1017
0
    char16_t* p = buf;
1018
0
    if (len && (w < 0) && (radix == 10) && !raw) {
1019
0
        w = -w;
1020
0
        *p++ = kUMinus;
1021
0
        --len;
1022
0
    } else if (len && (w == 0)) {
1023
0
        *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0];
1024
0
        --len;
1025
0
    }
1026
1027
0
    while (len && (w != 0)) {
1028
0
        int64_t n = w / base;
1029
0
        int64_t m = n * base;
1030
0
        int32_t d = static_cast<int32_t>(w - m);
1031
0
        *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]);
1032
0
        w = n;
1033
0
        --len;
1034
0
    }
1035
0
    if (len) {
1036
0
        *p = 0; // null terminate if room for caller convenience
1037
0
    }
1038
1039
0
    len = static_cast<uint32_t>(p - buf);
1040
0
    if (*buf == kUMinus) {
1041
0
        ++buf;
1042
0
    }
1043
0
    while (--p > buf) {
1044
0
        char16_t c = *p;
1045
0
        *p = *buf;
1046
0
        *buf = c;
1047
0
        ++buf;
1048
0
    }
1049
1050
0
    return len;
1051
0
}
1052
1053
1054
U_NAMESPACE_END
1055
1056
/* U_HAVE_RBNF */
1057
#endif