Coverage Report

Created: 2026-05-06 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/nfrs.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*   Copyright (C) 1997-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
******************************************************************************
8
*   file name:  nfrs.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
* Modification history
14
* Date        Name      Comments
15
* 10/11/2001  Doug      Ported from ICU4J
16
*/
17
18
#include "nfrs.h"
19
20
#if U_HAVE_RBNF
21
22
#include "unicode/uchar.h"
23
#include "nfrule.h"
24
#include "nfrlist.h"
25
#include "patternprops.h"
26
#include "putilimp.h"
27
28
#ifdef RBNF_DEBUG
29
#include "cmemory.h"
30
#endif
31
32
/**
 * Positions used to index NFRuleSet::nonNumericalRules.  Each constant
 * names the slot holding one kind of special (non-numerical) rule; the
 * last constant is the number of slots.
 */
enum {
    NEGATIVE_RULE_INDEX          = 0,   // "-x"  rule
    IMPROPER_FRACTION_RULE_INDEX = 1,   // "x.x" rule
    PROPER_FRACTION_RULE_INDEX   = 2,   // "0.x" rule
    DEFAULT_RULE_INDEX           = 3,   // "x.0" rule
    INFINITY_RULE_INDEX          = 4,   // "Inf" rule
    NAN_RULE_INDEX               = 5,   // "NaN" rule
    NON_NUMERICAL_RULE_LENGTH    = 6    // slot count, not a rule
};
47
48
U_NAMESPACE_BEGIN
49
50
#if 0
// Disabled historical implementation, kept for reference only.
// NOTE: as written it cannot be enabled: int64_t has no abs() member,
// and the value returned after the Euclid loop is the greatest common
// divisor rather than the least common multiple.
//
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        do {
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}

#else
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with the binary gcd algorithm
 * and the result is computed as x / gcd * y.
 *
 * @param x First operand.
 * @param y Second operand.
 * @return The least common multiple, or 0 when either operand is 0.
 *
 * NOTE(review): the gcd loop appears to assume positive operands, and
 * the final multiplication is not checked for overflow - confirm that
 * callers only pass positive values of moderate size.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // A zero operand must be rejected up front: with x == y == 0 the
    // power-of-two stripping loop below never terminates, and with only
    // x == 0 the computed gcd is 0, so the final division would divide
    // by zero.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip the power of two shared by both operands; it is multiplied
    // back into the gcd at the end
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // t holds the working difference; its sign selects which of x1 / y1
    // is replaced on each round
    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
#endif
120
121
// UTF-16 code units and strings used when reading and writing rule set
// descriptions.
static const char16_t gPercent = u'%';      // U+0025
static const char16_t gColon = u':';        // U+003A
static const char16_t gSemicolon = u';';    // U+003B
static const char16_t gLineFeed = u'\n';    // U+000A

// NUL-terminated: two code units plus the terminator.
static const char16_t gPercentPercent[] = u"%%";

// NUL-terminated: eight code units plus the terminator.
static const char16_t gNoparse[] = u"@noparse";
135
136
/**
 * Constructs a rule set from descriptions[index].
 *
 * If the description starts with '%', the text up to the first colon
 * becomes the rule set's name, and that name, the colon and any pattern
 * white space following it are removed from descriptions[index] in
 * place.  Otherwise the name is "%default".  The rules themselves are
 * not parsed here; see parseRules().
 *
 * @param _owner       The formatter this rule set belongs to.
 * @param descriptions Array of rule set descriptions.
 * @param index        Which description to use; the caller must pass a
 *                     valid index, it is not checked here.
 * @param status       Set to U_PARSE_ERROR when the description is empty
 *                     (before or after the name is removed) or when the
 *                     name is not terminated by a colon.
 */
NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
  : rules(0)
  , owner(_owner)
  , fractionRules()
{
    // no special rules are known yet
    for (int32_t slot = 0; slot < NON_NUMERICAL_RULE_LENGTH; ++slot) {
        nonNumericalRules[slot] = nullptr;
    }

    if (U_FAILURE(status)) {
        return;
    }

    UnicodeString& description = descriptions[index]; // !!! make sure index is valid

    if (description.isEmpty()) {
        // empty rule set description
        status = U_PARSE_ERROR;
        return;
    }

    // The rule set name may be omitted when the formatter description
    // consists of a single rule set.
    if (description.charAt(0) != gPercent) {
        name.setTo(UNICODE_STRING_SIMPLE("%default"));
    } else {
        int32_t pos = description.indexOf(gColon);
        if (pos < 2) {
            // either there is no colon, or the name is just "%"
            status = U_PARSE_ERROR;
            return;
        }
        name.setTo(description, 0, pos);

        // step past the colon and then past any white space after it
        while (pos < description.length()) {
            ++pos;
            if (!PatternProps::isWhiteSpace(description.charAt(pos))) {
                break;
            }
        }
        description.remove(0, pos);
    }

    if (description.isEmpty()) {
        // nothing is left once the name has been removed
        status = U_PARSE_ERROR;
        return;
    }

    // a name that starts with "%%" marks a rule set that is not public
    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;

    // a trailing "@noparse" disables parsing and is not part of the name
    if (name.endsWith(gNoparse, 8)) {
        fIsParseable = false;
        name.truncate(name.length() - 8);
    }

    // the remaining members are filled in by parseRules()
}
194
195
void
196
NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
197
124k
{
198
    // start by creating a Vector whose elements are Strings containing
199
    // the descriptions of the rules (one rule per element).  The rules
200
    // are separated by semicolons (there's no escape facility: ALL
201
    // semicolons are rule delimiters)
202
203
124k
    if (U_FAILURE(status)) {
204
0
        return;
205
0
    }
206
207
    // ensure we are starting with an empty rule list
208
124k
    rules.deleteAll();
209
210
    // dlf - the original code kept a separate description array for no reason,
211
    // so I got rid of it.  The loop was too complex so I simplified it.
212
213
124k
    UnicodeString currentDescription;
214
124k
    int32_t oldP = 0;
215
2.08M
    while (oldP < description.length()) {
216
1.96M
        int32_t p = description.indexOf(gSemicolon, oldP);
217
1.96M
        if (p == -1) {
218
10.7k
            p = description.length();
219
10.7k
        }
220
1.96M
        currentDescription.setTo(description, oldP, p - oldP);
221
1.96M
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
222
1.96M
        if (U_FAILURE(status)) {
223
1.11k
            return;
224
1.11k
        }
225
1.96M
        oldP = p + 1;
226
1.96M
    }
227
228
    // for rules that didn't specify a base value, their base values
229
    // were initialized to 0.  Make another pass through the list and
230
    // set all those rules' base values.  We also remove any special
231
    // rules from the list and put them into their own member variables
232
123k
    int64_t defaultBaseValue = 0;
233
234
    // (this isn't a for loop because we might be deleting items from
235
    // the vector-- we want to make sure we only increment i when
236
    // we _didn't_ delete anything from the vector)
237
123k
    int32_t rulesSize = rules.size();
238
2.61M
    for (int32_t i = 0; i < rulesSize; i++) {
239
2.49M
        NFRule* rule = rules[i];
240
2.49M
        int64_t baseValue = rule->getBaseValue();
241
242
2.49M
        if (baseValue == 0) {
243
            // if the rule's base value is 0, fill in a default
244
            // base value (this will be 1 plus the preceding
245
            // rule's base value for regular rule sets, and the
246
            // same as the preceding rule's base value in fraction
247
            // rule sets)
248
465k
            rule->setBaseValue(defaultBaseValue, status);
249
465k
            if (U_FAILURE(status)) {
250
0
                return;
251
0
            }
252
465k
        }
253
2.02M
        else {
254
            // if it's a regular rule that already knows its base value,
255
            // check to make sure the rules are in order, and update
256
            // the default base value for the next rule
257
2.02M
            if (baseValue < defaultBaseValue) {
258
                // throw new IllegalArgumentException("Rules are not in order");
259
61
                status = U_PARSE_ERROR;
260
61
                return;
261
61
            }
262
2.02M
            defaultBaseValue = baseValue;
263
2.02M
        }
264
2.49M
        if (!fIsFractionRuleSet) {
265
2.49M
            ++defaultBaseValue;
266
2.49M
        }
267
2.49M
    }
268
123k
}
269
270
/**
271
 * Set one of the non-numerical rules.
272
 * @param rule The rule to set.
273
 */
274
116k
void NFRuleSet::setNonNumericalRule(NFRule *rule) {
275
116k
    switch (rule->getBaseValue()) {
276
73.1k
        case NFRule::kNegativeNumberRule:
277
73.1k
            delete nonNumericalRules[NEGATIVE_RULE_INDEX];
278
73.1k
            nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
279
73.1k
            return;
280
34.1k
        case NFRule::kImproperFractionRule:
281
34.1k
            setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
282
34.1k
            return;
283
2.21k
        case NFRule::kProperFractionRule:
284
2.21k
            setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
285
2.21k
            return;
286
7.16k
        case NFRule::kDefaultRule:
287
7.16k
            setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
288
7.16k
            return;
289
0
        case NFRule::kInfinityRule:
290
0
            delete nonNumericalRules[INFINITY_RULE_INDEX];
291
0
            nonNumericalRules[INFINITY_RULE_INDEX] = rule;
292
0
            return;
293
0
        case NFRule::kNaNRule:
294
0
            delete nonNumericalRules[NAN_RULE_INDEX];
295
0
            nonNumericalRules[NAN_RULE_INDEX] = rule;
296
0
            return;
297
0
        case NFRule::kNoBase:
298
0
        case NFRule::kOtherRule:
299
0
        default:
300
            // If we do not remember the rule inside the object.
301
            // delete it here to prevent memory leak.
302
0
            delete rule;
303
0
            return;
304
116k
    }
305
116k
}
306
307
/**
308
 * Determine the best fraction rule to use. Rules matching the decimal point from
309
 * DecimalFormatSymbols become the main set of rules to use.
310
 * @param originalIndex The index into nonNumericalRules
311
 * @param newRule The new rule to consider
312
 * @param rememberRule Should the new rule be added to fractionRules.
313
 */
314
43.5k
void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
315
43.5k
    if (rememberRule) {
316
43.5k
        fractionRules.add(newRule);
317
43.5k
    }
318
43.5k
    NFRule *bestResult = nonNumericalRules[originalIndex];
319
43.5k
    if (bestResult == nullptr) {
320
30.6k
        nonNumericalRules[originalIndex] = newRule;
321
30.6k
    }
322
12.9k
    else {
323
        // We have more than one. Which one is better?
324
12.9k
        const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
325
12.9k
        if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
326
12.9k
            == newRule->getDecimalPoint())
327
1.68k
        {
328
1.68k
            nonNumericalRules[originalIndex] = newRule;
329
1.68k
        }
330
        // else leave it alone
331
12.9k
    }
332
43.5k
}
333
334
/**
 * Deletes the negative-number, infinity and NaN rules.  The improper
 * fraction, proper fraction and default slots are deliberately not
 * deleted here: those rules are also stored in fractionRules and are
 * deleted via that NFRuleList.
 */
NFRuleSet::~NFRuleSet()
{
    delete nonNumericalRules[NEGATIVE_RULE_INDEX];
    delete nonNumericalRules[INFINITY_RULE_INDEX];
    delete nonNumericalRules[NAN_RULE_INDEX];
}
346
347
/**
 * Null-tolerant equality for two rule pointers: two null pointers are
 * equal, a null and a non-null pointer are not, and two non-null
 * pointers are compared through NFRule's operator==.
 */
static UBool
util_equalRules(const NFRule* rule1, const NFRule* rule2)
{
    if (rule1 == nullptr || rule2 == nullptr) {
        // equal only when both are missing
        return rule1 == rule2;
    }
    return *rule1 == *rule2;
}
359
360
bool
361
NFRuleSet::operator==(const NFRuleSet& rhs) const
362
0
{
363
0
    if (rules.size() == rhs.rules.size() &&
364
0
        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
365
0
        name == rhs.name) {
366
367
        // ...then compare the non-numerical rule lists...
368
0
        for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
369
0
            if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
370
0
                return false;
371
0
            }
372
0
        }
373
374
        // ...then compare the rule lists...
375
0
        for (uint32_t i = 0; i < rules.size(); ++i) {
376
0
            if (*rules[i] != *rhs.rules[i]) {
377
0
                return false;
378
0
            }
379
0
        }
380
0
        return true;
381
0
    }
382
0
    return false;
383
0
}
384
385
void
386
0
NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
387
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
388
0
        rules[i]->setDecimalFormatSymbols(newSymbols, status);
389
0
    }
390
    // Switch the fraction rules to mirror the DecimalFormatSymbols.
391
0
    for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
392
0
        if (nonNumericalRules[nonNumericalIdx]) {
393
0
            for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
394
0
                NFRule *fractionRule = fractionRules[fIdx];
395
0
                if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
396
0
                    setBestFractionRule(nonNumericalIdx, fractionRule, false);
397
0
                }
398
0
            }
399
0
        }
400
0
    }
401
402
0
    for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
403
0
        NFRule *rule = nonNumericalRules[nnrIdx];
404
0
        if (rule) {
405
0
            rule->setDecimalFormatSymbols(newSymbols, status);
406
0
        }
407
0
    }
408
0
}
409
410
8.61M
#define RECURSION_LIMIT 64
411
412
void
413
NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
414
1.31k
{
415
1.31k
    if (recursionCount >= RECURSION_LIMIT) {
416
        // stop recursion
417
0
        status = U_INVALID_STATE_ERROR;
418
0
        return;
419
0
    }
420
1.31k
    const NFRule *rule = findNormalRule(number);
421
1.31k
    if (rule) { // else error, but can't report it
422
1.31k
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
423
1.31k
    }
424
1.31k
}
425
426
void
427
NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
428
0
{
429
0
    if (recursionCount >= RECURSION_LIMIT) {
430
        // stop recursion
431
0
        status = U_INVALID_STATE_ERROR;
432
0
        return;
433
0
    }
434
0
    const NFRule *rule = findDoubleRule(number);
435
0
    if (rule) { // else error, but can't report it
436
0
        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
437
0
    }
438
0
}
439
440
/**
 * Selects the rule used to format a double.  The checks run in this
 * order: fraction rule set, NaN, negative number, infinity,
 * non-integral value, default rule, and finally the regular rule list
 * via findNormalRule().
 * @param number The value being formatted.
 * @return The rule to use; may be null when no rule applies.
 */
const NFRule*
NFRuleSet::findDoubleRule(double number) const
{
    // fraction rule sets have their own selection logic
    if (isFractionRuleSet()) {
        return findFractionRuleSetRule(number);
    }

    if (uprv_isNaN(number)) {
        // fall back to the owner's NaN rule when this set has none
        const NFRule *nanRule = nonNumericalRules[NAN_RULE_INDEX];
        if (nanRule) {
            return nanRule;
        }
        return owner->getDefaultNaNRule();
    }

    if (number < 0) {
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
            return nonNumericalRules[NEGATIVE_RULE_INDEX];
        }
        // without a negative-number rule, pretend the number is positive
        number = -number;
    }

    if (uprv_isInfinite(number)) {
        // fall back to the owner's infinity rule when this set has none
        const NFRule *infinityRule = nonNumericalRules[INFINITY_RULE_INDEX];
        if (infinityRule) {
            return infinityRule;
        }
        return owner->getDefaultInfinityRule();
    }

    // a non-integral value uses one of the fraction rules, if present
    if (number != uprv_floor(number)) {
        // values between 0 and 1 prefer the proper fraction rule
        if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
            return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
        }
        // everything else gets the improper fraction rule
        if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
            return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
        }
    }

    // the default rule, when there is one, handles whatever is left
    if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
        return nonNumericalRules[DEFAULT_RULE_INDEX];
    }

    // otherwise round to an integer and search the regular rules
    return findNormalRule(util64_fromDouble(number + 0.5));
}
498
499
/**
 * Selects the rule used to format an integer.
 *
 * The fraction-rule-set and negative-number checks are repeated here,
 * although findDoubleRule() performs them too, because format(int64_t)
 * calls this function directly.  The fraction rules are skipped since
 * the value is an integer.  The default rule is consulted only when the
 * regular rule list is empty.
 *
 * @param number The value being formatted.
 * @return The applicable rule, or null when the rule set is malformed
 *         (the smallest base value exceeds the number, or a rollback is
 *         required from the first rule).
 */
const NFRule *
NFRuleSet::findNormalRule(int64_t number) const
{
    // a fraction rule set selects its rule differently (we should only
    // get here for such a set when the value is 0)
    if (fIsFractionRuleSet) {
        return findFractionRuleSetRule(static_cast<double>(number));
    }

    if (number < 0) {
        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
            return nonNumericalRules[NEGATIVE_RULE_INDEX];
        }
        // without a negative-number rule, pretend the number is positive
        number = -number;
    }

    // {dlf} with no rules except special rules, use the default rule
    const int32_t ruleCount = rules.size();
    if (ruleCount <= 0) {
        return nonNumericalRules[DEFAULT_RULE_INDEX];
    }

    // binary-search the rule list: a rule applies to all values from its
    // own base value up to the next rule's base value
    int32_t lo = 0;
    int32_t hi = ruleCount;
    while (lo < hi) {
        const int32_t mid = (lo + hi) / 2;
        const int64_t base = rules[mid]->getBaseValue();
        if (base == number) {
            return rules[mid];
        }
        if (base > number) {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    if (hi == 0) {
        // bad rule set, minimum base > 0
        return nullptr;
    }

    NFRule *candidate = rules[hi - 1];

    // shouldRollBack() decides whether the preceding rule has to be
    // used instead of the one the search landed on
    if (candidate->shouldRollBack(number)) {
        if (hi == 1) {
            // bad rule set, no prior rule to roll back to
            return nullptr;
        }
        candidate = rules[hi - 2];
    }
    return candidate;
}
572
573
/**
574
 * If this rule is a fraction rule set, this function is used by
575
 * findRule() to select the most appropriate rule for formatting
576
 * the number.  Basically, the base value of each rule in the rule
577
 * set is treated as the denominator of a fraction.  Whichever
578
 * denominator can produce the fraction closest in value to the
579
 * number passed in is the result.  If there's a tie, the earlier
580
 * one in the list wins.  (If there are two rules in a row with the
581
 * same base value, the first one is used when the numerator of the
582
 * fraction would be 1, and the second rule is used the rest of the
583
 * time.
584
 * @param number The number being formatted (which will always be
585
 * a number between 0 and 1)
586
 * @return The rule to use to format this number
587
 */
588
const NFRule*
589
NFRuleSet::findFractionRuleSetRule(double number) const
590
0
{
591
    // the obvious way to do this (multiply the value being formatted
592
    // by each rule's base value until you get an integral result)
593
    // doesn't work because of rounding error.  This method is more
594
    // accurate
595
596
    // find the least common multiple of the rules' base values
597
    // and multiply this by the number being formatted.  This is
598
    // all the precision we need, and we can do all of the rest
599
    // of the math using integer arithmetic
600
0
    int64_t leastCommonMultiple = rules[0]->getBaseValue();
601
0
    if (leastCommonMultiple == 0) {
602
0
        return nullptr;
603
0
    }
604
0
    int64_t numerator;
605
0
    {
606
0
        for (uint32_t i = 1; i < rules.size(); ++i) {
607
0
            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
608
0
        }
609
0
        numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5);
610
0
    }
611
    // for each rule, do the following...
612
0
    int64_t tempDifference;
613
0
    int64_t difference = util64_fromDouble(uprv_maxMantissa());
614
0
    int32_t winner = 0;
615
0
    for (uint32_t i = 0; i < rules.size(); ++i) {
616
        // "numerator" is the numerator of the fraction if the
617
        // denominator is the LCD.  The numerator if the rule's
618
        // base value is the denominator is "numerator" times the
619
        // base value divided bythe LCD.  Here we check to see if
620
        // that's an integer, and if not, how close it is to being
621
        // an integer.
622
0
        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
623
624
625
        // normalize the result of the above calculation: we want
626
        // the numerator's distance from the CLOSEST multiple
627
        // of the LCD
628
0
        if (leastCommonMultiple - tempDifference < tempDifference) {
629
0
            tempDifference = leastCommonMultiple - tempDifference;
630
0
        }
631
632
        // if this is as close as we've come, keep track of how close
633
        // that is, and the line number of the rule that did it.  If
634
        // we've scored a direct hit, we don't have to look at any more
635
        // rules
636
0
        if (tempDifference < difference) {
637
0
            difference = tempDifference;
638
0
            winner = i;
639
0
            if (difference == 0) {
640
0
                break;
641
0
            }
642
0
        }
643
0
    }
644
645
    // if we have two successive rules that both have the winning base
646
    // value, then the first one (the one we found above) is used if
647
    // the numerator of the fraction is 1 and the second one is used if
648
    // the numerator of the fraction is anything else (this lets us
649
    // do things like "one third"/"two thirds" without having to define
650
    // a whole bunch of extra rule sets)
651
0
    if (static_cast<unsigned>(winner + 1) < rules.size() &&
652
0
        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
653
0
        double n = static_cast<double>(rules[winner]->getBaseValue()) * number;
654
0
        if (n < 0.5 || n >= 2) {
655
0
            ++winner;
656
0
        }
657
0
    }
658
659
    // finally, return the winning rule
660
0
    return rules[winner];
661
0
}
662
663
/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and reports
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param pos The initial position is ignored and assumed
 * to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @param nonNumericalExecutedRuleMask Bit mask with one bit per
 * non-numerical rule; a rule whose bit is already set is not tried again.
 * @param recursionCount The current recursion depth.  Parsing is
 * abandoned once it reaches RECURSION_LIMIT.
 * @param result Receives the numerical result of parsing this string.
 * This will be the matching rule's base value, composed appropriately
 * with the results of matching any of its substitutions.  It is
 * always set: if nothing matched the input string at all, it holds
 * the long value 0, and the parse position is left unchanged.
 * @return false when the recursion limit has been reached or the text
 * is empty, true otherwise.
 */
684
#ifdef RBNF_DEBUG
#include <stdio.h>

// Debug-only helper: extracts a UnicodeString into a temporary char
// buffer and prints it to f.  Nothing is printed when the buffer cannot
// be allocated.
static void dumpUS(FILE* f, const UnicodeString& us) {
  const int length = us.length();
  char* chars = static_cast<char *>(uprv_malloc((length+1)*sizeof(char)));
  if (chars == nullptr) {
    return;
  }
  us.extract(0, length, chars);
  chars[length] = 0;
  fprintf(f, "%s", chars);
  uprv_free(chars);
}
#endif
698
699
/**
 * Tries every applicable rule of this rule set against the text and
 * keeps the result of the rule that consumed the most characters.
 *
 * @param text       The string to parse.
 * @param pos        Start position handed to each rule; on a true return
 *                   it is replaced by the furthest position any rule
 *                   reached.
 * @param upperBound Regular rules whose base value is not below this
 *                   bound are skipped, except in fraction rule sets.
 * @param nonNumericalExecutedRuleMask One bit per non-numerical rule;
 *                   rules whose bit is set are not tried again.
 * @param recursionCount Current nesting depth.
 * @param result     Set to the long value 0 first, then to the value of
 *                   the best match, if there is one.
 * @return false when the recursion limit has been reached or the text is
 *         empty, true otherwise.
 */
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if we've reached the recursion limit
    if (recursionCount >= RECURSION_LIMIT) {
        // stop recursion
        return false;
    }

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return false;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %p '", static_cast<const void*>(this));
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, "  parse negative: %d\n", nonNumericalRules[NEGATIVE_RULE_INDEX] != nullptr);
#endif
    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
            // Mark this rule as being executed so that we don't try to execute it again.
            nonNumericalExecutedRuleMask |= 1 << i;

            Formattable tempResult;
            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            // every rule starts from the caller's position
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing them).
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        // the loop also stops as soon as some rule has consumed the
        // whole text
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return true;
}
794
795
void
796
NFRuleSet::appendRules(UnicodeString& result) const
797
0
{
798
0
    uint32_t i;
799
800
    // the rule set name goes first...
801
0
    result.append(name);
802
0
    result.append(gColon);
803
0
    result.append(gLineFeed);
804
805
    // followed by the regular rules...
806
0
    for (i = 0; i < rules.size(); i++) {
807
0
        rules[i]->_appendRuleText(result);
808
0
        result.append(gLineFeed);
809
0
    }
810
811
    // followed by the special rules (if they exist)
812
0
    for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
813
0
        NFRule *rule = nonNumericalRules[i];
814
0
        if (nonNumericalRules[i]) {
815
0
            if (rule->getBaseValue() == NFRule::kImproperFractionRule
816
0
                || rule->getBaseValue() == NFRule::kProperFractionRule
817
0
                || rule->getBaseValue() == NFRule::kDefaultRule)
818
0
            {
819
0
                for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
820
0
                    NFRule *fractionRule = fractionRules[fIdx];
821
0
                    if (fractionRule->getBaseValue() == rule->getBaseValue()) {
822
0
                        fractionRule->_appendRuleText(result);
823
0
                        result.append(gLineFeed);
824
0
                    }
825
0
                }
826
0
            }
827
0
            else {
828
0
                rule->_appendRuleText(result);
829
0
                result.append(gLineFeed);
830
0
            }
831
0
        }
832
0
    }
833
0
}
834
835
// utility functions
836
837
8.40M
int64_t util64_fromDouble(double d) {
838
8.40M
    int64_t result = 0;
839
8.40M
    if (!uprv_isNaN(d)) {
840
8.40M
        double mant = uprv_maxMantissa();
841
8.40M
        if (d < -mant) {
842
0
            d = -mant;
843
8.40M
        } else if (d > mant) {
844
1.09k
            d = mant;
845
1.09k
        }
846
8.40M
        UBool neg = d < 0; 
847
8.40M
        if (neg) {
848
0
            d = -d;
849
0
        }
850
8.40M
        result = static_cast<int64_t>(uprv_floor(d));
851
8.40M
        if (neg) {
852
0
            result = -result;
853
0
        }
854
8.40M
    }
855
8.40M
    return result;
856
8.40M
}
857
858
3.75M
/**
 * Raises `base` to the power `exponent` using binary exponentiation.
 *
 * The arithmetic is unsigned 64-bit, so results that do not fit wrap
 * modulo 2^64.  A zero base always yields 0, including for exponent 0.
 *
 * @param base      The base.
 * @param exponent  The exponent.
 * @return          base ** exponent (mod 2^64), or 0 when base is 0.
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
    if (base == 0) {
        return 0;
    }

    uint64_t product = 1;
    uint64_t square = base;
    for (uint32_t bits = exponent; bits != 0; ) {
        if ((bits & 1u) != 0) {
            product *= square;
        }
        bits >>= 1;
        // Only square again when another exponent bit remains to be consumed.
        if (bits != 0) {
            square *= square;
        }
    }
    return product;
}
876
877
// Output digits for radices up to 36, indexed by digit value.  The entries
// are spelled as numeric code points (0x30-0x39 for 0-9, 0x61-0x7A for a-z)
// rather than character literals.
static const uint8_t asciiDigits[] = {
    // digit values 0-9
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    // digit values 10-22
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
    // digit values 23-35
    0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A
};
884
885
// UTF-16 code unit U+002D, the sign character used for char16_t text.
static const char16_t kUMinus{0x002D};
886
887
#ifdef RBNF_DEBUG
888
// Sign character used by the char-based (debug-only) conversion helpers.
static const char kMinus{'-'};
889
890
// Digit classification table for code points 0x00-0x7F, 16 entries per row.
// A zero entry means "not a digit".  Otherwise bit 0x80 is set and the low
// seven bits hold the digit value: 0-9 for '0'-'9', 10-35 for both 'A'-'Z'
// and 'a'-'z'.
static const uint8_t digitInfo[] = {
    // 0x00-0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10-0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20-0x2F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x30-0x3F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0, 0, 0, 0, 0, 0,
    // 0x40-0x4F
    0, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90,
    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    // 0x50-0x5F
    0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0,
    0xA1, 0xA2, 0xA3, 0, 0, 0, 0, 0,
    // 0x60-0x6F
    0, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90,
    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    // 0x70-0x7F
    0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0,
    0xA1, 0xA2, 0xA3, 0, 0, 0, 0, 0
};
908
909
int64_t util64_atoi(const char* str, uint32_t radix)
910
{
911
    if (radix > 36) {
912
        radix = 36;
913
    } else if (radix < 2) {
914
        radix = 2;
915
    }
916
    int64_t lradix = radix;
917
918
    int neg = 0;
919
    if (*str == kMinus) {
920
        ++str;
921
        neg = 1;
922
    }
923
    int64_t result = 0;
924
    uint8_t b;
925
    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
926
        result *= lradix;
927
        result += (int32_t)b;
928
    }
929
    if (neg) {
930
        result = -result;
931
    }
932
    return result;
933
}
934
935
int64_t util64_utoi(const char16_t* str, uint32_t radix)
936
{
937
    if (radix > 36) {
938
        radix = 36;
939
    } else if (radix < 2) {
940
        radix = 2;
941
    }
942
    int64_t lradix = radix;
943
944
    int neg = 0;
945
    if (*str == kUMinus) {
946
        ++str;
947
        neg = 1;
948
    }
949
    int64_t result = 0;
950
    char16_t c;
951
    uint8_t b;
952
    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
953
        result *= lradix;
954
        result += (int32_t)b;
955
    }
956
    if (neg) {
957
        result = -result;
958
    }
959
    return result;
960
}
961
962
uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
963
{    
964
    if (radix > 36) {
965
        radix = 36;
966
    } else if (radix < 2) {
967
        radix = 2;
968
    }
969
    int64_t base = radix;
970
971
    char* p = buf;
972
    if (len && (w < 0) && (radix == 10) && !raw) {
973
        w = -w;
974
        *p++ = kMinus;
975
        --len;
976
    } else if (len && (w == 0)) {
977
        *p++ = (char)raw ? 0 : asciiDigits[0];
978
        --len;
979
    }
980
981
    while (len && w != 0) {
982
        int64_t n = w / base;
983
        int64_t m = n * base;
984
        int32_t d = (int32_t)(w-m);
985
        *p++ = raw ? (char)d : asciiDigits[d];
986
        w = n;
987
        --len;
988
    }
989
    if (len) {
990
        *p = 0; // null terminate if room for caller convenience
991
    }
992
993
    len = p - buf;
994
    if (*buf == kMinus) {
995
        ++buf;
996
    }
997
    while (--p > buf) {
998
        char c = *p;
999
        *p = *buf;
1000
        *buf = c;
1001
        ++buf;
1002
    }
1003
1004
    return len;
1005
}
1006
#endif
1007
1008
uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
1009
0
{    
1010
0
    if (radix > 36) {
1011
0
        radix = 36;
1012
0
    } else if (radix < 2) {
1013
0
        radix = 2;
1014
0
    }
1015
0
    int64_t base = radix;
1016
1017
0
    char16_t* p = buf;
1018
0
    if (len && (w < 0) && (radix == 10) && !raw) {
1019
0
        w = -w;
1020
0
        *p++ = kUMinus;
1021
0
        --len;
1022
0
    } else if (len && (w == 0)) {
1023
0
        *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0];
1024
0
        --len;
1025
0
    }
1026
1027
0
    while (len && (w != 0)) {
1028
0
        int64_t n = w / base;
1029
0
        int64_t m = n * base;
1030
0
        int32_t d = static_cast<int32_t>(w - m);
1031
0
        *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]);
1032
0
        w = n;
1033
0
        --len;
1034
0
    }
1035
0
    if (len) {
1036
0
        *p = 0; // null terminate if room for caller convenience
1037
0
    }
1038
1039
0
    len = static_cast<uint32_t>(p - buf);
1040
0
    if (*buf == kUMinus) {
1041
0
        ++buf;
1042
0
    }
1043
0
    while (--p > buf) {
1044
0
        char16_t c = *p;
1045
0
        *p = *buf;
1046
0
        *buf = c;
1047
0
        ++buf;
1048
0
    }
1049
1050
0
    return len;
1051
0
}
1052
1053
1054
U_NAMESPACE_END
1055
1056
/* U_HAVE_RBNF */
1057
#endif