Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/plurfmt.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2009-2015, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
*******************************************************************************
8
*
9
* File PLURFMT.CPP
10
*******************************************************************************
11
*/
12
13
#include "unicode/decimfmt.h"
14
#include "unicode/messagepattern.h"
15
#include "unicode/plurfmt.h"
16
#include "unicode/plurrule.h"
17
#include "unicode/utypes.h"
18
#include "cmemory.h"
19
#include "messageimpl.h"
20
#include "nfrule.h"
21
#include "plurrule_impl.h"
22
#include "uassert.h"
23
#include "uhash.h"
24
#include "number_decimalquantity.h"
25
#include "number_utils.h"
26
#include "number_utypes.h"
27
28
#if !UCONFIG_NO_FORMATTING
29
30
U_NAMESPACE_BEGIN
31
32
using number::impl::DecimalQuantity;
33
34
// UTF-16 code units spelling the plural keyword "other", followed by a NUL terminator.
static const UChar OTHER_STRING[] = {
    0x6F,  // 'o'
    0x74,  // 't'
    0x68,  // 'h'
    0x65,  // 'e'
    0x72,  // 'r'
    0      // terminator
};
37
38
// Expands ICU's RTTI boilerplate for PluralFormat.
// NOTE(review): presumably defines getStaticClassID()/getDynamicClassID() -- confirm against uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39
40
// Builds a PluralFormat for the default locale with cardinal plural rules.
// No pattern is applied. init() looks up the rules for the locale and creates
// the NumberFormat; failures are reported through status.
PluralFormat::PluralFormat(UErrorCode& status)
    : locale(Locale::getDefault()),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
}
47
48
// Builds a PluralFormat for the given locale with cardinal plural rules.
// No pattern is applied. init() looks up the rules for loc and creates the
// NumberFormat; failures are reported through status.
PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
}
55
56
// Builds a PluralFormat for the default locale using a clone of the supplied rules.
// No pattern is applied. The plural type argument handed to init() is only
// consulted when no rules are supplied, so its value is irrelevant here.
PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
    : locale(Locale::getDefault()),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(&rules, UPLURAL_TYPE_COUNT, status);
}
63
64
// Builds a PluralFormat for the given locale using a clone of the supplied rules.
// No pattern is applied. The plural type argument handed to init() is only
// consulted when no rules are supplied, so its value is irrelevant here.
PluralFormat::PluralFormat(const Locale& loc,
                           const PluralRules& rules,
                           UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(&rules, UPLURAL_TYPE_COUNT, status);
}
73
74
// Builds a PluralFormat for the given locale, looking up the locale's plural
// rules of the requested type. No pattern is applied.
// Failures are reported through status.
PluralFormat::PluralFormat(const Locale& loc,
                           UPluralType type,
                           UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, type, status);
}
83
84
// Builds a PluralFormat for the default locale with cardinal plural rules
// and then applies the pattern pat. Failures are reported through status.
PluralFormat::PluralFormat(const UnicodeString& pat,
                           UErrorCode& status)
    : locale(Locale::getDefault()),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
    applyPattern(pat, status);
}
93
94
// Builds a PluralFormat for the given locale with cardinal plural rules
// and then applies the pattern pat. Failures are reported through status.
PluralFormat::PluralFormat(const Locale& loc,
                           const UnicodeString& pat,
                           UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
    applyPattern(pat, status);
}
104
105
// Builds a PluralFormat for the default locale using a clone of the supplied
// rules, and then applies the pattern pat. Failures are reported through status.
PluralFormat::PluralFormat(const PluralRules& rules,
                           const UnicodeString& pat,
                           UErrorCode& status)
    : locale(Locale::getDefault()),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(&rules, UPLURAL_TYPE_COUNT, status);
    applyPattern(pat, status);
}
115
116
// Builds a PluralFormat for the given locale using a clone of the supplied
// rules, and then applies the pattern pat. Failures are reported through status.
PluralFormat::PluralFormat(const Locale& loc,
                           const PluralRules& rules,
                           const UnicodeString& pat,
                           UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(&rules, UPLURAL_TYPE_COUNT, status);
    applyPattern(pat, status);
}
127
128
// Builds a PluralFormat for the given locale, looking up the locale's plural
// rules of the requested type, and then applies the pattern pat.
// Failures are reported through status.
PluralFormat::PluralFormat(const Locale& loc,
                           UPluralType type,
                           const UnicodeString& pat,
                           UErrorCode& status)
    : locale(loc),
      msgPattern(status),
      numberFormat(nullptr),
      offset(0)
{
    init(nullptr, type, status);
    applyPattern(pat, status);
}
139
140
// Copy constructor. Value members are copied directly; the owned NumberFormat
// and PluralRules objects are duplicated by copyObjects().
PluralFormat::PluralFormat(const PluralFormat& other)
    : Format(other),
      locale(other.locale),
      msgPattern(other.msgPattern),
      numberFormat(nullptr),
      offset(other.offset)
{
    copyObjects(other);
}
148
149
void
150
0
PluralFormat::copyObjects(const PluralFormat& other) {
151
0
    UErrorCode status = U_ZERO_ERROR;
152
0
    if (numberFormat != NULL) {
153
0
        delete numberFormat;
154
0
    }
155
0
    if (pluralRulesWrapper.pluralRules != NULL) {
156
0
        delete pluralRulesWrapper.pluralRules;
157
0
    }
158
0
159
0
    if (other.numberFormat == NULL) {
160
0
        numberFormat = NumberFormat::createInstance(locale, status);
161
0
    } else {
162
0
        numberFormat = (NumberFormat*)other.numberFormat->clone();
163
0
    }
164
0
    if (other.pluralRulesWrapper.pluralRules == NULL) {
165
0
        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
166
0
    } else {
167
0
        pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
168
0
    }
169
0
}
170
171
172
0
// Destructor: releases the owned NumberFormat.
PluralFormat::~PluralFormat()
{
    delete numberFormat;
}
175
176
void
177
0
PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
178
0
    if (U_FAILURE(status)) {
179
0
        return;
180
0
    }
181
0
182
0
    if (rules==NULL) {
183
0
        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
184
0
    } else {
185
0
        pluralRulesWrapper.pluralRules = rules->clone();
186
0
        if (pluralRulesWrapper.pluralRules == NULL) {
187
0
            status = U_MEMORY_ALLOCATION_ERROR;
188
0
            return;
189
0
        }
190
0
    }
191
0
192
0
    numberFormat= NumberFormat::createInstance(locale, status);
193
0
}
194
195
void
196
0
PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
197
0
    msgPattern.parsePluralStyle(newPattern, NULL, status);
198
0
    if (U_FAILURE(status)) {
199
0
        msgPattern.clear();
200
0
        offset = 0;
201
0
        return;
202
0
    }
203
0
    offset = msgPattern.getPluralOffset(0);
204
0
}
205
206
// Formats a Formattable. Only numeric Formattables are accepted; anything else
// sets status to U_ILLEGAL_ARGUMENT_ERROR and leaves appendTo untouched.
UnicodeString&
PluralFormat::format(const Formattable& obj,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return appendTo;
    }
    if (!obj.isNumeric()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    return format(obj, obj.getDouble(), appendTo, pos, status);
}
221
222
// Convenience overload: formats an int32_t and returns the result by value.
UnicodeString
PluralFormat::format(int32_t number, UErrorCode& status) const {
    UnicodeString formatted;
    FieldPosition ignoredPosition(FieldPosition::DONT_CARE);
    return format(Formattable(number), static_cast<double>(number),
                  formatted, ignoredPosition, status);
}
228
229
// Convenience overload: formats a double and returns the result by value.
UnicodeString
PluralFormat::format(double number, UErrorCode& status) const {
    UnicodeString formatted;
    FieldPosition ignoredPosition(FieldPosition::DONT_CARE);
    return format(Formattable(number), number, formatted, ignoredPosition, status);
}
235
236
237
// Formats an int32_t, appending the selected sub-message to appendTo.
UnicodeString&
PluralFormat::format(int32_t number,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const
{
    const double asDouble = static_cast<double>(number);
    return format(Formattable(number), asDouble, appendTo, pos, status);
}
244
245
// Formats a double, appending the selected sub-message to appendTo.
UnicodeString&
PluralFormat::format(double number,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const
{
    return format(Formattable(number), number, appendTo, pos, status);
}
252
253
// Core formatting routine used by all public format() overloads.
//
// numberObject is the value as a Formattable and number is the same value as a
// double. If no pattern has been applied, the number is simply formatted with
// the NumberFormat. Otherwise the sub-message for the number is selected via
// findSubMessage() and appended to appendTo, with each top-level REPLACE_NUMBER
// part ('#') replaced by the formatted (number - offset).
UnicodeString&
PluralFormat::format(const Formattable& numberObject, double number,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    if (msgPattern.countParts() == 0) {
        return numberFormat->format(numberObject, appendTo, pos, status);
    }

    // The sub-message is selected based on, and '#' shows, number minus offset.
    const double adjusted = number - offset;
    const UBool noOffset = (offset == 0);

    // We need both the DecimalQuantity (for plural selection) and the string.
    // This call site needs to use more internal APIs than the Java equivalent.
    number::impl::UFormattedNumberData data;
    if (noOffset) {
        // Use the original object: it could be a BigDecimal etc.
        numberObject.populateDecimalQuantity(data.quantity, status);
    } else {
        data.quantity.setToDouble(adjusted);
    }

    UnicodeString numberString;
    if (auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat)) {
        decFmt->toNumberFormatter().formatImpl(&data, status); // mutates data
        numberString = data.string.toUnicodeString();
    } else if (noOffset) {
        numberFormat->format(numberObject, numberString, status);
    } else {
        numberFormat->format(adjusted, numberString, status);
    }

    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
    if (U_FAILURE(status)) {
        return appendTo;
    }

    // Copy the sub-message, substituting syntactic '#' signs at its top level
    // (not inside nested arguments) with the formatted number.
    const UnicodeString& pattern = msgPattern.getPatternString();
    int32_t copiedUpTo = msgPattern.getPart(partIndex).getLimit();
    for (;;) {
        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
        const int32_t partStart = part.getIndex();
        switch (part.getType()) {
        case UMSGPAT_PART_TYPE_MSG_LIMIT:
            // End of the sub-message: flush the remaining literal text and finish.
            return appendTo.append(pattern, copiedUpTo, partStart - copiedUpTo);

        case UMSGPAT_PART_TYPE_REPLACE_NUMBER:
            appendTo.append(pattern, copiedUpTo, partStart - copiedUpTo);
            appendTo.append(numberString);
            copiedUpTo = part.getLimit();
            break;

        case UMSGPAT_PART_TYPE_SKIP_SYNTAX:
            // Syntax characters are dropped only in JDK apostrophe mode.
            if (MessageImpl::jdkAposMode(msgPattern)) {
                appendTo.append(pattern, copiedUpTo, partStart - copiedUpTo);
                copiedUpTo = part.getLimit();
            }
            break;

        case UMSGPAT_PART_TYPE_ARG_START: {
            // Nested argument: copy it as a whole and jump past its end.
            appendTo.append(pattern, copiedUpTo, partStart - copiedUpTo);
            partIndex = msgPattern.getLimitPartIndex(partIndex);
            const int32_t argEnd = msgPattern.getPart(partIndex).getLimit();
            MessageImpl::appendReducedApostrophes(pattern, partStart, argEnd, appendTo);
            copiedUpTo = argEnd;
            break;
        }

        default:
            // Other part types need no action here.
            break;
        }
    }
}
319
320
// Appends the current pattern string to appendTo.
// If no pattern has been applied, appendTo is set to bogus instead.
UnicodeString&
PluralFormat::toPattern(UnicodeString& appendTo) {
    if (msgPattern.countParts() != 0) {
        appendTo.append(msgPattern.getPatternString());
    } else {
        appendTo.setToBogus();
    }
    return appendTo;
}
329
330
void
331
0
PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
332
0
    if (U_FAILURE(status)) {
333
0
        return;
334
0
    }
335
0
    locale = loc;
336
0
    msgPattern.clear();
337
0
    delete numberFormat;
338
0
    offset = 0;
339
0
    numberFormat = NULL;
340
0
    pluralRulesWrapper.reset();
341
0
    init(NULL, UPLURAL_TYPE_CARDINAL, status);
342
0
}
343
344
void
345
0
PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
346
0
    if (U_FAILURE(status)) {
347
0
        return;
348
0
    }
349
0
    NumberFormat* nf = (NumberFormat*)format->clone();
350
0
    if (nf != NULL) {
351
0
        delete numberFormat;
352
0
        numberFormat = nf;
353
0
    } else {
354
0
        status = U_MEMORY_ALLOCATION_ERROR;
355
0
    }
356
0
}
357
358
// Returns a heap-allocated copy of this object made with the copy constructor.
Format*
PluralFormat::clone() const
{
    PluralFormat* copy = new PluralFormat(*this);
    return copy;
}
363
364
365
// Assignment operator.
// Copies the base-class state (mirroring the copy constructor, which invokes
// Format(other)), then the locale, pattern and offset, and finally duplicates
// the owned NumberFormat and PluralRules through copyObjects().
// Self-assignment is a no-op.
PluralFormat&
PluralFormat::operator=(const PluralFormat& other) {
    if (this != &other) {
        Format::operator=(other);
        locale = other.locale;
        msgPattern = other.msgPattern;
        offset = other.offset;
        copyObjects(other);
    }

    return *this;
}
376
377
// Equality: same object, or same base-class equality plus equal locale,
// pattern, NumberFormat and PluralRules (both null or both equal).
UBool
PluralFormat::operator==(const Format& other) const {
    if (this == &other) {
        return TRUE;
    }
    if (!Format::operator==(other)) {
        return FALSE;
    }
    const PluralFormat& o = static_cast<const PluralFormat&>(other);

    // Equal patterns imply equal offsets.
    if (!(locale == o.locale) || !(msgPattern == o.msgPattern)) {
        return FALSE;
    }

    // NumberFormat: either both absent, or both present and equal.
    if ((numberFormat == nullptr) != (o.numberFormat == nullptr)) {
        return FALSE;
    }
    if (numberFormat != nullptr && !(*numberFormat == *o.numberFormat)) {
        return FALSE;
    }

    // PluralRules: either both absent, or both present and equal.
    const PluralRules* mine = pluralRulesWrapper.pluralRules;
    const PluralRules* theirs = o.pluralRulesWrapper.pluralRules;
    if ((mine == nullptr) != (theirs == nullptr)) {
        return FALSE;
    }
    return mine == nullptr || *mine == *theirs;
}
395
396
// Inequality: the negation of operator==.
UBool
PluralFormat::operator!=(const Format& other) const {
    const UBool same = operator==(other);
    return !same;
}
400
401
void
402
PluralFormat::parseObject(const UnicodeString& /*source*/,
403
                        Formattable& /*result*/,
404
                        ParsePosition& pos) const
405
0
{
406
0
    // Parsing not supported.
407
0
    pos.setErrorIndex(pos.getIndex());
408
0
}
409
410
// Finds the sub-message of a plural-style pattern that applies to number.
//
// pattern:   the parsed message pattern.
// partIndex: index of the first part of the plural style (the optional
//            numeric offset part, or the first ARG_SELECTOR).
// selector:  maps (context, number - offset) to a plural keyword.
// context:   opaque pointer handed through to selector.select().
// number:    the value to match; explicit values ("=2") are compared against
//            it directly, keywords against the selector's result.
// ec:        returns 0 immediately if it already indicates failure.
//
// Returns the part index of the chosen sub-message: an explicit-value match
// wins immediately; otherwise the first sub-message whose keyword matches the
// selected keyword; otherwise the first "other" sub-message; 0 if none.
int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
                                     const PluralSelector& selector, void *context,
                                     double number, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return 0;
    }
    const int32_t partCount = pattern.countParts();

    // An optional leading numeric part carries the plural offset.
    const MessagePattern::Part* part = &pattern.getPart(partIndex);
    double pluralOffset = 0;
    if (MessagePattern::Part::hasNumericValue(part->getType())) {
        pluralOffset = pattern.getNumericValue(*part);
        ++partIndex;
    }

    // The selected keyword stays empty until we must compare against a keyword
    // that is neither an explicit value nor "other"; only then is the selector
    // called. So the selector is never invoked when an explicit value matches
    // first or when "other" is the only keyword present.
    UnicodeString selected;
    const UnicodeString otherKeyword(FALSE, OTHER_STRING, 5);

    // Once a keyword sub-message has been accepted this flag stops further
    // keyword matching (duplicate keywords are allowed; the first one wins),
    // while the scan continues in search of an explicit-value match.
    UBool keywordMatched = FALSE;

    // 0 until some applicable sub-message is found. Holds the first "other"
    // sub-message if nothing better was seen before it, and is overwritten by
    // the first sub-message whose keyword matches the selected keyword.
    int32_t bestMsgStart = 0;

    // Walk the (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples until
    // ARG_LIMIT or the end of a plural-only pattern.
    for (;;) {
        part = &pattern.getPart(partIndex++);
        const UMessagePatternPartType type = part->getType();
        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
            break;
        }
        U_ASSERT(type == UMSGPAT_PART_TYPE_ARG_SELECTOR);

        if (MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
            // Explicit value such as "=2".
            part = &pattern.getPart(partIndex++);
            if (number == pattern.getNumericValue(*part)) {
                return partIndex;
            }
        } else if (keywordMatched) {
            // A keyword sub-message is already chosen; just skip this one.
        } else if (pattern.partSubstringMatches(*part, otherKeyword)) {
            // "other" is checked first so the selector is not called needlessly.
            if (bestMsgStart == 0) {
                bestMsgStart = partIndex;
                if (selected.compare(otherKeyword) == 0) {
                    // First "other" sub-message and the selected keyword is
                    // also "other": do not match "other" again.
                    keywordMatched = TRUE;
                }
            }
        } else {
            if (selected.isEmpty()) {
                selected = selector.select(context, number - pluralOffset, ec);
                if (bestMsgStart != 0 && selected.compare(otherKeyword) == 0) {
                    // An "other" sub-message was already recorded; keep it.
                    // Keyword matching is skipped below, but the limit part
                    // index is still advanced.
                    keywordMatched = TRUE;
                }
            }
            if (!keywordMatched && pattern.partSubstringMatches(*part, selected)) {
                bestMsgStart = partIndex;
                // Do not match this keyword again.
                keywordMatched = TRUE;
            }
        }

        // Jump to the end of this sub-message, then step to the next tuple.
        partIndex = pattern.getLimitPartIndex(partIndex);
        if (++partIndex >= partCount) {
            break;
        }
    }
    return bestMsgStart;
}
498
499
0
void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
500
0
    // If no pattern was applied, return null.
501
0
    if (msgPattern.countParts() == 0) {
502
0
        pos.setBeginIndex(-1);
503
0
        pos.setEndIndex(-1);
504
0
        return;
505
0
    }
506
0
    int partIndex = 0;
507
0
    int currMatchIndex;
508
0
    int count=msgPattern.countParts();
509
0
    int startingAt = pos.getBeginIndex();
510
0
    if (startingAt < 0) {
511
0
        startingAt = 0;
512
0
    }
513
0
514
0
    // The keyword is null until we need to match against a non-explicit, not-"other" value.
515
0
    // Then we get the keyword from the selector.
516
0
    // (In other words, we never call the selector if we match against an explicit value,
517
0
    // or if the only non-explicit keyword is "other".)
518
0
    UnicodeString keyword;
519
0
    UnicodeString matchedWord;
520
0
    const UnicodeString& pattern = msgPattern.getPatternString();
521
0
    int matchedIndex = -1;
522
0
    // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
523
0
    // until the end of the plural-only pattern.
524
0
    while (partIndex < count) {
525
0
        const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
526
0
        if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
527
0
            // Bad format
528
0
            continue;
529
0
        }
530
0
531
0
        const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
532
0
        if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
533
0
            // Bad format
534
0
            continue;
535
0
        }
536
0
537
0
        const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
538
0
        if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
539
0
            // Bad format
540
0
            continue;
541
0
        }
542
0
543
0
        UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
544
0
        if (rbnfLenientScanner != NULL) {
545
0
            // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
546
0
            int32_t length = -1;
547
0
            currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
548
0
        }
549
0
        else {
550
0
            currMatchIndex = source.indexOf(currArg, startingAt);
551
0
        }
552
0
        if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
553
0
            matchedIndex = currMatchIndex;
554
0
            matchedWord = currArg;
555
0
            keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
556
0
        }
557
0
    }
558
0
    if (matchedIndex >= 0) {
559
0
        pos.setBeginIndex(matchedIndex);
560
0
        pos.setEndIndex(matchedIndex + matchedWord.length());
561
0
        result.setString(keyword);
562
0
        return;
563
0
    }
564
0
565
0
    // Not found!
566
0
    pos.setBeginIndex(-1);
567
0
    pos.setEndIndex(-1);
568
0
}
569
570
0
// Out-of-line destructor for the selector interface; nothing to release.
PluralFormat::PluralSelector::~PluralSelector()
{
}
571
572
0
// Destructor: releases the owned plural rules.
PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter()
{
    delete pluralRules;
}
575
576
// Selects the plural keyword for the quantity passed through context.
// context is treated as a pointer to an IFixedDecimal and handed to the owned
// plural rules; the number and error-code arguments are not used here.
UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
                                                          UErrorCode& /*ec*/) const {
    (void)number;  // intentionally unused
    IFixedDecimal &quantity = *static_cast<IFixedDecimal *>(context);
    return pluralRules->select(quantity);
}
582
583
0
void PluralFormat::PluralSelectorAdapter::reset() {
584
0
    delete pluralRules;
585
0
    pluralRules = NULL;
586
0
}
587
588
589
U_NAMESPACE_END
590
591
592
#endif /* #if !UCONFIG_NO_FORMATTING */
593
594
//eof