Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/choicfmt.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1997-2013, International Business Machines Corporation and    *
6
* others. All Rights Reserved.                                                *
7
*******************************************************************************
8
*
9
* File CHOICFMT.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   02/19/97    aliu        Converted from java.
15
*   03/20/97    helena      Finished first cut of implementation and got rid 
16
*                           of nextDouble/previousDouble and replaced with
17
*                           boolean array.
18
*   4/10/97     aliu        Clean up.  Modified to work on AIX.
19
*   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include 
20
*                           wchar.h.
21
*   07/09/97    helena      Made ParsePosition into a class.
22
*   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
23
*   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
24
*   02/22/99    stephen     Removed character literals for EBCDIC safety
25
********************************************************************************
26
*/
27
28
#include "unicode/utypes.h"
29
30
#if !UCONFIG_NO_FORMATTING
31
32
#include "unicode/choicfmt.h"
33
#include "unicode/numfmt.h"
34
#include "unicode/locid.h"
35
#include "cpputils.h"
36
#include "cstring.h"
37
#include "messageimpl.h"
38
#include "putilimp.h"
39
#include "uassert.h"
40
#include <stdio.h>
41
#include <float.h>
42
43
// *****************************************************************************
44
// class ChoiceFormat
45
// *****************************************************************************
46
47
U_NAMESPACE_BEGIN
48
49
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50
51
// Special characters used by ChoiceFormat.  There are two characters
52
// used interchangeably to indicate <=.  Either is parsed, but only
53
// LESS_EQUAL is generated by toPattern().
54
0
#define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
55
0
#define LESS_THAN    ((UChar)0x003C)   /*<*/
56
0
#define LESS_EQUAL   ((UChar)0x0023)   /*#*/
57
#define LESS_EQUAL2  ((UChar)0x2264)
58
0
#define VERTICAL_BAR ((UChar)0x007C)   /*|*/
59
0
#define MINUS        ((UChar)0x002D)   /*-*/
60
61
static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
62
static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
63
64
#ifdef INFINITY
65
#undef INFINITY
66
#endif
67
0
#define INFINITY     ((UChar)0x221E)
68
69
//static const UChar gPositiveInfinity[] = {INFINITY, 0};
70
//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71
#define POSITIVE_INF_STRLEN 1
72
#define NEGATIVE_INF_STRLEN 2
73
74
// -------------------------------------
75
// Creates a ChoiceFormat instance based on the pattern.
76
77
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, UErrorCode& status)
    : constructorErrorCode(status), msgPattern(status)
{
    // Parse the pattern right away; applyPattern() reports the outcome
    // through status and also records it in constructorErrorCode.
    applyPattern(newPattern, status);
}
84
85
// -------------------------------------
86
// Creates a ChoiceFormat instance with the limit array and 
87
// format strings for each limit.
88
89
ChoiceFormat::ChoiceFormat(const double* limits,
                           const UnicodeString* formats,
                           int32_t cnt)
    : constructorErrorCode(U_ZERO_ERROR), msgPattern(constructorErrorCode)
{
    // No closures array is supplied by this overload. This constructor has
    // no status parameter, so any failure ends up in constructorErrorCode.
    setChoices(limits, NULL, formats, cnt, constructorErrorCode);
}
97
98
// -------------------------------------
99
100
ChoiceFormat::ChoiceFormat(const double* limits,
                           const UBool* closures,
                           const UnicodeString* formats,
                           int32_t cnt)
    : constructorErrorCode(U_ZERO_ERROR), msgPattern(constructorErrorCode)
{
    // This constructor has no status parameter, so any failure while
    // building the pattern ends up in constructorErrorCode.
    setChoices(limits, closures, formats, cnt, constructorErrorCode);
}
109
110
// -------------------------------------
111
// copy constructor
112
113
ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
    : NumberFormat(that),
      constructorErrorCode(that.constructorErrorCode),
      msgPattern(that.msgPattern)
{
    // Everything is copied member-wise in the initializer list.
}
119
120
// -------------------------------------
121
// Private constructor that creates a 
122
// ChoiceFormat instance based on the 
123
// pattern and populates UParseError
124
125
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
                           UParseError& parseError,
                           UErrorCode& status)
    : constructorErrorCode(status), msgPattern(status)
{
    // Same as the (pattern, status) constructor, except that parseError is
    // handed on to the pattern parser as well.
    applyPattern(newPattern, parseError, status);
}
133
// -------------------------------------
134
135
bool
136
ChoiceFormat::operator==(const Format& that) const
137
0
{
138
0
    if (this == &that) return TRUE;
139
0
    if (!NumberFormat::operator==(that)) return FALSE;
140
0
    ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141
0
    return msgPattern == thatAlias.msgPattern;
142
0
}
143
144
// -------------------------------------
145
// assignment operator
146
147
const ChoiceFormat&
ChoiceFormat::operator=(const ChoiceFormat& that)
{
    // Self-assignment: nothing to copy.
    if (this == &that) {
        return *this;
    }
    NumberFormat::operator=(that);
    constructorErrorCode = that.constructorErrorCode;
    msgPattern = that.msgPattern;
    return *this;
}
157
158
// -------------------------------------
159
160
// The destructor body is empty; members clean up after themselves.
ChoiceFormat::~ChoiceFormat() = default;
163
164
// -------------------------------------
165
166
/**
167
 * Convert a double value to a string without the overhead of NumberFormat.
168
 */
169
UnicodeString&
170
ChoiceFormat::dtos(double value,
171
                   UnicodeString& string)
172
0
{
173
    /* Buffer to contain the digits and any extra formatting stuff. */
174
0
    char temp[DBL_DIG + 16];
175
0
    char *itrPtr = temp;
176
0
    char *expPtr;
177
178
0
    sprintf(temp, "%.*g", DBL_DIG, value);
179
180
    /* Find and convert the decimal point.
181
       Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182
    */
183
0
    while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184
0
        itrPtr++;
185
0
    }
186
0
    if (*itrPtr != 0 && *itrPtr != 'e') {
187
        /* We reached something that looks like a decimal point.
188
        In case someone used setlocale(), which changes the decimal point. */
189
0
        *itrPtr = '.';
190
0
        itrPtr++;
191
0
    }
192
    /* Search for the exponent */
193
0
    while (*itrPtr && *itrPtr != 'e') {
194
0
        itrPtr++;
195
0
    }
196
0
    if (*itrPtr == 'e') {
197
0
        itrPtr++;
198
        /* Verify the exponent sign */
199
0
        if (*itrPtr == '+' || *itrPtr == '-') {
200
0
            itrPtr++;
201
0
        }
202
        /* Remove leading zeros. You will see this on Windows machines. */
203
0
        expPtr = itrPtr;
204
0
        while (*itrPtr == '0') {
205
0
            itrPtr++;
206
0
        }
207
0
        if (*itrPtr && expPtr != itrPtr) {
208
            /* Shift the exponent without zeros. */
209
0
            while (*itrPtr) {
210
0
                *(expPtr++)  = *(itrPtr++);
211
0
            }
212
            // NULL terminate
213
0
            *expPtr = 0;
214
0
        }
215
0
    }
216
217
0
    string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
218
0
    return string;
219
0
}
220
221
// -------------------------------------
222
// calls the overloaded applyPattern method.
223
224
void
225
ChoiceFormat::applyPattern(const UnicodeString& pattern,
226
                           UErrorCode& status)
227
0
{
228
0
    msgPattern.parseChoiceStyle(pattern, NULL, status);
229
0
    constructorErrorCode = status;
230
0
}
231
232
// -------------------------------------
233
// Applies the pattern to this ChoiceFormat instance.
234
235
void
236
ChoiceFormat::applyPattern(const UnicodeString& pattern,
237
                           UParseError& parseError,
238
                           UErrorCode& status)
239
0
{
240
0
    msgPattern.parseChoiceStyle(pattern, &parseError, status);
241
0
    constructorErrorCode = status;
242
0
}
243
// -------------------------------------
244
// Returns the input pattern string.
245
246
UnicodeString&
ChoiceFormat::toPattern(UnicodeString& result) const
{
    // The pattern text is whatever string msgPattern currently holds.
    result = msgPattern.getPatternString();
    return result;
}
251
252
// -------------------------------------
253
// Sets the limit and format arrays. 
254
void
255
ChoiceFormat::setChoices(  const double* limits, 
256
                           const UnicodeString* formats, 
257
                           int32_t cnt )
258
0
{
259
0
    UErrorCode errorCode = U_ZERO_ERROR;
260
0
    setChoices(limits, NULL, formats, cnt, errorCode);
261
0
}
262
263
// -------------------------------------
264
// Sets the limit, closure, and format arrays.
265
void
266
ChoiceFormat::setChoices(  const double* limits, 
267
                           const UBool* closures,
268
                           const UnicodeString* formats, 
269
                           int32_t cnt )
270
0
{
271
0
    UErrorCode errorCode = U_ZERO_ERROR;
272
0
    setChoices(limits, closures, formats, cnt, errorCode);
273
0
}
274
275
void
276
ChoiceFormat::setChoices(const double* limits,
277
                         const UBool* closures,
278
                         const UnicodeString* formats,
279
                         int32_t count,
280
0
                         UErrorCode &errorCode) {
281
0
    if (U_FAILURE(errorCode)) {
282
0
        return;
283
0
    }
284
0
    if (limits == NULL || formats == NULL) {
285
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286
0
        return;
287
0
    }
288
    // Reconstruct the original input pattern.
289
    // Modified version of the pre-ICU 4.8 toPattern() implementation.
290
0
    UnicodeString result;
291
0
    for (int32_t i = 0; i < count; ++i) {
292
0
        if (i != 0) {
293
0
            result += VERTICAL_BAR;
294
0
        }
295
0
        UnicodeString buf;
296
0
        if (uprv_isPositiveInfinity(limits[i])) {
297
0
            result += INFINITY;
298
0
        } else if (uprv_isNegativeInfinity(limits[i])) {
299
0
            result += MINUS;
300
0
            result += INFINITY;
301
0
        } else {
302
0
            result += dtos(limits[i], buf);
303
0
        }
304
0
        if (closures != NULL && closures[i]) {
305
0
            result += LESS_THAN;
306
0
        } else {
307
0
            result += LESS_EQUAL;
308
0
        }
309
        // Append formats[i], using quotes if there are special
310
        // characters.  Single quotes themselves must be escaped in
311
        // either case.
312
0
        const UnicodeString& text = formats[i];
313
0
        int32_t textLength = text.length();
314
0
        int32_t nestingLevel = 0;
315
0
        for (int32_t j = 0; j < textLength; ++j) {
316
0
            UChar c = text[j];
317
0
            if (c == SINGLE_QUOTE && nestingLevel == 0) {
318
                // Double each top-level apostrophe.
319
0
                result.append(c);
320
0
            } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321
                // Surround each pipe symbol with apostrophes for quoting.
322
                // If the next character is an apostrophe, then that will be doubled,
323
                // and although the parser will see the apostrophe pairs beginning
324
                // and ending one character earlier than our doubling, the result
325
                // is as desired.
326
                //   | -> '|'
327
                //   |' -> '|'''
328
                //   |'' -> '|''''' etc.
329
0
                result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330
0
                continue;  // Skip the append(c) at the end of the loop body.
331
0
            } else if (c == LEFT_CURLY_BRACE) {
332
0
                ++nestingLevel;
333
0
            } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334
0
                --nestingLevel;
335
0
            }
336
0
            result.append(c);
337
0
        }
338
0
    }
339
    // Apply the reconstructed pattern.
340
0
    applyPattern(result, errorCode);
341
0
}
342
343
// -------------------------------------
344
// Gets the limit array.
345
346
const double*
347
ChoiceFormat::getLimits(int32_t& cnt) const 
348
0
{
349
0
    cnt = 0;
350
0
    return NULL;
351
0
}
352
353
// -------------------------------------
354
// Gets the closures array.
355
356
const UBool*
ChoiceFormat::getClosures(int32_t& cnt) const
{
    // This implementation always reports an empty result:
    // cnt is set to 0 and a null pointer is returned.
    cnt = 0;
    return nullptr;
}
362
363
// -------------------------------------
364
// Gets the format array.
365
366
const UnicodeString*
ChoiceFormat::getFormats(int32_t& cnt) const
{
    // This implementation always reports an empty result:
    // cnt is set to 0 and a null pointer is returned.
    cnt = 0;
    return nullptr;
}
372
373
// -------------------------------------
374
// Formats an int64 number, it's actually formatted as
375
// a double.  The returned format string may differ
376
// from the input number because of this.
377
378
UnicodeString&
ChoiceFormat::format(int64_t number,
                     UnicodeString& appendTo,
                     FieldPosition& status) const
{
    // Delegate to the double overload; the conversion to double happens here.
    const double asDouble = static_cast<double>(number);
    return format(asDouble, appendTo, status);
}
385
386
// -------------------------------------
387
// Formats an int32_t number, it's actually formatted as
388
// a double.
389
390
UnicodeString&
ChoiceFormat::format(int32_t number,
                     UnicodeString& appendTo,
                     FieldPosition& status) const
{
    // Delegate to the double overload.
    const double asDouble = static_cast<double>(number);
    return format(asDouble, appendTo, status);
}
397
398
// -------------------------------------
399
// Formats a double number.
400
401
UnicodeString&
ChoiceFormat::format(double number,
                     UnicodeString& appendTo,
                     FieldPosition& /*pos*/) const
{
    // No parts: no pattern was applied, or applying it failed.
    // Nothing is appended in that case.
    if (msgPattern.countParts() == 0) {
        return appendTo;
    }

    // Pick the sub-message whose interval contains the number.
    const int32_t subMessage = findSubMessage(msgPattern, 0, number);

    if (MessageImpl::jdkAposMode(msgPattern)) {
        // JDK compatibility mode: Remove SKIP_SYNTAX.
        return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, subMessage, appendTo);
    }

    // Otherwise append the sub-message's pattern text exactly as written.
    const int32_t textStart = msgPattern.getPart(subMessage).getLimit();
    const int32_t limitPart = msgPattern.getLimitPartIndex(subMessage);
    const int32_t textEnd = msgPattern.getPatternIndex(limitPart);
    appendTo.append(msgPattern.getPatternString(), textStart, textEnd - textStart);
    return appendTo;
}
423
424
// Returns the part index of the sub-message selected by number.
// partIndex is the index of the first numeric part of the choice pattern.
int32_t
ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
    const int32_t partCount = pattern.countParts();
    // The pattern is a sequence of (ARG_INT|DOUBLE, ARG_SELECTOR, message)
    // tuples, ending at ARG_LIMIT or at the end of a choice-only pattern.
    // The first number and selector are ignored, so begin at the first message.
    int32_t candidate = partIndex + 2;
    for (;;) {
        // Index of the part that follows the candidate message.
        const int32_t next = pattern.getLimitPartIndex(candidate) + 1;
        if (next >= partCount) {
            // End of the choice-only pattern: the last sub-message wins.
            return candidate;
        }
        const MessagePattern::Part &numberPart = pattern.getPart(next);
        UMessagePatternPartType type = numberPart.getType();
        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
            // End of the ChoiceFormat style: the last sub-message wins.
            return candidate;
        }
        // Otherwise the part is an ARG_INT or ARG_DOUBLE.
        U_ASSERT(MessagePattern::Part::hasNumericValue(type));
        const double boundary = pattern.getNumericValue(numberPart);
        // The ARG_SELECTOR part directly follows the number.
        const int32_t selectorPos = pattern.getPatternIndex(next + 1);
        const UChar selector = pattern.getPatternString().charAt(selectorPos);
        // The !(a>b) and !(a>=b) comparisons are equivalent to
        // (a<=b) and (a<b) except they "catch" NaN.
        const UBool belowBoundary =
            (selector == LESS_THAN) ? !(number > boundary) : !(number >= boundary);
        if (belowBoundary) {
            // The number lies between the previous boundary and this one.
            return candidate;
        }
        // Move on to the message that follows the selector.
        candidate = next + 2;
    }
}
464
465
// -------------------------------------
466
// Formats an array of objects. Checks if the data type of the objects
467
// to get the right value for formatting.  
468
469
UnicodeString&
ChoiceFormat::format(const Formattable* objs,
                     int32_t cnt,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const
{
    if (cnt < 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    // Without any parsed parts there is nothing to format with.
    if (msgPattern.countParts() == 0) {
        status = U_INVALID_STATE_ERROR;
        return appendTo;
    }

    // Each object is read as a double and formatted in turn; an object is
    // skipped whenever status indicates a failure after reading it.
    for (int32_t idx = 0; idx < cnt; ++idx) {
        const double value = objs[idx].getDouble(status);
        if (U_FAILURE(status)) {
            continue;
        }
        format(value, appendTo, pos);
    }

    return appendTo;
}
494
495
// -------------------------------------
496
497
void
498
ChoiceFormat::parse(const UnicodeString& text, 
499
                    Formattable& result,
500
                    ParsePosition& pos) const
501
0
{
502
0
    result.setDouble(parseArgument(msgPattern, 0, text, pos));
503
0
}
504
505
double
506
ChoiceFormat::parseArgument(
507
        const MessagePattern &pattern, int32_t partIndex,
508
0
        const UnicodeString &source, ParsePosition &pos) {
509
    // find the best number (defined as the one with the longest parse)
510
0
    int32_t start = pos.getIndex();
511
0
    int32_t furthest = start;
512
0
    double bestNumber = uprv_getNaN();
513
0
    double tempNumber = 0.0;
514
0
    int32_t count = pattern.countParts();
515
0
    while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516
0
        tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517
0
        partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
518
0
        int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519
0
        int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520
0
        if (len >= 0) {
521
0
            int32_t newIndex = start + len;
522
0
            if (newIndex > furthest) {
523
0
                furthest = newIndex;
524
0
                bestNumber = tempNumber;
525
0
                if (furthest == source.length()) {
526
0
                    break;
527
0
                }
528
0
            }
529
0
        }
530
0
        partIndex = msgLimit + 1;
531
0
    }
532
0
    if (furthest == start) {
533
0
        pos.setErrorIndex(start);
534
0
    } else {
535
0
        pos.setIndex(furthest);
536
0
    }
537
0
    return bestNumber;
538
0
}
539
540
// Matches the literal text of one sub-message against source.
//
// The sub-message starts at part partIndex and ends at part limitPartIndex.
// Its pattern text is compared piecewise: SKIP_SYNTAX parts split it into
// literal segments, and the skipped pattern characters themselves are not
// compared.
//
// Returns the number of source characters matched, or -1 on a mismatch.
int32_t
ChoiceFormat::matchStringUntilLimitPart(
        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
        const UnicodeString &source, int32_t sourceOffset) {
    int32_t matchingSourceLength = 0;
    const UnicodeString &msgString = pattern.getPatternString();
    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
    for (;;) {
        const MessagePattern::Part &part = pattern.getPart(++partIndex);
        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
            int32_t index = part.getIndex();
            int32_t length = index - prevIndex;
            // Compare the segment at the position reached so far in source.
            // Each segment continues where the previous one ended, so the
            // already-matched length must be added to the start offset.
            if (length != 0 &&
                    0 != source.compare(sourceOffset + matchingSourceLength, length,
                                        msgString, prevIndex, length)) {
                return -1;  // mismatch
            }
            matchingSourceLength += length;
            if (partIndex == limitPartIndex) {
                return matchingSourceLength;
            }
            prevIndex = part.getLimit();  // SKIP_SYNTAX
        }
    }
}
563
564
// -------------------------------------
565
566
ChoiceFormat*
ChoiceFormat::clone() const
{
    // Heap-allocated copy made with the copy constructor.
    return new ChoiceFormat(*this);
}
572
573
U_NAMESPACE_END
574
575
#endif /* #if !UCONFIG_NO_FORMATTING */
576
577
//eof