Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/number_affixutils.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
#include "number_affixutils.h"
9
#include "unicode/utf16.h"
10
#include "unicode/uniset.h"
11
12
using namespace icu;
13
using namespace icu::number;
14
using namespace icu::number::impl;
15
16
0
// Compiler-generated destructor for TokenConsumer, defined out-of-line in this file.
TokenConsumer::~TokenConsumer() = default;
17
0
// Compiler-generated destructor for SymbolProvider, defined out-of-line in this file.
SymbolProvider::~SymbolProvider() = default;
18
19
0
int32_t AffixUtils::estimateLength(const UnicodeString &patternString, UErrorCode &status) {
20
0
    AffixPatternState state = STATE_BASE;
21
0
    int32_t offset = 0;
22
0
    int32_t length = 0;
23
0
    for (; offset < patternString.length();) {
24
0
        UChar32 cp = patternString.char32At(offset);
25
0
26
0
        switch (state) {
27
0
            case STATE_BASE:
28
0
                if (cp == u'\'') {
29
0
                    // First quote
30
0
                    state = STATE_FIRST_QUOTE;
31
0
                } else {
32
0
                    // Unquoted symbol
33
0
                    length++;
34
0
                }
35
0
                break;
36
0
            case STATE_FIRST_QUOTE:
37
0
                if (cp == u'\'') {
38
0
                    // Repeated quote
39
0
                    length++;
40
0
                    state = STATE_BASE;
41
0
                } else {
42
0
                    // Quoted code point
43
0
                    length++;
44
0
                    state = STATE_INSIDE_QUOTE;
45
0
                }
46
0
                break;
47
0
            case STATE_INSIDE_QUOTE:
48
0
                if (cp == u'\'') {
49
0
                    // End of quoted sequence
50
0
                    state = STATE_AFTER_QUOTE;
51
0
                } else {
52
0
                    // Quoted code point
53
0
                    length++;
54
0
                }
55
0
                break;
56
0
            case STATE_AFTER_QUOTE:
57
0
                if (cp == u'\'') {
58
0
                    // Double quote inside of quoted sequence
59
0
                    length++;
60
0
                    state = STATE_INSIDE_QUOTE;
61
0
                } else {
62
0
                    // Unquoted symbol
63
0
                    length++;
64
0
                }
65
0
                break;
66
0
            default:
67
0
                U_ASSERT(false);
68
0
        }
69
0
70
0
        offset += U16_LENGTH(cp);
71
0
    }
72
0
73
0
    switch (state) {
74
0
        case STATE_FIRST_QUOTE:
75
0
        case STATE_INSIDE_QUOTE:
76
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
77
0
            break;
78
0
        default:
79
0
            break;
80
0
    }
81
0
82
0
    return length;
83
0
}
84
85
0
/**
 * Produces an affix-pattern string in which the given text is treated literally.
 *
 * Apostrophes are doubled. Runs of the pattern symbols '-', '+', '%', '‰' and '¤' are wrapped in a
 * pair of apostrophes; all other code points are copied through unchanged.
 *
 * @param input The text to escape.
 * @return The escaped pattern string.
 */
UnicodeString AffixUtils::escape(const UnicodeString &input) {
    UnicodeString escaped;
    bool quoteOpen = false;  // true while we are inside a quoted run that we opened
    const int32_t inputLength = input.length();
    int32_t pos = 0;
    while (pos < inputLength) {
        const UChar32 cp = input.char32At(pos);
        pos += U16_LENGTH(cp);

        if (cp == u'\'') {
            // A literal apostrophe is written doubled, whether or not a quote is open.
            escaped.append(u"''", -1);
            continue;
        }

        const bool isSymbol =
                cp == u'-' || cp == u'+' || cp == u'%' || cp == u'‰' || cp == u'¤';
        if (isSymbol != quoteOpen) {
            // Crossing a boundary between symbol and non-symbol text: open or close the quote.
            escaped.append(u'\'');
            quoteOpen = isSymbol;
        }
        escaped.append(cp);
    }

    if (quoteOpen) {
        // Close a quoted run that reaches the end of the input.
        escaped.append(u'\'');
    }
    return escaped;
}
130
131
/**
 * Maps a symbol token type to the field constant it is tagged with.
 *
 * Both sign types map to the sign field, percent and permille to their own fields, and every
 * currency type to the currency field. Any other value trips an assertion and yields
 * UNUM_FIELD_COUNT.
 */
Field AffixUtils::getFieldForType(AffixPatternType type) {
    switch (type) {
        case TYPE_MINUS_SIGN:
        case TYPE_PLUS_SIGN:
            return Field::UNUM_SIGN_FIELD;

        case TYPE_PERCENT:
            return Field::UNUM_PERCENT_FIELD;

        case TYPE_PERMILLE:
            return Field::UNUM_PERMILL_FIELD;

        case TYPE_CURRENCY_SINGLE:
        case TYPE_CURRENCY_DOUBLE:
        case TYPE_CURRENCY_TRIPLE:
        case TYPE_CURRENCY_QUAD:
        case TYPE_CURRENCY_QUINT:
        case TYPE_CURRENCY_OVERFLOW:
            return Field::UNUM_CURRENCY_FIELD;

        default:
            break;
    }

    // Not a symbol type this function knows about.
    U_ASSERT(false);
    return Field::UNUM_FIELD_COUNT; // suppress "control reaches end of non-void function"
}
158
159
int32_t
160
AffixUtils::unescape(const UnicodeString &affixPattern, NumberStringBuilder &output, int32_t position,
161
0
                     const SymbolProvider &provider, UErrorCode &status) {
162
0
    int32_t length = 0;
163
0
    AffixTag tag;
164
0
    while (hasNext(tag, affixPattern)) {
165
0
        tag = nextToken(tag, affixPattern, status);
166
0
        if (U_FAILURE(status)) { return length; }
167
0
        if (tag.type == TYPE_CURRENCY_OVERFLOW) {
168
0
            // Don't go to the provider for this special case
169
0
            length += output.insertCodePoint(position + length, 0xFFFD, UNUM_CURRENCY_FIELD, status);
170
0
        } else if (tag.type < 0) {
171
0
            length += output.insert(
172
0
                    position + length, provider.getSymbol(tag.type), getFieldForType(tag.type), status);
173
0
        } else {
174
0
            length += output.insertCodePoint(position + length, tag.codePoint, UNUM_FIELD_COUNT, status);
175
0
        }
176
0
    }
177
0
    return length;
178
0
}
179
180
int32_t AffixUtils::unescapedCodePointCount(const UnicodeString &affixPattern,
181
0
                                            const SymbolProvider &provider, UErrorCode &status) {
182
0
    int32_t length = 0;
183
0
    AffixTag tag;
184
0
    while (hasNext(tag, affixPattern)) {
185
0
        tag = nextToken(tag, affixPattern, status);
186
0
        if (U_FAILURE(status)) { return length; }
187
0
        if (tag.type == TYPE_CURRENCY_OVERFLOW) {
188
0
            length += 1;
189
0
        } else if (tag.type < 0) {
190
0
            length += provider.getSymbol(tag.type).length();
191
0
        } else {
192
0
            length += U16_LENGTH(tag.codePoint);
193
0
        }
194
0
    }
195
0
    return length;
196
0
}
197
198
bool
199
0
AffixUtils::containsType(const UnicodeString &affixPattern, AffixPatternType type, UErrorCode &status) {
200
0
    if (affixPattern.length() == 0) {
201
0
        return false;
202
0
    }
203
0
    AffixTag tag;
204
0
    while (hasNext(tag, affixPattern)) {
205
0
        tag = nextToken(tag, affixPattern, status);
206
0
        if (U_FAILURE(status)) { return false; }
207
0
        if (tag.type == type) {
208
0
            return true;
209
0
        }
210
0
    }
211
0
    return false;
212
0
}
213
214
0
bool AffixUtils::hasCurrencySymbols(const UnicodeString &affixPattern, UErrorCode &status) {
215
0
    if (affixPattern.length() == 0) {
216
0
        return false;
217
0
    }
218
0
    AffixTag tag;
219
0
    while (hasNext(tag, affixPattern)) {
220
0
        tag = nextToken(tag, affixPattern, status);
221
0
        if (U_FAILURE(status)) { return false; }
222
0
        if (tag.type < 0 && getFieldForType(tag.type) == UNUM_CURRENCY_FIELD) {
223
0
            return true;
224
0
        }
225
0
    }
226
0
    return false;
227
0
}
228
229
/**
 * Returns a copy of the affix pattern in which tokens of the given type are overwritten with
 * `replacementChar`.
 *
 * For every matching token, the single code unit just before the token's end offset is replaced.
 * The replacement is one-for-one, so offsets in the copy stay aligned with the original pattern.
 *
 * @param affixPattern The source pattern (not modified).
 * @param type The token type to replace.
 * @param replacementChar The code unit written in place of each match.
 * @param status Error code; if tokenizing fails, the copy as modified so far is returned.
 * @return The modified copy.
 */
UnicodeString AffixUtils::replaceType(const UnicodeString &affixPattern, AffixPatternType type,
                                      char16_t replacementChar, UErrorCode &status) {
    UnicodeString result(affixPattern); // copy
    if (affixPattern.length() != 0) {
        for (AffixTag tag; hasNext(tag, affixPattern);) {
            tag = nextToken(tag, affixPattern, status);
            if (U_FAILURE(status)) { break; }
            if (tag.type != type) { continue; }
            // tag.offset points just past the token, so the token's last unit is at offset - 1.
            result.replace(tag.offset - 1, 1, replacementChar);
        }
    }
    return result;
}
245
246
bool AffixUtils::containsOnlySymbolsAndIgnorables(const UnicodeString& affixPattern,
247
0
                                                  const UnicodeSet& ignorables, UErrorCode& status) {
248
0
    if (affixPattern.length() == 0) {
249
0
        return true;
250
0
    };
251
0
    AffixTag tag;
252
0
    while (hasNext(tag, affixPattern)) {
253
0
        tag = nextToken(tag, affixPattern, status);
254
0
        if (U_FAILURE(status)) { return false; }
255
0
        if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
256
0
            return false;
257
0
        }
258
0
    }
259
0
    return true;
260
0
}
261
262
void AffixUtils::iterateWithConsumer(const UnicodeString& affixPattern, TokenConsumer& consumer,
263
0
                                     UErrorCode& status) {
264
0
    if (affixPattern.length() == 0) {
265
0
        return;
266
0
    };
267
0
    AffixTag tag;
268
0
    while (hasNext(tag, affixPattern)) {
269
0
        tag = nextToken(tag, affixPattern, status);
270
0
        if (U_FAILURE(status)) { return; }
271
0
        consumer.consumeToken(tag.type, tag.codePoint, status);
272
0
        if (U_FAILURE(status)) { return; }
273
0
    }
274
0
}
275
276
0
/**
 * Returns the next token of an affix pattern, resuming from the offset and state stored in `tag`.
 *
 * The pattern is scanned code point by code point:
 *  - Outside quotes, '-', '+', '%' and '‰' each produce their symbol token, a run of '¤' produces
 *    one currency token (1 to 5 signs map to SINGLE..QUINT, 6 or more to OVERFLOW), and any other
 *    code point produces a TYPE_CODEPOINT token.
 *  - Apostrophes delimit quoted text, whose code points are all returned as TYPE_CODEPOINT; a
 *    doubled apostrophe yields a literal apostrophe.
 *
 * @param tag The tag returned by the previous call (or a default-constructed tag to start).
 * @param patternString The affix pattern being tokenized.
 * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside an open quote, and to
 *               U_INTERNAL_PROGRAM_ERROR if `tag` carries a state this function does not know.
 * @return The next token, or the {-1} sentinel when there are no more tokens or on error.
 */
AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString &patternString, UErrorCode &status) {
    int32_t offset = tag.offset;
    int32_t state = tag.state;
    for (; offset < patternString.length();) {
        UChar32 cp = patternString.char32At(offset);
        int32_t count = U16_LENGTH(cp);

        switch (state) {
            case STATE_BASE:
                switch (cp) {
                    case u'\'':
                        state = STATE_FIRST_QUOTE;
                        offset += count;
                        // continue to the next code point
                        break;
                    case u'-':
                        return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0);
                    case u'+':
                        return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0);
                    case u'%':
                        return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0);
                    case u'‰':
                        return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0);
                    case u'¤':
                        state = STATE_FIRST_CURR;
                        offset += count;
                        // continue to the next code point
                        break;
                    default:
                        return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                }
                break;
            case STATE_FIRST_QUOTE:
                if (cp == u'\'') {
                    // '' outside of a quoted run: a literal apostrophe
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_INSIDE_QUOTE:
                if (cp == u'\'') {
                    state = STATE_AFTER_QUOTE;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_AFTER_QUOTE:
                if (cp == u'\'') {
                    // '' inside of a quoted run: a literal apostrophe, and the run continues
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                } else {
                    state = STATE_BASE;
                    // re-evaluate this code point
                    break;
                }
            case STATE_FIRST_CURR:
                if (cp == u'¤') {
                    state = STATE_SECOND_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    // The run ended; this code point is left for the next call.
                    return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
                }
            case STATE_SECOND_CURR:
                if (cp == u'¤') {
                    state = STATE_THIRD_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
                }
            case STATE_THIRD_CURR:
                if (cp == u'¤') {
                    state = STATE_FOURTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
                }
            case STATE_FOURTH_CURR:
                if (cp == u'¤') {
                    state = STATE_FIFTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
                }
            case STATE_FIFTH_CURR:
                if (cp == u'¤') {
                    state = STATE_OVERFLOW_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
                }
            case STATE_OVERFLOW_CURR:
                if (cp == u'¤') {
                    offset += count;
                    // continue to the next code point and loop back to this state
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
                }
            default:
                U_ASSERT(false);
                // With assertions compiled out, falling through here would leave both offset
                // and state unchanged and spin forever. Report the problem and stop instead.
                status = U_INTERNAL_PROGRAM_ERROR;
                return {-1};
        }
    }
    // End of string
    switch (state) {
        case STATE_BASE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_QUOTE:
        case STATE_INSIDE_QUOTE:
            // For consistent behavior with the JDK and ICU 58, set an error here.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return {-1};
        case STATE_AFTER_QUOTE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_CURR:
            return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
        case STATE_SECOND_CURR:
            return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
        case STATE_THIRD_CURR:
            return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
        case STATE_FOURTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
        case STATE_FIFTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
        case STATE_OVERFLOW_CURR:
            return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
        default:
            U_ASSERT(false);
            return {-1}; // suppress "control reaches end of non-void function"
    }
}
418
419
0
bool AffixUtils::hasNext(const AffixTag &tag, const UnicodeString &string) {
420
0
    // First check for the {-1} and default initializer syntax.
421
0
    if (tag.offset < 0) {
422
0
        return false;
423
0
    } else if (tag.offset == 0) {
424
0
        return string.length() > 0;
425
0
    }
426
0
    // The rest of the fields are safe to use now.
427
0
    // Special case: the last character in string is an end quote.
428
0
    if (tag.state == STATE_INSIDE_QUOTE && tag.offset == string.length() - 1 &&
429
0
        string.charAt(tag.offset) == u'\'') {
430
0
        return false;
431
0
    } else if (tag.state != STATE_BASE) {
432
0
        return true;
433
0
    } else {
434
0
        return tag.offset < string.length();
435
0
    }
436
0
}
437
438
#endif /* #if !UCONFIG_NO_FORMATTING */