Coverage Report

Created: 2025-11-07 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/messagepattern.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2011-2012, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  messagepattern.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2011mar14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_FORMATTING
20
21
#include "unicode/messagepattern.h"
22
#include "unicode/unistr.h"
23
#include "unicode/utf16.h"
24
#include "cmemory.h"
25
#include "cstring.h"
26
#include "messageimpl.h"
27
#include "patternprops.h"
28
#include "putilimp.h"
29
#include "uassert.h"
30
31
U_NAMESPACE_BEGIN
32
33
// Unicode character/code point constants ---------------------------------- ***
34
35
static const char16_t u_pound=0x23;
36
static const char16_t u_apos=0x27;
37
static const char16_t u_plus=0x2B;
38
static const char16_t u_comma=0x2C;
39
static const char16_t u_minus=0x2D;
40
static const char16_t u_dot=0x2E;
41
static const char16_t u_colon=0x3A;
42
static const char16_t u_lessThan=0x3C;
43
static const char16_t u_equal=0x3D;
44
static const char16_t u_A=0x41;
45
static const char16_t u_C=0x43;
46
static const char16_t u_D=0x44;
47
static const char16_t u_E=0x45;
48
static const char16_t u_H=0x48;
49
static const char16_t u_I=0x49;
50
static const char16_t u_L=0x4C;
51
static const char16_t u_N=0x4E;
52
static const char16_t u_O=0x4F;
53
static const char16_t u_P=0x50;
54
static const char16_t u_R=0x52;
55
static const char16_t u_S=0x53;
56
static const char16_t u_T=0x54;
57
static const char16_t u_U=0x55;
58
static const char16_t u_Z=0x5A;
59
static const char16_t u_a=0x61;
60
static const char16_t u_c=0x63;
61
static const char16_t u_d=0x64;
62
static const char16_t u_e=0x65;
63
static const char16_t u_f=0x66;
64
static const char16_t u_h=0x68;
65
static const char16_t u_i=0x69;
66
static const char16_t u_l=0x6C;
67
static const char16_t u_n=0x6E;
68
static const char16_t u_o=0x6F;
69
static const char16_t u_p=0x70;
70
static const char16_t u_r=0x72;
71
static const char16_t u_s=0x73;
72
static const char16_t u_t=0x74;
73
static const char16_t u_u=0x75;
74
static const char16_t u_z=0x7A;
75
static const char16_t u_leftCurlyBrace=0x7B;
76
static const char16_t u_pipe=0x7C;
77
static const char16_t u_rightCurlyBrace=0x7D;
78
static const char16_t u_lessOrEqual=0x2264;  // U+2264 is <=
79
80
static const char16_t kOffsetColon[]={  // "offset:"
81
    u_o, u_f, u_f, u_s, u_e, u_t, u_colon
82
};
83
84
static const char16_t kOther[]={  // "other"
85
    u_o, u_t, u_h, u_e, u_r
86
};
87
88
// MessagePatternList ------------------------------------------------------ ***
89
90
template<typename T, int32_t stackCapacity>
91
class MessagePatternList : public UMemory {
92
public:
93
47.5k
    MessagePatternList() {}
icu_79::MessagePatternList<icu_79::MessagePattern::Part, 32>::MessagePatternList()
Line
Count
Source
93
44.7k
    MessagePatternList() {}
icu_79::MessagePatternList<double, 8>::MessagePatternList()
Line
Count
Source
93
2.73k
    MessagePatternList() {}
94
    void copyFrom(const MessagePatternList<T, stackCapacity> &other,
95
                  int32_t length,
96
                  UErrorCode &errorCode);
97
    UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
98
0
    UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
99
0
        for(int32_t i=0; i<length; ++i) {
100
0
            if(a[i]!=other.a[i]) { return false; }
101
0
        }
102
0
        return true;
103
0
    }
104
105
    MaybeStackArray<T, stackCapacity> a;
106
};
107
108
template<typename T, int32_t stackCapacity>
109
void
110
MessagePatternList<T, stackCapacity>::copyFrom(
111
        const MessagePatternList<T, stackCapacity> &other,
112
        int32_t length,
113
0
        UErrorCode &errorCode) {
114
0
    if(U_SUCCESS(errorCode) && length>0) {
115
0
        if(length>a.getCapacity() && nullptr==a.resize(length)) {
116
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
117
0
            return;
118
0
        }
119
0
        uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
120
0
    }
121
0
}
Unexecuted instantiation: icu_79::MessagePatternList<icu_79::MessagePattern::Part, 32>::copyFrom(icu_79::MessagePatternList<icu_79::MessagePattern::Part, 32> const&, int, UErrorCode&)
Unexecuted instantiation: icu_79::MessagePatternList<double, 8>::copyFrom(icu_79::MessagePatternList<double, 8> const&, int, UErrorCode&)
122
123
template<typename T, int32_t stackCapacity>
124
UBool
125
7.69M
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
7.69M
    if(U_FAILURE(errorCode)) {
127
72.2k
        return false;
128
72.2k
    }
129
7.62M
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
7.62M
        return true;
131
7.62M
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
7.62M
}
icu_79::MessagePatternList<icu_79::MessagePattern::Part, 32>::ensureCapacityForOneMore(int, UErrorCode&)
Line
Count
Source
125
7.30M
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
7.30M
    if(U_FAILURE(errorCode)) {
127
72.2k
        return false;
128
72.2k
    }
129
7.22M
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
7.22M
        return true;
131
7.22M
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
7.22M
}
icu_79::MessagePatternList<double, 8>::ensureCapacityForOneMore(int, UErrorCode&)
Line
Count
Source
125
397k
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
397k
    if(U_FAILURE(errorCode)) {
127
0
        return false;
128
0
    }
129
397k
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
397k
        return true;
131
397k
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
397k
}
135
136
// MessagePatternList specializations -------------------------------------- ***
137
138
class MessagePatternDoubleList : public MessagePatternList<double, 8> {
139
};
140
141
class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
142
};
143
144
// MessagePattern constructors etc. ---------------------------------------- ***
145
146
MessagePattern::MessagePattern(UErrorCode &errorCode)
147
37.6k
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
148
37.6k
          partsList(nullptr), parts(nullptr), partsLength(0),
149
37.6k
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
150
37.6k
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
151
37.6k
    init(errorCode);
152
37.6k
}
153
154
MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
155
0
        : aposMode(mode),
156
0
          partsList(nullptr), parts(nullptr), partsLength(0),
157
0
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
158
0
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
159
0
    init(errorCode);
160
0
}
161
162
MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
163
7.12k
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
164
7.12k
          partsList(nullptr), parts(nullptr), partsLength(0),
165
7.12k
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
166
7.12k
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
167
7.12k
    if(init(errorCode)) {
168
7.12k
        parse(pattern, parseError, errorCode);
169
7.12k
    }
170
7.12k
}
171
172
UBool
173
44.7k
MessagePattern::init(UErrorCode &errorCode) {
174
44.7k
    if(U_FAILURE(errorCode)) {
175
0
        return false;
176
0
    }
177
44.7k
    partsList=new MessagePatternPartsList();
178
44.7k
    if(partsList==nullptr) {
179
0
        errorCode=U_MEMORY_ALLOCATION_ERROR;
180
0
        return false;
181
0
    }
182
44.7k
    parts=partsList->a.getAlias();
183
44.7k
    return true;
184
44.7k
}
185
186
MessagePattern::MessagePattern(const MessagePattern &other)
187
0
        : UObject(other), aposMode(other.aposMode), msg(other.msg),
188
0
          partsList(nullptr), parts(nullptr), partsLength(0),
189
0
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
190
0
          hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
191
0
          needsAutoQuoting(other.needsAutoQuoting) {
192
0
    UErrorCode errorCode=U_ZERO_ERROR;
193
0
    if(!copyStorage(other, errorCode)) {
194
0
        clear();
195
0
    }
196
0
}
197
198
MessagePattern &
199
0
MessagePattern::operator=(const MessagePattern &other) {
200
0
    if(this==&other) {
201
0
        return *this;
202
0
    }
203
0
    aposMode=other.aposMode;
204
0
    msg=other.msg;
205
0
    hasArgNames=other.hasArgNames;
206
0
    hasArgNumbers=other.hasArgNumbers;
207
0
    needsAutoQuoting=other.needsAutoQuoting;
208
0
    UErrorCode errorCode=U_ZERO_ERROR;
209
0
    if(!copyStorage(other, errorCode)) {
210
0
        clear();
211
0
    }
212
0
    return *this;
213
0
}
214
215
UBool
216
0
MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
217
0
    if(U_FAILURE(errorCode)) {
218
0
        return false;
219
0
    }
220
0
    parts=nullptr;
221
0
    partsLength=0;
222
0
    numericValues=nullptr;
223
0
    numericValuesLength=0;
224
0
    if(partsList==nullptr) {
225
0
        partsList=new MessagePatternPartsList();
226
0
        if(partsList==nullptr) {
227
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
228
0
            return false;
229
0
        }
230
0
        parts=partsList->a.getAlias();
231
0
    }
232
0
    if(other.partsLength>0) {
233
0
        partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
234
0
        if(U_FAILURE(errorCode)) {
235
0
            return false;
236
0
        }
237
0
        parts=partsList->a.getAlias();
238
0
        partsLength=other.partsLength;
239
0
    }
240
0
    if(other.numericValuesLength>0) {
241
0
        if(numericValuesList==nullptr) {
242
0
            numericValuesList=new MessagePatternDoubleList();
243
0
            if(numericValuesList==nullptr) {
244
0
                errorCode=U_MEMORY_ALLOCATION_ERROR;
245
0
                return false;
246
0
            }
247
0
            numericValues=numericValuesList->a.getAlias();
248
0
        }
249
0
        numericValuesList->copyFrom(
250
0
            *other.numericValuesList, other.numericValuesLength, errorCode);
251
0
        if(U_FAILURE(errorCode)) {
252
0
            return false;
253
0
        }
254
0
        numericValues=numericValuesList->a.getAlias();
255
0
        numericValuesLength=other.numericValuesLength;
256
0
    }
257
0
    return true;
258
0
}
259
260
44.7k
MessagePattern::~MessagePattern() {
261
44.7k
    delete partsList;
262
44.7k
    delete numericValuesList;
263
44.7k
}
264
265
// MessagePattern API ------------------------------------------------------ ***
266
267
MessagePattern &
268
14.2k
MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
269
14.2k
    preParse(pattern, parseError, errorCode);
270
14.2k
    parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
271
14.2k
    postParse();
272
14.2k
    return *this;
273
14.2k
}
274
275
MessagePattern &
276
MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
277
9.83k
                                 UParseError *parseError, UErrorCode &errorCode) {
278
9.83k
    preParse(pattern, parseError, errorCode);
279
9.83k
    parseChoiceStyle(0, 0, parseError, errorCode);
280
9.83k
    postParse();
281
9.83k
    return *this;
282
9.83k
}
283
284
MessagePattern &
285
MessagePattern::parsePluralStyle(const UnicodeString &pattern,
286
20.7k
                                 UParseError *parseError, UErrorCode &errorCode) {
287
20.7k
    preParse(pattern, parseError, errorCode);
288
20.7k
    parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
289
20.7k
    postParse();
290
20.7k
    return *this;
291
20.7k
}
292
293
MessagePattern &
294
MessagePattern::parseSelectStyle(const UnicodeString &pattern,
295
0
                                 UParseError *parseError, UErrorCode &errorCode) {
296
0
    preParse(pattern, parseError, errorCode);
297
0
    parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
298
0
    postParse();
299
0
    return *this;
300
0
}
301
302
void
303
3.27k
MessagePattern::clear() {
304
    // Mostly the same as preParse().
305
3.27k
    msg.remove();
306
3.27k
    hasArgNames=hasArgNumbers=false;
307
3.27k
    needsAutoQuoting=false;
308
3.27k
    partsLength=0;
309
3.27k
    numericValuesLength=0;
310
3.27k
}
311
312
bool
313
0
MessagePattern::operator==(const MessagePattern &other) const {
314
0
    if(this==&other) {
315
0
        return true;
316
0
    }
317
0
    return
318
0
        aposMode==other.aposMode &&
319
0
        msg==other.msg &&
320
        // parts.equals(o.parts)
321
0
        partsLength==other.partsLength &&
322
0
        (partsLength==0 || partsList->equals(*other.partsList, partsLength));
323
    // No need to compare numericValues if msg and parts are the same.
324
0
}
325
326
int32_t
327
0
MessagePattern::hashCode() const {
328
0
    int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
329
0
    for(int32_t i=0; i<partsLength; ++i) {
330
0
        hash=hash*37+parts[i].hashCode();
331
0
    }
332
0
    return hash;
333
0
}
334
335
int32_t
336
0
MessagePattern::validateArgumentName(const UnicodeString &name) {
337
0
    if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
338
0
        return UMSGPAT_ARG_NAME_NOT_VALID;
339
0
    }
340
0
    return parseArgNumber(name, 0, name.length());
341
0
}
342
343
UnicodeString
344
0
MessagePattern::autoQuoteApostropheDeep() const {
345
0
    if(!needsAutoQuoting) {
346
0
        return msg;
347
0
    }
348
0
    UnicodeString modified(msg);
349
    // Iterate backward so that the insertion indexes do not change.
350
0
    int32_t count=countParts();
351
0
    for(int32_t i=count; i>0;) {
352
0
        const Part &part=getPart(--i);
353
0
        if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
354
0
           modified.insert(part.index, static_cast<char16_t>(part.value));
355
0
        }
356
0
    }
357
0
    return modified;
358
0
}
359
360
double
361
9.51k
MessagePattern::getNumericValue(const Part &part) const {
362
9.51k
    UMessagePatternPartType type=part.type;
363
9.51k
    if(type==UMSGPAT_PART_TYPE_ARG_INT) {
364
8.72k
        return part.value;
365
8.72k
    } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
366
786
        return numericValues[part.value];
367
786
    } else {
368
0
        return UMSGPAT_NO_NUMERIC_VALUE;
369
0
    }
370
9.51k
}
371
372
/**
373
  * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
374
  * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
375
  * @return the "offset:" value.
376
  * @draft ICU 4.8
377
  */
378
double
379
20.7k
MessagePattern::getPluralOffset(int32_t pluralStart) const {
380
20.7k
    const Part &part=getPart(pluralStart);
381
20.7k
    if(Part::hasNumericValue(part.type)) {
382
0
        return getNumericValue(part);
383
20.7k
    } else {
384
20.7k
        return 0;
385
20.7k
    }
386
20.7k
}
387
388
// MessagePattern::Part ---------------------------------------------------- ***
389
390
bool
391
0
MessagePattern::Part::operator==(const Part &other) const {
392
0
    if(this==&other) {
393
0
        return true;
394
0
    }
395
0
    return
396
0
        type==other.type &&
397
0
        index==other.index &&
398
0
        length==other.length &&
399
0
        value==other.value &&
400
0
        limitPartIndex==other.limitPartIndex;
401
0
}
402
403
// MessagePattern parser --------------------------------------------------- ***
404
405
void
406
44.7k
MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
407
44.7k
    if(U_FAILURE(errorCode)) {
408
0
        return;
409
0
    }
410
44.7k
    if(parseError!=nullptr) {
411
14.2k
        parseError->line=0;
412
14.2k
        parseError->offset=0;
413
14.2k
        parseError->preContext[0]=0;
414
14.2k
        parseError->postContext[0]=0;
415
14.2k
    }
416
44.7k
    msg=pattern;
417
44.7k
    hasArgNames=hasArgNumbers=false;
418
44.7k
    needsAutoQuoting=false;
419
44.7k
    partsLength=0;
420
44.7k
    numericValuesLength=0;
421
44.7k
}
422
423
void
424
44.7k
MessagePattern::postParse() {
425
44.7k
    if(partsList!=nullptr) {
426
44.7k
        parts=partsList->a.getAlias();
427
44.7k
    }
428
44.7k
    if(numericValuesList!=nullptr) {
429
2.73k
        numericValues=numericValuesList->a.getAlias();
430
2.73k
    }
431
44.7k
}
432
433
int32_t
434
MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
435
                             int32_t nestingLevel, UMessagePatternArgType parentType,
436
639k
                             UParseError *parseError, UErrorCode &errorCode) {
437
639k
    if(U_FAILURE(errorCode)) {
438
0
        return 0;
439
0
    }
440
639k
    if(nestingLevel>Part::MAX_NESTED_LEVELS) {
441
2
        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
442
2
        return 0;
443
2
    }
444
639k
    int32_t msgStart=partsLength;
445
639k
    addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
446
639k
    index+=msgStartLength;
447
100M
    for(;;) {  // while(index<msg.length()) with U_FAILURE(errorCode) check
448
100M
        if(U_FAILURE(errorCode)) {
449
76.6k
            return 0;
450
76.6k
        }
451
100M
        if(index>=msg.length()) {
452
9.25k
            break;
453
9.25k
        }
454
100M
        char16_t c=msg.charAt(index++);
455
100M
        if(c==u_apos) {
456
1.32M
            if(index==msg.length()) {
457
                // The apostrophe is the last character in the pattern. 
458
                // Add a Part for auto-quoting.
459
48
                addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
460
48
                        u_apos, errorCode);  // value=char to be inserted
461
48
                needsAutoQuoting=true;
462
1.32M
            } else {
463
1.32M
                c=msg.charAt(index);
464
1.32M
                if(c==u_apos) {
465
                    // double apostrophe, skip the second one
466
511k
                    addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
467
811k
                } else if(
468
811k
                    aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
469
811k
                    c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
470
743k
                    (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
471
742k
                    (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
472
811k
                ) {
473
                    // skip the quote-starting apostrophe
474
76.4k
                    addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
475
                    // find the end of the quoted literal text
476
315k
                    for(;;) {
477
315k
                        index=msg.indexOf(u_apos, index+1);
478
315k
                        if(index>=0) {
479
315k
                            if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
480
                                // double apostrophe inside quoted literal text
481
                                // still encodes a single apostrophe, skip the second one
482
239k
                                addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
483
239k
                            } else {
484
                                // skip the quote-ending apostrophe
485
76.2k
                                addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
486
76.2k
                                break;
487
76.2k
                            }
488
315k
                        } else {
489
                            // The quoted text reaches to the end of the of the message.
490
239
                            index=msg.length();
491
                            // Add a Part for auto-quoting.
492
239
                            addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
493
239
                                    u_apos, errorCode);  // value=char to be inserted
494
239
                            needsAutoQuoting=true;
495
239
                            break;
496
239
                        }
497
315k
                    }
498
735k
                } else {
499
                    // Interpret the apostrophe as literal text.
500
                    // Add a Part for auto-quoting.
501
735k
                    addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
502
735k
                            u_apos, errorCode);  // value=char to be inserted
503
735k
                    needsAutoQuoting=true;
504
735k
                }
505
1.32M
            }
506
99.0M
        } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
507
            // The unquoted # in a plural message fragment will be replaced
508
            // with the (number-offset).
509
7.02k
            addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
510
99.0M
        } else if(c==u_leftCurlyBrace) {
511
1.02M
            index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
512
97.9M
        } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
513
97.8M
                  (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
514
            // Finish the message before the terminator.
515
            // In a choice style, report the "}" substring only for the following ARG_LIMIT,
516
            // not for this MSG_LIMIT.
517
553k
            int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
518
553k
            addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
519
553k
                         nestingLevel, errorCode);
520
553k
            if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
521
                // Let the choice style parser see the '}' or '|'.
522
469k
                return index-1;
523
469k
            } else {
524
                // continue parsing after the '}'
525
84.8k
                return index;
526
84.8k
            }
527
553k
        }  // else: c is part of literal text
528
100M
    }
529
9.25k
    if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
530
419
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
531
419
        errorCode=U_UNMATCHED_BRACES;
532
419
        return 0;
533
419
    }
534
8.83k
    addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
535
8.83k
    return index;
536
9.25k
}
537
538
int32_t
539
MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
540
1.02M
                         UParseError *parseError, UErrorCode &errorCode) {
541
1.02M
    int32_t argStart=partsLength;
542
1.02M
    UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
543
1.02M
    addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
544
1.02M
    if(U_FAILURE(errorCode)) {
545
0
        return 0;
546
0
    }
547
1.02M
    int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
548
1.02M
    if(index==msg.length()) {
549
220
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
550
220
        errorCode=U_UNMATCHED_BRACES;
551
220
        return 0;
552
220
    }
553
    // parse argument name or number
554
1.02M
    index=skipIdentifier(index);
555
1.02M
    int32_t number=parseArgNumber(nameIndex, index);
556
1.02M
    if(number>=0) {
557
714k
        int32_t length=index-nameIndex;
558
714k
        if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
559
84
            setParseError(parseError, nameIndex);  // Argument number too large.
560
84
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
561
84
            return 0;
562
84
        }
563
714k
        hasArgNumbers=true;
564
714k
        addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
565
714k
    } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
566
307k
        int32_t length=index-nameIndex;
567
307k
        if(length>Part::MAX_LENGTH) {
568
3
            setParseError(parseError, nameIndex);  // Argument name too long.
569
3
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
570
3
            return 0;
571
3
        }
572
307k
        hasArgNames=true;
573
307k
        addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
574
307k
    } else {  // number<-1 (ARG_NAME_NOT_VALID)
575
2.18k
        setParseError(parseError, nameIndex);  // Bad argument syntax.
576
2.18k
        errorCode=U_PATTERN_SYNTAX_ERROR;
577
2.18k
        return 0;
578
2.18k
    }
579
1.02M
    index=skipWhiteSpace(index);
580
1.02M
    if(index==msg.length()) {
581
777
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
582
777
        errorCode=U_UNMATCHED_BRACES;
583
777
        return 0;
584
777
    }
585
1.02M
    char16_t c=msg.charAt(index);
586
1.02M
    if(c==u_rightCurlyBrace) {
587
        // all done
588
767k
    } else if(c!=u_comma) {
589
521
        setParseError(parseError, nameIndex);  // Bad argument syntax.
590
521
        errorCode=U_PATTERN_SYNTAX_ERROR;
591
521
        return 0;
592
253k
    } else /* ',' */ {
593
        // parse argument type: case-sensitive a-zA-Z
594
253k
        int32_t typeIndex=index=skipWhiteSpace(index+1);
595
3.83M
        while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
596
3.58M
            ++index;
597
3.58M
        }
598
253k
        int32_t length=index-typeIndex;
599
253k
        index=skipWhiteSpace(index);
600
253k
        if(index==msg.length()) {
601
109
            setParseError(parseError, 0);  // Unmatched '{' braces in message.
602
109
            errorCode=U_UNMATCHED_BRACES;
603
109
            return 0;
604
109
        }
605
253k
        if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
606
440
            setParseError(parseError, nameIndex);  // Bad argument syntax.
607
440
            errorCode=U_PATTERN_SYNTAX_ERROR;
608
440
            return 0;
609
440
        }
610
252k
        if(length>Part::MAX_LENGTH) {
611
4
            setParseError(parseError, nameIndex);  // Argument type name too long.
612
4
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613
4
            return 0;
614
4
        }
615
252k
        argType=UMSGPAT_ARG_TYPE_SIMPLE;
616
252k
        if(length==6) {
617
            // case-insensitive comparisons for complex-type names
618
90.5k
            if(isChoice(typeIndex)) {
619
25.2k
                argType=UMSGPAT_ARG_TYPE_CHOICE;
620
65.3k
            } else if(isPlural(typeIndex)) {
621
44.9k
                argType=UMSGPAT_ARG_TYPE_PLURAL;
622
44.9k
            } else if(isSelect(typeIndex)) {
623
8.72k
                argType=UMSGPAT_ARG_TYPE_SELECT;
624
8.72k
            }
625
162k
        } else if(length==13) {
626
9.04k
            if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
627
824
                argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
628
824
            }
629
9.04k
        }
630
        // change the ARG_START type from NONE to argType
631
252k
        partsList->a[argStart].value = static_cast<int16_t>(argType);
632
252k
        if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
633
173k
            addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
634
173k
        }
635
        // look for an argument style (pattern)
636
252k
        if(c==u_rightCurlyBrace) {
637
76.7k
            if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
638
6
                setParseError(parseError, nameIndex);  // No style field for complex argument.
639
6
                errorCode=U_PATTERN_SYNTAX_ERROR;
640
6
                return 0;
641
6
            }
642
175k
        } else /* ',' */ {
643
175k
            ++index;
644
175k
            if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
645
96.2k
                index=parseSimpleStyle(index, parseError, errorCode);
646
96.2k
            } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
647
25.2k
                index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
648
54.4k
            } else {
649
54.4k
                index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
650
54.4k
            }
651
175k
        }
652
252k
    }
653
    // Argument parsing stopped on the '}'.
654
1.01M
    addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
655
1.01M
    return index+1;
656
1.02M
}
657
658
int32_t
659
96.2k
MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
660
96.2k
    if(U_FAILURE(errorCode)) {
661
0
        return 0;
662
0
    }
663
96.2k
    int32_t start=index;
664
96.2k
    int32_t nestedBraces=0;
665
28.1M
    while(index<msg.length()) {
666
28.1M
        char16_t c=msg.charAt(index++);
667
28.1M
        if(c==u_apos) {
668
            // Treat apostrophe as quoting but include it in the style part.
669
            // Find the end of the quoted literal text.
670
130k
            index=msg.indexOf(u_apos, index);
671
130k
            if(index<0) {
672
                // Quoted literal argument style text reaches to the end of the message.
673
52
                setParseError(parseError, start);
674
52
                errorCode=U_PATTERN_SYNTAX_ERROR;
675
52
                return 0;
676
52
            }
677
            // skip the quote-ending apostrophe
678
130k
            ++index;
679
28.0M
        } else if(c==u_leftCurlyBrace) {
680
16.8k
            ++nestedBraces;
681
28.0M
        } else if(c==u_rightCurlyBrace) {
682
107k
            if(nestedBraces>0) {
683
11.5k
                --nestedBraces;
684
95.6k
            } else {
685
95.6k
                int32_t length=--index-start;
686
95.6k
                if(length>Part::MAX_LENGTH) {
687
3
                    setParseError(parseError, start);  // Argument style text too long.
688
3
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
689
3
                    return 0;
690
3
                }
691
95.6k
                addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
692
95.6k
                return index;
693
95.6k
            }
694
107k
        }  // c is part of literal text
695
28.1M
    }
696
507
    setParseError(parseError, 0);  // Unmatched '{' braces in message.
697
507
    errorCode=U_UNMATCHED_BRACES;
698
507
    return 0;
699
96.2k
}
700
701
int32_t
MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
                                 UParseError *parseError, UErrorCode &errorCode) {
    // Parses a choice argument style: |-separated (number, separator, message) triples.
    // Returns the index of the terminating '}' or msg.length();
    // on failure sets errorCode (and parseError, if provided) and returns 0.
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    const int32_t styleStart=index;
    index=skipWhiteSpace(index);
    if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
        // Missing choice argument pattern.
        setParseError(parseError, 0);
        errorCode=U_PATTERN_SYNTAX_ERROR;
        return 0;
    }
    while(true) {
        // Step 1: the number.
        const int32_t numberStart=index;
        index=skipDouble(numberStart);
        const int32_t numberLength=index-numberStart;
        if(numberLength==0) {
            // Bad choice pattern syntax.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        if(numberLength>Part::MAX_LENGTH) {
            // Choice number too long.
            setParseError(parseError, numberStart);
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
        // Adds an ARG_INT or ARG_DOUBLE part.
        parseDouble(numberStart, index, true, parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }

        // Step 2: the separator.
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            // Bad choice pattern syntax.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        const char16_t separator=msg.charAt(index);
        // U+2264 is <=
        if(separator!=u_pound && separator!=u_lessThan && separator!=u_lessOrEqual) {
            // Expected choice separator (#<\u2264) instead of this character.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);

        // Step 3: the message fragment, starting just after the separator.
        index=parseMessage(index+1, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE,
                           parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }
        // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
        if(index==msg.length()) {
            return index;
        }
        if(msg.charAt(index)==u_rightCurlyBrace) {
            if(inMessageFormatPattern(nestingLevel)) {
                return index;
            }
            // Bad choice pattern syntax.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        // Otherwise the terminator is '|': continue with the next triple.
        index=skipWhiteSpace(index+1);
    }
}
768
769
int32_t
MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
                                         int32_t index, int32_t nestingLevel,
                                         UParseError *parseError, UErrorCode &errorCode) {
    // Parses a sequence of (selector, {message}) pairs, optionally preceded by
    // "offset:value" for plural-style arguments.
    // Returns the index of the terminating '}' or msg.length();
    // on failure sets errorCode (and parseError, if provided) and returns 0.
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    const int32_t styleStart=index;
    UBool nothingParsedYet=true;
    UBool sawOther=false;
    while(true) {
        // Collect the next selector, watching for the end of the style first.
        index=skipWhiteSpace(index);
        const UBool atEnd= index==msg.length();
        if(atEnd || msg.charAt(index)==u_rightCurlyBrace) {
            if(atEnd==inMessageFormatPattern(nestingLevel)) {
                // Bad plural/select pattern syntax.
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            if(!sawOther) {
                // Missing 'other' keyword in plural/select pattern.
                setParseError(parseError, 0);
                errorCode=U_DEFAULT_KEYWORD_MISSING;
                return 0;
            }
            return index;
        }

        const int32_t selectorStart=index;
        if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorStart)==u_equal) {
            // Explicit-value plural selector: =double
            index=skipDouble(selectorStart+1);
            const int32_t selectorLength=index-selectorStart;
            if(selectorLength==1) {
                // Only the '=' with no number: bad plural/select pattern syntax.
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            if(selectorLength>Part::MAX_LENGTH) {
                // Argument selector too long.
                setParseError(parseError, selectorStart);
                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorStart, selectorLength, 0, errorCode);
            // Adds an ARG_INT or ARG_DOUBLE part for the value after the '='.
            parseDouble(selectorStart+1, index, false, parseError, errorCode);
        } else {
            index=skipIdentifier(selectorStart);
            const int32_t selectorLength=index-selectorStart;
            if(selectorLength==0) {
                // Bad plural/select pattern syntax.
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
            const bool isOffset=
                UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) &&
                selectorLength==6 &&
                index<msg.length() &&
                0==msg.compare(selectorStart, 7, kOffsetColon, 0, 7);
            if(isOffset) {
                // This is the plural offset, not a selector.
                if(!nothingParsedYet) {
                    // Plural argument 'offset:' (if present) must precede key-message pairs.
                    setParseError(parseError, styleStart);
                    errorCode=U_PATTERN_SYNTAX_ERROR;
                    return 0;
                }
                // Whitespace is allowed between "offset:" and its value; the ':' is at index.
                const int32_t valueStart=skipWhiteSpace(index+1);
                index=skipDouble(valueStart);
                if(index==valueStart) {
                    // Missing value for plural 'offset:'.
                    setParseError(parseError, styleStart);
                    errorCode=U_PATTERN_SYNTAX_ERROR;
                    return 0;
                }
                if((index-valueStart)>Part::MAX_LENGTH) {
                    // Plural offset value too long.
                    setParseError(parseError, valueStart);
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
                // Adds an ARG_INT or ARG_DOUBLE part.
                parseDouble(valueStart, index, false, parseError, errorCode);
                if(U_FAILURE(errorCode)) {
                    return 0;
                }
                nothingParsedYet=false;
                continue;  // no message fragment after the offset
            }
            // A normal selector word.
            if(selectorLength>Part::MAX_LENGTH) {
                // Argument selector too long.
                setParseError(parseError, selectorStart);
                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorStart, selectorLength, 0, errorCode);
            if(0==msg.compare(selectorStart, selectorLength, kOther, 0, 5)) {
                sawOther=true;
            }
        }
        if(U_FAILURE(errorCode)) {
            return 0;
        }

        // The selector must be followed by a {message} fragment.
        index=skipWhiteSpace(index);
        if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
            // No message fragment after plural/select selector.
            setParseError(parseError, selectorStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }
        nothingParsedYet=false;
    }
}
886
887
int32_t
888
1.02M
MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
889
    // If the identifier contains only ASCII digits, then it is an argument _number_
890
    // and must not have leading zeros (except "0" itself).
891
    // Otherwise it is an argument _name_.
892
1.02M
    if(start>=limit) {
893
2.13k
        return UMSGPAT_ARG_NAME_NOT_VALID;
894
2.13k
    }
895
1.02M
    int32_t number;
896
    // Defer numeric errors until we know there are only digits.
897
1.02M
    UBool badNumber;
898
1.02M
    char16_t c=s.charAt(start++);
899
1.02M
    if(c==0x30) {
900
625k
        if(start==limit) {
901
624k
            return 0;
902
624k
        } else {
903
1.38k
            number=0;
904
1.38k
            badNumber=true;  // leading zero
905
1.38k
        }
906
625k
    } else if(0x31<=c && c<=0x39) {
907
91.5k
        number=c-0x30;
908
91.5k
        badNumber=false;
909
304k
    } else {
910
304k
        return UMSGPAT_ARG_NAME_NOT_NUMBER;
911
304k
    }
912
140k
    while(start<limit) {
913
50.0k
        c=s.charAt(start++);
914
50.0k
        if(0x30<=c && c<=0x39) {
915
47.4k
            if(number>=INT32_MAX/10) {
916
1.37k
                badNumber=true;  // overflow
917
46.0k
            } else {
918
46.0k
                number=number*10+(c-0x30);
919
46.0k
            }
920
47.4k
        } else {
921
2.59k
            return UMSGPAT_ARG_NAME_NOT_NUMBER;
922
2.59k
        }
923
50.0k
    }
924
    // There are only ASCII digits.
925
90.3k
    if(badNumber) {
926
51
        return UMSGPAT_ARG_NAME_NOT_VALID;
927
90.3k
    } else {
928
90.3k
        return number;
929
90.3k
    }
930
90.3k
}
931
932
void
933
MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
934
493k
                            UParseError *parseError, UErrorCode &errorCode) {
935
493k
    if(U_FAILURE(errorCode)) {
936
0
        return;
937
0
    }
938
493k
    U_ASSERT(start<limit);
939
    // fake loop for easy exit and single throw statement
940
493k
    for(;;) { /*loop doesn't iterate*/
941
        // fast path for small integers and infinity
942
493k
        int32_t value=0;
943
493k
        int32_t isNegative=0;  // not boolean so that we can easily add it to value
944
493k
        int32_t index=start;
945
493k
        char16_t c=msg.charAt(index++);
946
493k
        if(c==u_minus) {
947
23.1k
            isNegative=1;
948
23.1k
            if(index==limit) {
949
113
                break;  // no number
950
113
            }
951
23.0k
            c=msg.charAt(index++);
952
470k
        } else if(c==u_plus) {
953
4.74k
            if(index==limit) {
954
22
                break;  // no number
955
22
            }
956
4.72k
            c=msg.charAt(index++);
957
4.72k
        }
958
493k
        if(c==0x221e) {  // infinity
959
685
            if(allowInfinity && index==limit) {
960
645
                double infinity=uprv_getInfinity();
961
645
                addArgDoublePart(
962
645
                    isNegative!=0 ? -infinity : infinity,
963
645
                    start, limit-start, errorCode);
964
645
                return;
965
645
            } else {
966
40
                break;
967
40
            }
968
685
        }
969
        // try to parse the number as a small integer but fall back to a double
970
568k
        while('0'<=c && c<='9') {
971
178k
            value=value*10+(c-'0');
972
178k
            if(value>(Part::MAX_VALUE+isNegative)) {
973
10.4k
                break;  // not a small-enough integer
974
10.4k
            }
975
167k
            if(index==limit) {
976
92.1k
                addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
977
92.1k
                        isNegative!=0 ? -value : value, errorCode);
978
92.1k
                return;
979
92.1k
            }
980
75.7k
            c=msg.charAt(index++);
981
75.7k
        }
982
        // Let Double.parseDouble() throw a NumberFormatException.
983
400k
        char numberChars[128];
984
400k
        int32_t capacity = static_cast<int32_t>(sizeof(numberChars));
985
400k
        int32_t length=limit-start;
986
400k
        if(length>=capacity) {
987
209
            break;  // number too long
988
209
        }
989
400k
        msg.extract(start, length, numberChars, capacity, US_INV);
990
400k
        if (static_cast<int32_t>(uprv_strlen(numberChars)) < length) {
991
41
            break;  // contains non-invariant character that was turned into NUL
992
41
        }
993
400k
        char *end;
994
400k
        double numericValue=uprv_strtod(numberChars, &end);
995
400k
        if(end!=(numberChars+length)) {
996
697
            break;  // parsing error
997
697
        }
998
399k
        addArgDoublePart(numericValue, start, length, errorCode);
999
399k
        return;
1000
400k
    }
1001
1.12k
    setParseError(parseError, start /*, limit*/);  // Bad syntax for numeric value.
1002
1.12k
    errorCode=U_PATTERN_SYNTAX_ERROR;
1003
1.12k
}
1004
1005
int32_t
MessagePattern::skipWhiteSpace(int32_t index) {
    // Delegates to PatternProps::skipWhiteSpace() on the rest of the message
    // and converts the returned pointer back into an index into msg.
    const char16_t *const base=msg.getBuffer();
    const int32_t remaining=msg.length()-index;
    const char16_t *const next=PatternProps::skipWhiteSpace(base+index, remaining);
    return static_cast<int32_t>(next - base);
}
1012
1013
int32_t
MessagePattern::skipIdentifier(int32_t index) {
    // Delegates to PatternProps::skipIdentifier() on the rest of the message
    // and converts the returned pointer back into an index into msg.
    const char16_t *const base=msg.getBuffer();
    const int32_t remaining=msg.length()-index;
    const char16_t *const next=PatternProps::skipIdentifier(base+index, remaining);
    return static_cast<int32_t>(next - base);
}
1020
1021
int32_t
MessagePattern::skipDouble(int32_t index) {
    // Advances index past every character that may appear in a number:
    // ASCII digits, '+', '-', '.', 'e', 'E', and the infinity symbol.
    const int32_t end=msg.length();
    for(; index<end; ++index) {
        const char16_t c=msg.charAt(index);
        const bool isDigit= 0x30<=c && c<=0x39;
        const bool isSignOrDot= c==u_plus || c==u_minus || c==u_dot;
        // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
        const bool isExponentOrInfinity= c==u_e || c==u_E || c==0x221e;
        if(!isDigit && !isSignOrDot && !isExponentOrInfinity) {
            break;
        }
    }
    return index;
}
1034
1035
UBool
MessagePattern::isArgTypeChar(UChar32 c) {
    // True for the letters a-z and A-Z only.
    if(u_a<=c && c<=u_z) {
        return true;
    }
    return u_A<=c && c<=u_Z;
}
1039
1040
UBool
MessagePattern::isChoice(int32_t index) {
    // Case-insensitive match of "choice" against the six code units starting at index.
    const char16_t lower[]={ u_c, u_h, u_o, u_i, u_c, u_e };
    const char16_t upper[]={ u_C, u_H, u_O, u_I, u_C, u_E };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1051
1052
UBool
MessagePattern::isPlural(int32_t index) {
    // Case-insensitive match of "plural" against the six code units starting at index.
    const char16_t lower[]={ u_p, u_l, u_u, u_r, u_a, u_l };
    const char16_t upper[]={ u_P, u_L, u_U, u_R, u_A, u_L };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1063
1064
UBool
MessagePattern::isSelect(int32_t index) {
    // Case-insensitive match of "select" against the six code units starting at index.
    const char16_t lower[]={ u_s, u_e, u_l, u_e, u_c, u_t };
    const char16_t upper[]={ u_S, u_E, u_L, u_E, u_C, u_T };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1075
1076
UBool
MessagePattern::isOrdinal(int32_t index) {
    // Case-insensitive match of "ordinal" against the seven code units starting at index.
    const char16_t lower[]={ u_o, u_r, u_d, u_i, u_n, u_a, u_l };
    const char16_t upper[]={ u_O, u_R, u_D, u_I, u_N, u_A, u_L };
    for(int32_t i=0; i<7; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1088
1089
UBool
MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
    // Any nested level counts; at the top level it depends on
    // whether the first part is a MSG_START.
    if(nestingLevel>0) {
        return true;
    }
    return partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
}
1093
1094
UBool
MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
    // True only at nesting level 1, directly inside a choice argument,
    // when the first part is not a MSG_START.
    if(nestingLevel!=1) {
        return false;
    }
    if(parentType!=UMSGPAT_ARG_TYPE_CHOICE) {
        return false;
    }
    return partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
}
1101
1102
void
1103
MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
1104
7.30M
                        int32_t value, UErrorCode &errorCode) {
1105
7.30M
    if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
1106
7.22M
        Part &part=partsList->a[partsLength++];
1107
7.22M
        part.type=type;
1108
7.22M
        part.index=index;
1109
7.22M
        part.length = static_cast<uint16_t>(length);
1110
7.22M
        part.value = static_cast<int16_t>(value);
1111
7.22M
        part.limitPartIndex=0;
1112
7.22M
    }
1113
7.30M
}
1114
1115
void
1116
MessagePattern::addLimitPart(int32_t start,
1117
                             UMessagePatternPartType type, int32_t index, int32_t length,
1118
1.58M
                             int32_t value, UErrorCode &errorCode) {
1119
1.58M
    partsList->a[start].limitPartIndex=partsLength;
1120
1.58M
    addPart(type, index, length, value, errorCode);
1121
1.58M
}
1122
1123
void
1124
MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
1125
400k
                                 UErrorCode &errorCode) {
1126
400k
    if(U_FAILURE(errorCode)) {
1127
0
        return;
1128
0
    }
1129
400k
    int32_t numericIndex=numericValuesLength;
1130
400k
    if(numericValuesList==nullptr) {
1131
2.73k
        numericValuesList=new MessagePatternDoubleList();
1132
2.73k
        if(numericValuesList==nullptr) {
1133
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
1134
0
            return;
1135
0
        }
1136
397k
    } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
1137
0
        return;
1138
397k
    } else {
1139
397k
        if(numericIndex>Part::MAX_VALUE) {
1140
2
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1141
2
            return;
1142
2
        }
1143
397k
    }
1144
400k
    numericValuesList->a[numericValuesLength++]=numericValue;
1145
400k
    addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
1146
400k
}
1147
1148
void
1149
15.2k
MessagePattern::setParseError(UParseError *parseError, int32_t index) {
1150
15.2k
    if(parseError==nullptr) {
1151
9.33k
        return;
1152
9.33k
    }
1153
5.90k
    parseError->offset=index;
1154
1155
    // Set preContext to some of msg before index.
1156
    // Avoid splitting a surrogate pair.
1157
5.90k
    int32_t length=index;
1158
5.90k
    if(length>=U_PARSE_CONTEXT_LEN) {
1159
1.08k
        length=U_PARSE_CONTEXT_LEN-1;
1160
1.08k
        if(length>0 && U16_IS_TRAIL(msg[index-length])) {
1161
6
            --length;
1162
6
        }
1163
1.08k
    }
1164
5.90k
    msg.extract(index-length, length, parseError->preContext);
1165
5.90k
    parseError->preContext[length]=0;
1166
1167
    // Set postContext to some of msg starting at index.
1168
5.90k
    length=msg.length()-index;
1169
5.90k
    if(length>=U_PARSE_CONTEXT_LEN) {
1170
3.05k
        length=U_PARSE_CONTEXT_LEN-1;
1171
3.05k
        if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
1172
20
            --length;
1173
20
        }
1174
3.05k
    }
1175
5.90k
    msg.extract(index, length, parseError->postContext);
1176
5.90k
    parseError->postContext[length]=0;
1177
5.90k
}
1178
1179
// MessageImpl ------------------------------------------------------------- ***
1180
1181
void
1182
MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
1183
0
                                      UnicodeString &sb) {
1184
0
    int32_t doubleApos=-1;
1185
0
    for(;;) {
1186
0
        int32_t i=s.indexOf(u_apos, start);
1187
0
        if(i<0 || i>=limit) {
1188
0
            sb.append(s, start, limit-start);
1189
0
            break;
1190
0
        }
1191
0
        if(i==doubleApos) {
1192
            // Double apostrophe at start-1 and start==i, append one.
1193
0
            sb.append(u_apos);
1194
0
            ++start;
1195
0
            doubleApos=-1;
1196
0
        } else {
1197
            // Append text between apostrophes and skip this one.
1198
0
            sb.append(s, start, i-start);
1199
0
            doubleApos=start=i+1;
1200
0
        }
1201
0
    }
1202
0
}
1203
1204
// Ported from second half of ICU4J SelectFormat.format(String).
1205
UnicodeString &
MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
                                               int32_t msgStart,
                                               UnicodeString &result) {
    // Appends the text of the sub-message that starts at part msgStart to result,
    // leaving out SKIP_SYNTAX parts and reducing apostrophes inside nested arguments.
    const UnicodeString &pattern=msgPattern.getPatternString();
    int32_t copiedUpTo=msgPattern.getPart(msgStart).getLimit();
    int32_t partIndex=msgStart;
    while(true) {
        ++partIndex;
        const MessagePattern::Part &part=msgPattern.getPart(partIndex);
        const int32_t partStart=part.getIndex();
        switch(part.getType()) {
        case UMSGPAT_PART_TYPE_MSG_LIMIT:
            // End of the sub-message: append the remaining text and finish.
            return result.append(pattern, copiedUpTo, partStart-copiedUpTo);
        case UMSGPAT_PART_TYPE_SKIP_SYNTAX:
            // Append the text before the skipped syntax and continue after it.
            result.append(pattern, copiedUpTo, partStart-copiedUpTo);
            copiedUpTo=part.getLimit();
            break;
        case UMSGPAT_PART_TYPE_ARG_START: {
            result.append(pattern, copiedUpTo, partStart-copiedUpTo);
            // Jump to the argument's limit part and append the whole argument text.
            partIndex=msgPattern.getLimitPartIndex(partIndex);
            const int32_t argLimit=msgPattern.getPart(partIndex).getLimit();
            appendReducedApostrophes(pattern, partStart, argLimit, result);
            copiedUpTo=argLimit;
            break;
        }
        default:
            break;
        }
    }
}
1230
1231
U_NAMESPACE_END
1232
1233
#endif  // !UCONFIG_NO_FORMATTING