Coverage Report

Created: 2025-09-05 07:16

/src/icu/icu4c/source/common/messagepattern.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2011-2012, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  messagepattern.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2011mar14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_FORMATTING
20
21
#include "unicode/messagepattern.h"
22
#include "unicode/unistr.h"
23
#include "unicode/utf16.h"
24
#include "cmemory.h"
25
#include "cstring.h"
26
#include "messageimpl.h"
27
#include "patternprops.h"
28
#include "putilimp.h"
29
#include "uassert.h"
30
31
U_NAMESPACE_BEGIN
32
33
// Unicode character/code point constants ---------------------------------- ***
34
35
static const char16_t u_pound=0x23;
36
static const char16_t u_apos=0x27;
37
static const char16_t u_plus=0x2B;
38
static const char16_t u_comma=0x2C;
39
static const char16_t u_minus=0x2D;
40
static const char16_t u_dot=0x2E;
41
static const char16_t u_colon=0x3A;
42
static const char16_t u_lessThan=0x3C;
43
static const char16_t u_equal=0x3D;
44
static const char16_t u_A=0x41;
45
static const char16_t u_C=0x43;
46
static const char16_t u_D=0x44;
47
static const char16_t u_E=0x45;
48
static const char16_t u_H=0x48;
49
static const char16_t u_I=0x49;
50
static const char16_t u_L=0x4C;
51
static const char16_t u_N=0x4E;
52
static const char16_t u_O=0x4F;
53
static const char16_t u_P=0x50;
54
static const char16_t u_R=0x52;
55
static const char16_t u_S=0x53;
56
static const char16_t u_T=0x54;
57
static const char16_t u_U=0x55;
58
static const char16_t u_Z=0x5A;
59
static const char16_t u_a=0x61;
60
static const char16_t u_c=0x63;
61
static const char16_t u_d=0x64;
62
static const char16_t u_e=0x65;
63
static const char16_t u_f=0x66;
64
static const char16_t u_h=0x68;
65
static const char16_t u_i=0x69;
66
static const char16_t u_l=0x6C;
67
static const char16_t u_n=0x6E;
68
static const char16_t u_o=0x6F;
69
static const char16_t u_p=0x70;
70
static const char16_t u_r=0x72;
71
static const char16_t u_s=0x73;
72
static const char16_t u_t=0x74;
73
static const char16_t u_u=0x75;
74
static const char16_t u_z=0x7A;
75
static const char16_t u_leftCurlyBrace=0x7B;
76
static const char16_t u_pipe=0x7C;
77
static const char16_t u_rightCurlyBrace=0x7D;
78
static const char16_t u_lessOrEqual=0x2264;  // U+2264 is <=
79
80
static const char16_t kOffsetColon[]={  // "offset:"
81
    u_o, u_f, u_f, u_s, u_e, u_t, u_colon
82
};
83
84
static const char16_t kOther[]={  // "other"
85
    u_o, u_t, u_h, u_e, u_r
86
};
87
88
// MessagePatternList ------------------------------------------------------ ***
89
90
template<typename T, int32_t stackCapacity>
91
class MessagePatternList : public UMemory {
92
public:
93
45.8k
    MessagePatternList() {}
icu_78::MessagePatternList<icu_78::MessagePattern::Part, 32>::MessagePatternList()
Line
Count
Source
93
43.4k
    MessagePatternList() {}
icu_78::MessagePatternList<double, 8>::MessagePatternList()
Line
Count
Source
93
2.39k
    MessagePatternList() {}
94
    void copyFrom(const MessagePatternList<T, stackCapacity> &other,
95
                  int32_t length,
96
                  UErrorCode &errorCode);
97
    UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
98
0
    UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
99
0
        for(int32_t i=0; i<length; ++i) {
100
0
            if(a[i]!=other.a[i]) { return false; }
101
0
        }
102
0
        return true;
103
0
    }
104
105
    MaybeStackArray<T, stackCapacity> a;
106
};
107
108
template<typename T, int32_t stackCapacity>
109
void
110
MessagePatternList<T, stackCapacity>::copyFrom(
111
        const MessagePatternList<T, stackCapacity> &other,
112
        int32_t length,
113
0
        UErrorCode &errorCode) {
114
0
    if(U_SUCCESS(errorCode) && length>0) {
115
0
        if(length>a.getCapacity() && nullptr==a.resize(length)) {
116
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
117
0
            return;
118
0
        }
119
0
        uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
120
0
    }
121
0
}
Unexecuted instantiation: icu_78::MessagePatternList<icu_78::MessagePattern::Part, 32>::copyFrom(icu_78::MessagePatternList<icu_78::MessagePattern::Part, 32> const&, int, UErrorCode&)
Unexecuted instantiation: icu_78::MessagePatternList<double, 8>::copyFrom(icu_78::MessagePatternList<double, 8> const&, int, UErrorCode&)
122
123
template<typename T, int32_t stackCapacity>
124
UBool
125
7.13M
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
7.13M
    if(U_FAILURE(errorCode)) {
127
79.5k
        return false;
128
79.5k
    }
129
7.05M
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
7.05M
        return true;
131
7.05M
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
7.05M
}
icu_78::MessagePatternList<icu_78::MessagePattern::Part, 32>::ensureCapacityForOneMore(int, UErrorCode&)
Line
Count
Source
125
6.64M
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
6.64M
    if(U_FAILURE(errorCode)) {
127
79.5k
        return false;
128
79.5k
    }
129
6.56M
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
6.56M
        return true;
131
6.56M
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
6.56M
}
icu_78::MessagePatternList<double, 8>::ensureCapacityForOneMore(int, UErrorCode&)
Line
Count
Source
125
490k
MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
126
490k
    if(U_FAILURE(errorCode)) {
127
0
        return false;
128
0
    }
129
490k
    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
130
490k
        return true;
131
490k
    }
132
0
    errorCode=U_MEMORY_ALLOCATION_ERROR;
133
0
    return false;
134
490k
}
135
136
// MessagePatternList specializations -------------------------------------- ***
137
138
class MessagePatternDoubleList : public MessagePatternList<double, 8> {
139
};
140
141
class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
142
};
143
144
// MessagePattern constructors etc. ---------------------------------------- ***
145
146
MessagePattern::MessagePattern(UErrorCode &errorCode)
147
36.2k
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
148
36.2k
          partsList(nullptr), parts(nullptr), partsLength(0),
149
36.2k
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
150
36.2k
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
151
36.2k
    init(errorCode);
152
36.2k
}
153
154
MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
155
0
        : aposMode(mode),
156
0
          partsList(nullptr), parts(nullptr), partsLength(0),
157
0
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
158
0
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
159
0
    init(errorCode);
160
0
}
161
162
MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
163
7.19k
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
164
7.19k
          partsList(nullptr), parts(nullptr), partsLength(0),
165
7.19k
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
166
7.19k
          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
167
7.19k
    if(init(errorCode)) {
168
7.19k
        parse(pattern, parseError, errorCode);
169
7.19k
    }
170
7.19k
}
171
172
UBool
173
43.4k
MessagePattern::init(UErrorCode &errorCode) {
174
43.4k
    if(U_FAILURE(errorCode)) {
175
0
        return false;
176
0
    }
177
43.4k
    partsList=new MessagePatternPartsList();
178
43.4k
    if(partsList==nullptr) {
179
0
        errorCode=U_MEMORY_ALLOCATION_ERROR;
180
0
        return false;
181
0
    }
182
43.4k
    parts=partsList->a.getAlias();
183
43.4k
    return true;
184
43.4k
}
185
186
MessagePattern::MessagePattern(const MessagePattern &other)
187
0
        : UObject(other), aposMode(other.aposMode), msg(other.msg),
188
0
          partsList(nullptr), parts(nullptr), partsLength(0),
189
0
          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
190
0
          hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
191
0
          needsAutoQuoting(other.needsAutoQuoting) {
192
0
    UErrorCode errorCode=U_ZERO_ERROR;
193
0
    if(!copyStorage(other, errorCode)) {
194
0
        clear();
195
0
    }
196
0
}
197
198
MessagePattern &
199
0
MessagePattern::operator=(const MessagePattern &other) {
200
0
    if(this==&other) {
201
0
        return *this;
202
0
    }
203
0
    aposMode=other.aposMode;
204
0
    msg=other.msg;
205
0
    hasArgNames=other.hasArgNames;
206
0
    hasArgNumbers=other.hasArgNumbers;
207
0
    needsAutoQuoting=other.needsAutoQuoting;
208
0
    UErrorCode errorCode=U_ZERO_ERROR;
209
0
    if(!copyStorage(other, errorCode)) {
210
0
        clear();
211
0
    }
212
0
    return *this;
213
0
}
214
215
UBool
216
0
MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
217
0
    if(U_FAILURE(errorCode)) {
218
0
        return false;
219
0
    }
220
0
    parts=nullptr;
221
0
    partsLength=0;
222
0
    numericValues=nullptr;
223
0
    numericValuesLength=0;
224
0
    if(partsList==nullptr) {
225
0
        partsList=new MessagePatternPartsList();
226
0
        if(partsList==nullptr) {
227
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
228
0
            return false;
229
0
        }
230
0
        parts=partsList->a.getAlias();
231
0
    }
232
0
    if(other.partsLength>0) {
233
0
        partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
234
0
        if(U_FAILURE(errorCode)) {
235
0
            return false;
236
0
        }
237
0
        parts=partsList->a.getAlias();
238
0
        partsLength=other.partsLength;
239
0
    }
240
0
    if(other.numericValuesLength>0) {
241
0
        if(numericValuesList==nullptr) {
242
0
            numericValuesList=new MessagePatternDoubleList();
243
0
            if(numericValuesList==nullptr) {
244
0
                errorCode=U_MEMORY_ALLOCATION_ERROR;
245
0
                return false;
246
0
            }
247
0
            numericValues=numericValuesList->a.getAlias();
248
0
        }
249
0
        numericValuesList->copyFrom(
250
0
            *other.numericValuesList, other.numericValuesLength, errorCode);
251
0
        if(U_FAILURE(errorCode)) {
252
0
            return false;
253
0
        }
254
0
        numericValues=numericValuesList->a.getAlias();
255
0
        numericValuesLength=other.numericValuesLength;
256
0
    }
257
0
    return true;
258
0
}
259
260
43.4k
MessagePattern::~MessagePattern() {
261
43.4k
    delete partsList;
262
43.4k
    delete numericValuesList;
263
43.4k
}
264
265
// MessagePattern API ------------------------------------------------------ ***
266
267
MessagePattern &
268
14.3k
MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
269
14.3k
    preParse(pattern, parseError, errorCode);
270
14.3k
    parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
271
14.3k
    postParse();
272
14.3k
    return *this;
273
14.3k
}
274
275
MessagePattern &
276
MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
277
8.20k
                                 UParseError *parseError, UErrorCode &errorCode) {
278
8.20k
    preParse(pattern, parseError, errorCode);
279
8.20k
    parseChoiceStyle(0, 0, parseError, errorCode);
280
8.20k
    postParse();
281
8.20k
    return *this;
282
8.20k
}
283
284
MessagePattern &
285
MessagePattern::parsePluralStyle(const UnicodeString &pattern,
286
20.8k
                                 UParseError *parseError, UErrorCode &errorCode) {
287
20.8k
    preParse(pattern, parseError, errorCode);
288
20.8k
    parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
289
20.8k
    postParse();
290
20.8k
    return *this;
291
20.8k
}
292
293
MessagePattern &
294
MessagePattern::parseSelectStyle(const UnicodeString &pattern,
295
0
                                 UParseError *parseError, UErrorCode &errorCode) {
296
0
    preParse(pattern, parseError, errorCode);
297
0
    parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
298
0
    postParse();
299
0
    return *this;
300
0
}
301
302
void
303
3.40k
MessagePattern::clear() {
304
    // Mostly the same as preParse().
305
3.40k
    msg.remove();
306
3.40k
    hasArgNames=hasArgNumbers=false;
307
3.40k
    needsAutoQuoting=false;
308
3.40k
    partsLength=0;
309
3.40k
    numericValuesLength=0;
310
3.40k
}
311
312
bool
313
0
MessagePattern::operator==(const MessagePattern &other) const {
314
0
    if(this==&other) {
315
0
        return true;
316
0
    }
317
0
    return
318
0
        aposMode==other.aposMode &&
319
0
        msg==other.msg &&
320
        // parts.equals(o.parts)
321
0
        partsLength==other.partsLength &&
322
0
        (partsLength==0 || partsList->equals(*other.partsList, partsLength));
323
    // No need to compare numericValues if msg and parts are the same.
324
0
}
325
326
int32_t
327
0
MessagePattern::hashCode() const {
328
0
    int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
329
0
    for(int32_t i=0; i<partsLength; ++i) {
330
0
        hash=hash*37+parts[i].hashCode();
331
0
    }
332
0
    return hash;
333
0
}
334
335
int32_t
336
0
MessagePattern::validateArgumentName(const UnicodeString &name) {
337
0
    if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
338
0
        return UMSGPAT_ARG_NAME_NOT_VALID;
339
0
    }
340
0
    return parseArgNumber(name, 0, name.length());
341
0
}
342
343
UnicodeString
344
0
MessagePattern::autoQuoteApostropheDeep() const {
345
0
    if(!needsAutoQuoting) {
346
0
        return msg;
347
0
    }
348
0
    UnicodeString modified(msg);
349
    // Iterate backward so that the insertion indexes do not change.
350
0
    int32_t count=countParts();
351
0
    for(int32_t i=count; i>0;) {
352
0
        const Part &part=getPart(--i);
353
0
        if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
354
0
           modified.insert(part.index, static_cast<char16_t>(part.value));
355
0
        }
356
0
    }
357
0
    return modified;
358
0
}
359
360
double
361
6.16k
MessagePattern::getNumericValue(const Part &part) const {
362
6.16k
    UMessagePatternPartType type=part.type;
363
6.16k
    if(type==UMSGPAT_PART_TYPE_ARG_INT) {
364
5.56k
        return part.value;
365
5.56k
    } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
366
604
        return numericValues[part.value];
367
604
    } else {
368
0
        return UMSGPAT_NO_NUMERIC_VALUE;
369
0
    }
370
6.16k
}
371
372
/**
373
  * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
374
  * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
375
  * @return the "offset:" value.
376
  * @draft ICU 4.8
377
  */
378
double
379
20.8k
MessagePattern::getPluralOffset(int32_t pluralStart) const {
380
20.8k
    const Part &part=getPart(pluralStart);
381
20.8k
    if(Part::hasNumericValue(part.type)) {
382
0
        return getNumericValue(part);
383
20.8k
    } else {
384
20.8k
        return 0;
385
20.8k
    }
386
20.8k
}
387
388
// MessagePattern::Part ---------------------------------------------------- ***
389
390
bool
391
0
MessagePattern::Part::operator==(const Part &other) const {
392
0
    if(this==&other) {
393
0
        return true;
394
0
    }
395
0
    return
396
0
        type==other.type &&
397
0
        index==other.index &&
398
0
        length==other.length &&
399
0
        value==other.value &&
400
0
        limitPartIndex==other.limitPartIndex;
401
0
}
402
403
// MessagePattern parser --------------------------------------------------- ***
404
405
void
406
43.4k
MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
407
43.4k
    if(U_FAILURE(errorCode)) {
408
0
        return;
409
0
    }
410
43.4k
    if(parseError!=nullptr) {
411
14.3k
        parseError->line=0;
412
14.3k
        parseError->offset=0;
413
14.3k
        parseError->preContext[0]=0;
414
14.3k
        parseError->postContext[0]=0;
415
14.3k
    }
416
43.4k
    msg=pattern;
417
43.4k
    hasArgNames=hasArgNumbers=false;
418
43.4k
    needsAutoQuoting=false;
419
43.4k
    partsLength=0;
420
43.4k
    numericValuesLength=0;
421
43.4k
}
422
423
void
424
43.4k
MessagePattern::postParse() {
425
43.4k
    if(partsList!=nullptr) {
426
43.4k
        parts=partsList->a.getAlias();
427
43.4k
    }
428
43.4k
    if(numericValuesList!=nullptr) {
429
2.39k
        numericValues=numericValuesList->a.getAlias();
430
2.39k
    }
431
43.4k
}
432
433
int32_t
434
MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
435
                             int32_t nestingLevel, UMessagePatternArgType parentType,
436
756k
                             UParseError *parseError, UErrorCode &errorCode) {
437
756k
    if(U_FAILURE(errorCode)) {
438
0
        return 0;
439
0
    }
440
756k
    if(nestingLevel>Part::MAX_NESTED_LEVELS) {
441
4
        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
442
4
        return 0;
443
4
    }
444
756k
    int32_t msgStart=partsLength;
445
756k
    addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
446
756k
    index+=msgStartLength;
447
106M
    for(;;) {  // while(index<msg.length()) with U_FAILURE(errorCode) check
448
106M
        if(U_FAILURE(errorCode)) {
449
83.7k
            return 0;
450
83.7k
        }
451
106M
        if(index>=msg.length()) {
452
9.05k
            break;
453
9.05k
        }
454
106M
        char16_t c=msg.charAt(index++);
455
106M
        if(c==u_apos) {
456
1.46M
            if(index==msg.length()) {
457
                // The apostrophe is the last character in the pattern. 
458
                // Add a Part for auto-quoting.
459
45
                addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
460
45
                        u_apos, errorCode);  // value=char to be inserted
461
45
                needsAutoQuoting=true;
462
1.46M
            } else {
463
1.46M
                c=msg.charAt(index);
464
1.46M
                if(c==u_apos) {
465
                    // double apostrophe, skip the second one
466
373k
                    addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
467
1.09M
                } else if(
468
1.09M
                    aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
469
1.09M
                    c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
470
1.09M
                    (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
471
1.09M
                    (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
472
1.09M
                ) {
473
                    // skip the quote-starting apostrophe
474
99.6k
                    addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
475
                    // find the end of the quoted literal text
476
227k
                    for(;;) {
477
227k
                        index=msg.indexOf(u_apos, index+1);
478
227k
                        if(index>=0) {
479
227k
                            if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
480
                                // double apostrophe inside quoted literal text
481
                                // still encodes a single apostrophe, skip the second one
482
127k
                                addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
483
127k
                            } else {
484
                                // skip the quote-ending apostrophe
485
99.4k
                                addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
486
99.4k
                                break;
487
99.4k
                            }
488
227k
                        } else {
489
                            // The quoted text reaches to the end of the message.
490
213
                            index=msg.length();
491
                            // Add a Part for auto-quoting.
492
213
                            addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
493
213
                                    u_apos, errorCode);  // value=char to be inserted
494
213
                            needsAutoQuoting=true;
495
213
                            break;
496
213
                        }
497
227k
                    }
498
993k
                } else {
499
                    // Interpret the apostrophe as literal text.
500
                    // Add a Part for auto-quoting.
501
993k
                    addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
502
993k
                            u_apos, errorCode);  // value=char to be inserted
503
993k
                    needsAutoQuoting=true;
504
993k
                }
505
1.46M
            }
506
104M
        } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
507
            // The unquoted # in a plural message fragment will be replaced
508
            // with the (number-offset).
509
5.45k
            addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
510
104M
        } else if(c==u_leftCurlyBrace) {
511
625k
            index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
512
104M
        } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
513
104M
                  (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
514
            // Finish the message before the terminator.
515
            // In a choice style, report the "}" substring only for the following ARG_LIMIT,
516
            // not for this MSG_LIMIT.
517
663k
            int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
518
663k
            addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
519
663k
                         nestingLevel, errorCode);
520
663k
            if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
521
                // Let the choice style parser see the '}' or '|'.
522
577k
                return index-1;
523
577k
            } else {
524
                // continue parsing after the '}'
525
85.8k
                return index;
526
85.8k
            }
527
663k
        }  // else: c is part of literal text
528
106M
    }
529
9.05k
    if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
530
372
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
531
372
        errorCode=U_UNMATCHED_BRACES;
532
372
        return 0;
533
372
    }
534
8.68k
    addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
535
8.68k
    return index;
536
9.05k
}
537
538
int32_t
539
MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
540
625k
                         UParseError *parseError, UErrorCode &errorCode) {
541
625k
    int32_t argStart=partsLength;
542
625k
    UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
543
625k
    addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
544
625k
    if(U_FAILURE(errorCode)) {
545
0
        return 0;
546
0
    }
547
625k
    int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
548
625k
    if(index==msg.length()) {
549
207
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
550
207
        errorCode=U_UNMATCHED_BRACES;
551
207
        return 0;
552
207
    }
553
    // parse argument name or number
554
625k
    index=skipIdentifier(index);
555
625k
    int32_t number=parseArgNumber(nameIndex, index);
556
625k
    if(number>=0) {
557
329k
        int32_t length=index-nameIndex;
558
329k
        if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
559
92
            setParseError(parseError, nameIndex);  // Argument number too large.
560
92
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
561
92
            return 0;
562
92
        }
563
329k
        hasArgNumbers=true;
564
329k
        addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
565
329k
    } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
566
293k
        int32_t length=index-nameIndex;
567
293k
        if(length>Part::MAX_LENGTH) {
568
4
            setParseError(parseError, nameIndex);  // Argument name too long.
569
4
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
570
4
            return 0;
571
4
        }
572
293k
        hasArgNames=true;
573
293k
        addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
574
293k
    } else {  // number<-1 (ARG_NAME_NOT_VALID)
575
2.06k
        setParseError(parseError, nameIndex);  // Bad argument syntax.
576
2.06k
        errorCode=U_PATTERN_SYNTAX_ERROR;
577
2.06k
        return 0;
578
2.06k
    }
579
622k
    index=skipWhiteSpace(index);
580
622k
    if(index==msg.length()) {
581
775
        setParseError(parseError, 0);  // Unmatched '{' braces in message.
582
775
        errorCode=U_UNMATCHED_BRACES;
583
775
        return 0;
584
775
    }
585
622k
    char16_t c=msg.charAt(index);
586
622k
    if(c==u_rightCurlyBrace) {
587
        // all done
588
351k
    } else if(c!=u_comma) {
589
514
        setParseError(parseError, nameIndex);  // Bad argument syntax.
590
514
        errorCode=U_PATTERN_SYNTAX_ERROR;
591
514
        return 0;
592
269k
    } else /* ',' */ {
593
        // parse argument type: case-sensitive a-zA-Z
594
269k
        int32_t typeIndex=index=skipWhiteSpace(index+1);
595
3.17M
        while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
596
2.90M
            ++index;
597
2.90M
        }
598
269k
        int32_t length=index-typeIndex;
599
269k
        index=skipWhiteSpace(index);
600
269k
        if(index==msg.length()) {
601
116
            setParseError(parseError, 0);  // Unmatched '{' braces in message.
602
116
            errorCode=U_UNMATCHED_BRACES;
603
116
            return 0;
604
116
        }
605
269k
        if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
606
452
            setParseError(parseError, nameIndex);  // Bad argument syntax.
607
452
            errorCode=U_PATTERN_SYNTAX_ERROR;
608
452
            return 0;
609
452
        }
610
269k
        if(length>Part::MAX_LENGTH) {
611
2
            setParseError(parseError, nameIndex);  // Argument type name too long.
612
2
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613
2
            return 0;
614
2
        }
615
269k
        argType=UMSGPAT_ARG_TYPE_SIMPLE;
616
269k
        if(length==6) {
617
            // case-insensitive comparisons for complex-type names
618
90.4k
            if(isChoice(typeIndex)) {
619
32.0k
                argType=UMSGPAT_ARG_TYPE_CHOICE;
620
58.3k
            } else if(isPlural(typeIndex)) {
621
45.8k
                argType=UMSGPAT_ARG_TYPE_PLURAL;
622
45.8k
            } else if(isSelect(typeIndex)) {
623
6.94k
                argType=UMSGPAT_ARG_TYPE_SELECT;
624
6.94k
            }
625
178k
        } else if(length==13) {
626
7.85k
            if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
627
1.60k
                argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
628
1.60k
            }
629
7.85k
        }
630
        // change the ARG_START type from NONE to argType
631
269k
        partsList->a[argStart].value = static_cast<int16_t>(argType);
632
269k
        if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
633
182k
            addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
634
182k
        }
635
        // look for an argument style (pattern)
636
269k
        if(c==u_rightCurlyBrace) {
637
70.4k
            if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
638
6
                setParseError(parseError, nameIndex);  // No style field for complex argument.
639
6
                errorCode=U_PATTERN_SYNTAX_ERROR;
640
6
                return 0;
641
6
            }
642
198k
        } else /* ',' */ {
643
198k
            ++index;
644
198k
            if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
645
112k
                index=parseSimpleStyle(index, parseError, errorCode);
646
112k
            } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
647
32.0k
                index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
648
54.4k
            } else {
649
54.4k
                index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
650
54.4k
            }
651
198k
        }
652
269k
    }
653
    // Argument parsing stopped on the '}'.
654
621k
    addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
655
621k
    return index+1;
656
622k
}
657
658
int32_t
659
112k
MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
660
112k
    if(U_FAILURE(errorCode)) {
661
0
        return 0;
662
0
    }
663
112k
    int32_t start=index;
664
112k
    int32_t nestedBraces=0;
665
32.2M
    while(index<msg.length()) {
666
32.2M
        char16_t c=msg.charAt(index++);
667
32.2M
        if(c==u_apos) {
668
            // Treat apostrophe as quoting but include it in the style part.
669
            // Find the end of the quoted literal text.
670
101k
            index=msg.indexOf(u_apos, index);
671
101k
            if(index<0) {
672
                // Quoted literal argument style text reaches to the end of the message.
673
87
                setParseError(parseError, start);
674
87
                errorCode=U_PATTERN_SYNTAX_ERROR;
675
87
                return 0;
676
87
            }
677
            // skip the quote-ending apostrophe
678
101k
            ++index;
679
32.1M
        } else if(c==u_leftCurlyBrace) {
680
34.3k
            ++nestedBraces;
681
32.0M
        } else if(c==u_rightCurlyBrace) {
682
130k
            if(nestedBraces>0) {
683
18.5k
                --nestedBraces;
684
111k
            } else {
685
111k
                int32_t length=--index-start;
686
111k
                if(length>Part::MAX_LENGTH) {
687
7
                    setParseError(parseError, start);  // Argument style text too long.
688
7
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
689
7
                    return 0;
690
7
                }
691
111k
                addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
692
111k
                return index;
693
111k
            }
694
130k
        }  // c is part of literal text
695
32.2M
    }
696
551
    setParseError(parseError, 0);  // Unmatched '{' braces in message.
697
551
    errorCode=U_UNMATCHED_BRACES;
698
551
    return 0;
699
112k
}
700
701
// Parses a choice argument style: '|'-separated (number, separator, message) triples.
// index is where the style text begins; nestingLevel is forwarded (plus one)
// to parseMessage() for each message fragment.
// Returns the index of the terminating '}' or msg.length() on success.
// On failure sets errorCode (and the parse error, if requested) and returns 0.
int32_t
MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
                                 UParseError *parseError, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    const int32_t styleStart=index;
    index=skipWhiteSpace(index);
    if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
        // The choice argument has no pattern at all.
        setParseError(parseError, 0);
        errorCode=U_PATTERN_SYNTAX_ERROR;
        return 0;
    }
    for(;;) {
        // 1. The number that starts the triple.
        const int32_t numberStart=index;
        index=skipDouble(index);
        const int32_t numberLength=index-numberStart;
        if(numberLength==0) {
            // Nothing number-like here: malformed choice pattern.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        if(numberLength>Part::MAX_LENGTH) {
            // The number text does not fit into a Part.
            setParseError(parseError, numberStart);
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
        // Adds an ARG_INT or ARG_DOUBLE part; infinity is allowed here.
        parseDouble(numberStart, index, true, parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }

        // 2. The separator: '#', '<' or U+2264 (less-or-equal).
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        const char16_t separator=msg.charAt(index);
        if(separator!=u_pound && separator!=u_lessThan && separator!=u_lessOrEqual) {
            // Some other character where a choice separator was expected.
            setParseError(parseError, styleStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);

        // 3. The message fragment, starting right after the separator.
        index=parseMessage(index+1, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }
        // For a CHOICE parent, parseMessage() stops at the terminator or at msg.length().
        if(index==msg.length()) {
            return index;
        }
        if(msg.charAt(index)!=u_rightCurlyBrace) {
            // The terminator is '|': move on to the next triple.
            index=skipWhiteSpace(index+1);
            continue;
        }
        if(inMessageFormatPattern(nestingLevel)) {
            return index;
        }
        // A closing '}' is not acceptable outside of a MessageFormat pattern.
        setParseError(parseError, styleStart);
        errorCode=U_PATTERN_SYNTAX_ERROR;
        return 0;
    }
}
768
769
// Parses a plural, selectordinal or select argument style:
// an optional "offset:value" (plural styles only) followed by selector {message} pairs.
// Returns the index of the terminating '}' or msg.length() on success.
// On failure sets errorCode (and the parse error, if requested) and returns 0.
int32_t
MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
                                         int32_t index, int32_t nestingLevel,
                                         UParseError *parseError, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    const int32_t styleStart=index;
    const UBool pluralStyle=UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType);
    UBool sawOther=false;
    UBool nothingParsedYet=true;
    for(;;) {
        // Collect the next selector, watching for the few possible terminators.
        index=skipWhiteSpace(index);
        UBool eos=index==msg.length();
        if(eos || msg.charAt(index)==u_rightCurlyBrace) {
            if(eos==inMessageFormatPattern(nestingLevel)) {
                // End of string inside a MessageFormat pattern, or '}' outside of one.
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            if(!sawOther) {
                // The mandatory 'other' keyword never appeared.
                setParseError(parseError, 0);
                errorCode=U_DEFAULT_KEYWORD_MISSING;
                return 0;
            }
            return index;
        }

        const int32_t selectorStart=index;
        if(pluralStyle && msg.charAt(selectorStart)==u_equal) {
            // Explicit-value plural selector of the form "=double".
            index=skipDouble(index+1);
            const int32_t selectorLength=index-selectorStart;
            if(selectorLength==1) {
                // Just the '=' without any number.
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            if(selectorLength>Part::MAX_LENGTH) {
                // The selector text does not fit into a Part.
                setParseError(parseError, selectorStart);
                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorStart, selectorLength, 0, errorCode);
            // Adds an ARG_INT or ARG_DOUBLE part for the number after the '='.
            parseDouble(selectorStart+1, index, false, parseError, errorCode);
        } else {
            index=skipIdentifier(index);
            const int32_t selectorLength=index-selectorStart;
            if(selectorLength==0) {
                setParseError(parseError, styleStart);
                errorCode=U_PATTERN_SYNTAX_ERROR;
                return 0;
            }
            // skipIdentifier() stops in front of the ':' of "offset:",
            // so the identifier is 6 long and the comparison covers 7 code units.
            if(pluralStyle && selectorLength==6 && index<msg.length() &&
                    0==msg.compare(selectorStart, 7, kOffsetColon, 0, 7)) {
                // This is the plural offset rather than a selector.
                if(!nothingParsedYet) {
                    // "offset:" is only allowed in front of all key-message pairs.
                    setParseError(parseError, styleStart);
                    errorCode=U_PATTERN_SYNTAX_ERROR;
                    return 0;
                }
                // index is at the ':'; white space may precede the offset value.
                const int32_t valueStart=skipWhiteSpace(index+1);
                index=skipDouble(valueStart);
                if(index==valueStart) {
                    // "offset:" without a value.
                    setParseError(parseError, styleStart);
                    errorCode=U_PATTERN_SYNTAX_ERROR;
                    return 0;
                }
                if((index-valueStart)>Part::MAX_LENGTH) {
                    // The offset value text does not fit into a Part.
                    setParseError(parseError, valueStart);
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
                // Adds an ARG_INT or ARG_DOUBLE part for the offset value.
                parseDouble(valueStart, index, false, parseError, errorCode);
                if(U_FAILURE(errorCode)) {
                    return 0;
                }
                nothingParsedYet=false;
                continue;  // The offset is not followed by a message fragment.
            }
            // An ordinary selector keyword.
            if(selectorLength>Part::MAX_LENGTH) {
                setParseError(parseError, selectorStart);
                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorStart, selectorLength, 0, errorCode);
            if(0==msg.compare(selectorStart, selectorLength, kOther, 0, 5)) {
                sawOther=true;
            }
        }
        if(U_FAILURE(errorCode)) {
            return 0;
        }

        // The selector must be followed by a {message} fragment.
        index=skipWhiteSpace(index);
        if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
            setParseError(parseError, selectorStart);
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
        index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }
        nothingParsedYet=false;
    }
}
886
887
// Interprets s[start, limit) as an argument number or name.
// Returns:
// - the non-negative number if the text consists only of ASCII digits,
//   has no leading zero (except for "0" itself) and does not overflow;
// - UMSGPAT_ARG_NAME_NOT_VALID if the range is empty, or if it is all ASCII digits
//   but has a leading zero or is too large;
// - UMSGPAT_ARG_NAME_NOT_NUMBER if any character is not an ASCII digit.
int32_t
MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
    // If the identifier contains only ASCII digits, then it is an argument _number_
    // and must not have leading zeros (except "0" itself).
    // Otherwise it is an argument _name_.
    if(start>=limit) {
        return UMSGPAT_ARG_NAME_NOT_VALID;
    }
    int32_t number;
    // Defer numeric errors until we know there are only digits.
    UBool badNumber;
    char16_t c=s.charAt(start++);
    if(c==0x30) {
        if(start==limit) {
            return 0;
        } else {
            number=0;
            badNumber=true;  // leading zero
        }
    } else if(0x31<=c && c<=0x39) {
        number=c-0x30;
        badNumber=false;
    } else {
        return UMSGPAT_ARG_NAME_NOT_NUMBER;
    }
    while(start<limit) {
        c=s.charAt(start++);
        if(0x30<=c && c<=0x39) {
            if(number>=INT32_MAX/10) {
                // Overflow: remember the error but stop accumulating.
                // Multiplying further would overflow the signed int32_t,
                // which is undefined behavior; the value is not returned anyway.
                // Keep scanning so that a later non-digit is still detected.
                badNumber=true;
            } else {
                number=number*10+(c-0x30);
            }
        } else {
            return UMSGPAT_ARG_NAME_NOT_NUMBER;
        }
    }
    // There are only ASCII digits.
    if(badNumber) {
        return UMSGPAT_ARG_NAME_NOT_VALID;
    } else {
        return number;
    }
}
930
931
void
932
MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
933
610k
                            UParseError *parseError, UErrorCode &errorCode) {
934
610k
    if(U_FAILURE(errorCode)) {
935
0
        return;
936
0
    }
937
610k
    U_ASSERT(start<limit);
938
    // fake loop for easy exit and single throw statement
939
610k
    for(;;) { /*loop doesn't iterate*/
940
        // fast path for small integers and infinity
941
610k
        int32_t value=0;
942
610k
        int32_t isNegative=0;  // not boolean so that we can easily add it to value
943
610k
        int32_t index=start;
944
610k
        char16_t c=msg.charAt(index++);
945
610k
        if(c==u_minus) {
946
10.2k
            isNegative=1;
947
10.2k
            if(index==limit) {
948
46
                break;  // no number
949
46
            }
950
10.2k
            c=msg.charAt(index++);
951
600k
        } else if(c==u_plus) {
952
4.70k
            if(index==limit) {
953
26
                break;  // no number
954
26
            }
955
4.68k
            c=msg.charAt(index++);
956
4.68k
        }
957
610k
        if(c==0x221e) {  // infinity
958
613
            if(allowInfinity && index==limit) {
959
583
                double infinity=uprv_getInfinity();
960
583
                addArgDoublePart(
961
583
                    isNegative!=0 ? -infinity : infinity,
962
583
                    start, limit-start, errorCode);
963
583
                return;
964
583
            } else {
965
30
                break;
966
30
            }
967
613
        }
968
        // try to parse the number as a small integer but fall back to a double
969
747k
        while('0'<=c && c<='9') {
970
277k
            value=value*10+(c-'0');
971
277k
            if(value>(Part::MAX_VALUE+isNegative)) {
972
23.1k
                break;  // not a small-enough integer
973
23.1k
            }
974
254k
            if(index==limit) {
975
116k
                addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
976
116k
                        isNegative!=0 ? -value : value, errorCode);
977
116k
                return;
978
116k
            }
979
137k
            c=msg.charAt(index++);
980
137k
        }
981
        // Let Double.parseDouble() throw a NumberFormatException.
982
492k
        char numberChars[128];
983
492k
        int32_t capacity = static_cast<int32_t>(sizeof(numberChars));
984
492k
        int32_t length=limit-start;
985
492k
        if(length>=capacity) {
986
117
            break;  // number too long
987
117
        }
988
492k
        msg.extract(start, length, numberChars, capacity, US_INV);
989
492k
        if (static_cast<int32_t>(uprv_strlen(numberChars)) < length) {
990
28
            break;  // contains non-invariant character that was turned into NUL
991
28
        }
992
492k
        char *end;
993
492k
        double numericValue=uprv_strtod(numberChars, &end);
994
492k
        if(end!=(numberChars+length)) {
995
651
            break;  // parsing error
996
651
        }
997
491k
        addArgDoublePart(numericValue, start, length, errorCode);
998
491k
        return;
999
492k
    }
1000
898
    setParseError(parseError, start /*, limit*/);  // Bad syntax for numeric value.
1001
898
    errorCode=U_PATTERN_SYNTAX_ERROR;
1002
898
}
1003
1004
// Returns the index reached by PatternProps::skipWhiteSpace()
// when it is applied to the pattern string starting at index.
int32_t
MessagePattern::skipWhiteSpace(int32_t index) {
    const char16_t *const base=msg.getBuffer();
    const int32_t remaining=msg.length()-index;
    const char16_t *const next=PatternProps::skipWhiteSpace(base+index, remaining);
    // Convert the returned pointer back into a string index.
    return static_cast<int32_t>(next-base);
}
1011
1012
// Returns the index reached by PatternProps::skipIdentifier()
// when it is applied to the pattern string starting at index.
int32_t
MessagePattern::skipIdentifier(int32_t index) {
    const char16_t *const base=msg.getBuffer();
    const int32_t remaining=msg.length()-index;
    const char16_t *const next=PatternProps::skipIdentifier(base+index, remaining);
    // Convert the returned pointer back into a string index.
    return static_cast<int32_t>(next-base);
}
1019
1020
// Skips a run of characters that may occur in a number:
// ASCII digits, '+', '-', '.', 'e', 'E' and the infinity sign U+221E
// (the latter for ChoiceFormat patterns).
// Returns the index of the first other character, or msg.length().
// This only delimits the text; it does not validate the number syntax.
int32_t
MessagePattern::skipDouble(int32_t index) {
    const int32_t msgLength=msg.length();
    for(; index<msgLength; ++index) {
        const char16_t c=msg.charAt(index);
        const UBool isDigit=0x30<=c && c<=0x39;
        const UBool isSignOrDot=c==u_plus || c==u_minus || c==u_dot;
        const UBool isExponentOrInfinity=c==u_e || c==u_E || c==0x221e;
        if(!(isDigit || isSignOrDot || isExponentOrInfinity)) {
            break;
        }
    }
    return index;
}
1033
1034
// Returns true if c is an ASCII letter a-z or A-Z.
UBool
MessagePattern::isArgTypeChar(UChar32 c) {
    if(u_a<=c && c<=u_z) {
        return true;
    }
    return u_A<=c && c<=u_Z;
}
1038
1039
// Returns true if the six characters starting at index spell "choice",
// ignoring ASCII case.
UBool
MessagePattern::isChoice(int32_t index) {
    static const char16_t lower[6]={ u_c, u_h, u_o, u_i, u_c, u_e };
    static const char16_t upper[6]={ u_C, u_H, u_O, u_I, u_C, u_E };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1050
1051
// Returns true if the six characters starting at index spell "plural",
// ignoring ASCII case.
UBool
MessagePattern::isPlural(int32_t index) {
    static const char16_t lower[6]={ u_p, u_l, u_u, u_r, u_a, u_l };
    static const char16_t upper[6]={ u_P, u_L, u_U, u_R, u_A, u_L };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1062
1063
// Returns true if the six characters starting at index spell "select",
// ignoring ASCII case.
UBool
MessagePattern::isSelect(int32_t index) {
    static const char16_t lower[6]={ u_s, u_e, u_l, u_e, u_c, u_t };
    static const char16_t upper[6]={ u_S, u_E, u_L, u_E, u_C, u_T };
    for(int32_t i=0; i<6; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1074
1075
// Returns true if the seven characters starting at index spell "ordinal",
// ignoring ASCII case.
UBool
MessagePattern::isOrdinal(int32_t index) {
    static const char16_t lower[7]={ u_o, u_r, u_d, u_i, u_n, u_a, u_l };
    static const char16_t upper[7]={ u_O, u_R, u_D, u_I, u_N, u_A, u_L };
    for(int32_t i=0; i<7; ++i) {
        const char16_t c=msg.charAt(index+i);
        if(c!=lower[i] && c!=upper[i]) {
            return false;
        }
    }
    return true;
}
1087
1088
// Returns true if nestingLevel is greater than 0
// or if the very first part is a MSG_START.
UBool
MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
    if(nestingLevel>0) {
        return true;
    }
    return partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
}
1092
1093
// Returns true only if nestingLevel is 1, the parent argument type is CHOICE,
// and the very first part is not a MSG_START.
UBool
MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
    if(nestingLevel!=1 || parentType!=UMSGPAT_ARG_TYPE_CHOICE) {
        return false;
    }
    return partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
}
1100
1101
void
1102
MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
1103
6.64M
                        int32_t value, UErrorCode &errorCode) {
1104
6.64M
    if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
1105
6.56M
        Part &part=partsList->a[partsLength++];
1106
6.56M
        part.type=type;
1107
6.56M
        part.index=index;
1108
6.56M
        part.length = static_cast<uint16_t>(length);
1109
6.56M
        part.value = static_cast<int16_t>(value);
1110
6.56M
        part.limitPartIndex=0;
1111
6.56M
    }
1112
6.64M
}
1113
1114
void
1115
MessagePattern::addLimitPart(int32_t start,
1116
                             UMessagePatternPartType type, int32_t index, int32_t length,
1117
1.29M
                             int32_t value, UErrorCode &errorCode) {
1118
1.29M
    partsList->a[start].limitPartIndex=partsLength;
1119
1.29M
    addPart(type, index, length, value, errorCode);
1120
1.29M
}
1121
1122
void
1123
MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
1124
492k
                                 UErrorCode &errorCode) {
1125
492k
    if(U_FAILURE(errorCode)) {
1126
0
        return;
1127
0
    }
1128
492k
    int32_t numericIndex=numericValuesLength;
1129
492k
    if(numericValuesList==nullptr) {
1130
2.39k
        numericValuesList=new MessagePatternDoubleList();
1131
2.39k
        if(numericValuesList==nullptr) {
1132
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
1133
0
            return;
1134
0
        }
1135
490k
    } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
1136
0
        return;
1137
490k
    } else {
1138
490k
        if(numericIndex>Part::MAX_VALUE) {
1139
2
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1140
2
            return;
1141
2
        }
1142
490k
    }
1143
492k
    numericValuesList->a[numericValuesLength++]=numericValue;
1144
492k
    addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
1145
492k
}
1146
1147
void
1148
13.8k
MessagePattern::setParseError(UParseError *parseError, int32_t index) {
1149
13.8k
    if(parseError==nullptr) {
1150
7.70k
        return;
1151
7.70k
    }
1152
6.19k
    parseError->offset=index;
1153
1154
    // Set preContext to some of msg before index.
1155
    // Avoid splitting a surrogate pair.
1156
6.19k
    int32_t length=index;
1157
6.19k
    if(length>=U_PARSE_CONTEXT_LEN) {
1158
1.30k
        length=U_PARSE_CONTEXT_LEN-1;
1159
1.30k
        if(length>0 && U16_IS_TRAIL(msg[index-length])) {
1160
6
            --length;
1161
6
        }
1162
1.30k
    }
1163
6.19k
    msg.extract(index-length, length, parseError->preContext);
1164
6.19k
    parseError->preContext[length]=0;
1165
1166
    // Set postContext to some of msg starting at index.
1167
6.19k
    length=msg.length()-index;
1168
6.19k
    if(length>=U_PARSE_CONTEXT_LEN) {
1169
3.39k
        length=U_PARSE_CONTEXT_LEN-1;
1170
3.39k
        if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
1171
12
            --length;
1172
12
        }
1173
3.39k
    }
1174
6.19k
    msg.extract(index, length, parseError->postContext);
1175
6.19k
    parseError->postContext[length]=0;
1176
6.19k
}
1177
1178
// MessageImpl ------------------------------------------------------------- ***
1179
1180
void
1181
MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
1182
0
                                      UnicodeString &sb) {
1183
0
    int32_t doubleApos=-1;
1184
0
    for(;;) {
1185
0
        int32_t i=s.indexOf(u_apos, start);
1186
0
        if(i<0 || i>=limit) {
1187
0
            sb.append(s, start, limit-start);
1188
0
            break;
1189
0
        }
1190
0
        if(i==doubleApos) {
1191
            // Double apostrophe at start-1 and start==i, append one.
1192
0
            sb.append(u_apos);
1193
0
            ++start;
1194
0
            doubleApos=-1;
1195
0
        } else {
1196
            // Append text between apostrophes and skip this one.
1197
0
            sb.append(s, start, i-start);
1198
0
            doubleApos=start=i+1;
1199
0
        }
1200
0
    }
1201
0
}
1202
1203
// Ported from second half of ICU4J SelectFormat.format(String).
1204
UnicodeString &
1205
MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
1206
                                               int32_t msgStart,
1207
0
                                               UnicodeString &result) {
1208
0
    const UnicodeString &msgString=msgPattern.getPatternString();
1209
0
    int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
1210
0
    for(int32_t i=msgStart;;) {
1211
0
        const MessagePattern::Part &part=msgPattern.getPart(++i);
1212
0
        UMessagePatternPartType type=part.getType();
1213
0
        int32_t index=part.getIndex();
1214
0
        if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1215
0
            return result.append(msgString, prevIndex, index-prevIndex);
1216
0
        } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
1217
0
            result.append(msgString, prevIndex, index-prevIndex);
1218
0
            prevIndex=part.getLimit();
1219
0
        } else if(type==UMSGPAT_PART_TYPE_ARG_START) {
1220
0
            result.append(msgString, prevIndex, index-prevIndex);
1221
0
            prevIndex=index;
1222
0
            i=msgPattern.getLimitPartIndex(i);
1223
0
            index=msgPattern.getPart(i).getLimit();
1224
0
            appendReducedApostrophes(msgString, prevIndex, index, result);
1225
0
            prevIndex=index;
1226
0
        }
1227
0
    }
1228
0
}
1229
1230
U_NAMESPACE_END
1231
1232
#endif  // !UCONFIG_NO_FORMATTING