Coverage Report

Created: 2026-02-05 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/messageformat2_parser.h
Line
Count
Source
1
// © 2024 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#ifndef U_HIDE_DEPRECATED_API
7
8
#ifndef MESSAGEFORMAT_PARSER_H
9
#define MESSAGEFORMAT_PARSER_H
10
11
#include "unicode/messageformat2_data_model.h"
12
#include "unicode/parseerr.h"
13
#include "unicode/uniset.h"
14
15
#include "messageformat2_allocation.h"
16
#include "messageformat2_errors.h"
17
18
#if U_SHOW_CPLUSPLUS_API
19
20
#if !UCONFIG_NO_NORMALIZATION
21
22
#if !UCONFIG_NO_FORMATTING
23
24
#if !UCONFIG_NO_MF2
25
26
U_NAMESPACE_BEGIN
27
28
namespace message2 {
29
30
    using namespace data_model;
31
32
    // Used for parameterizing options parsing code
33
    // over the two builders that use it (Operator and Markup)
34
    template <class T>
35
    class OptionAdder {
36
        private:
37
            T& builder;
38
        public:
39
43.3k
            OptionAdder(T& b) : builder(b) {}
icu_79::message2::OptionAdder<icu_79::message2::data_model::Operator::Builder>::OptionAdder(icu_79::message2::data_model::Operator::Builder&)
Line
Count
Source
39
36.5k
            OptionAdder(T& b) : builder(b) {}
icu_79::message2::OptionAdder<icu_79::message2::data_model::Markup::Builder>::OptionAdder(icu_79::message2::data_model::Markup::Builder&)
Line
Count
Source
39
6.76k
            OptionAdder(T& b) : builder(b) {}
40
5.48k
            void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
41
5.48k
                builder.addOption(k, std::move(r), s);
42
5.48k
            }
icu_79::message2::OptionAdder<icu_79::message2::data_model::Operator::Builder>::addOption(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&)
Line
Count
Source
40
1.59k
            void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
41
1.59k
                builder.addOption(k, std::move(r), s);
42
1.59k
            }
icu_79::message2::OptionAdder<icu_79::message2::data_model::Markup::Builder>::addOption(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&)
Line
Count
Source
40
3.88k
            void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
41
3.88k
                builder.addOption(k, std::move(r), s);
42
3.88k
            }
43
    };
44
45
    // Used for parameterizing attributes parsing code
46
    // over the two builders that use it (Expression and Markup)
47
    // Unfortunately the same OptionAdder class can't just be reused,
48
    // because duplicate options are forbidden while duplicate attributes are not
49
    template <class T>
50
    class AttributeAdder {
51
        private:
52
            T& builder;
53
        public:
54
53.0k
            AttributeAdder(T& b) : builder(b) {}
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Expression::Builder>::AttributeAdder(icu_79::message2::data_model::Expression::Builder&)
Line
Count
Source
54
46.7k
            AttributeAdder(T& b) : builder(b) {}
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Markup::Builder>::AttributeAdder(icu_79::message2::data_model::Markup::Builder&)
Line
Count
Source
54
6.32k
            AttributeAdder(T& b) : builder(b) {}
55
17.8k
            void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
56
17.8k
                builder.addAttribute(k, std::move(r), s);
57
17.8k
            }
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Expression::Builder>::addAttribute(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&)
Line
Count
Source
55
11.3k
            void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
56
11.3k
                builder.addAttribute(k, std::move(r), s);
57
11.3k
            }
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Markup::Builder>::addAttribute(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&)
Line
Count
Source
55
6.51k
            void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
56
6.51k
                builder.addAttribute(k, std::move(r), s);
57
6.51k
            }
58
    };
59
60
61
    // Initialization of UnicodeSets
62
    namespace unisets {
63
        enum Key {
64
            CONTENT,
65
            WHITESPACE,
66
            BIDI,
67
            ALPHA,
68
            DIGIT,
69
            NAME_START,
70
            NAME_CHAR,
71
            TEXT,
72
            QUOTED,
73
            ESCAPABLE,
74
            UNISETS_KEY_COUNT
75
        };
76
77
    U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status);
78
    }
79
80
    // Parser class (private)
81
    class Parser : public UMemory {
82
    public:
83
  virtual ~Parser();
84
    private:
85
        friend class MessageFormatter;
86
87
        void parse(UParseError&, UErrorCode&);
88
89
  /*
90
    Use an internal "parse error" structure to make it easier to translate
91
    absolute offsets to line offsets.
92
    This is translated back to a `UParseError` at the end of parsing.
93
  */
94
  typedef struct MessageParseError {
95
      // The line on which the error occurred
96
      uint32_t line;
97
      // The offset, relative to the erroneous line, on which the error occurred
98
      uint32_t offset;
99
      // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
100
      // It includes newline characters, because the index does too.
101
      uint32_t lengthBeforeCurrentLine;
102
103
      // This parser doesn't yet use the last two fields.
104
      UChar   preContext[U_PARSE_CONTEXT_LEN];
105
      UChar   postContext[U_PARSE_CONTEXT_LEN];
106
  } MessageParseError;
107
108
  // Constructs a parser over `input`.
  //
  // - Each character-class pointer (contentChars ... escapableChars) is
  //   initialized from unisets::get() with the matching unisets::Key, passing
  //   `status` through to every call.
  // - `source`, `errors`, `normalizedInput` and `dataModel` are reference
  //   members bound to the arguments, so the referenced objects must outlive
  //   this Parser.
  // - `index` starts at 0, and the internal parse-error record starts at
  //   line 0 / offset 0 with empty pre- and post-context strings.
  //
  // NOTE(review): the constructor does not inspect `status` itself; presumably
  // the caller checks it before calling parse(), since a failed unisets::get()
  // may leave a set pointer unusable -- confirm against unisets::get().
  Parser(const UnicodeString &input,
109
               MFDataModel::Builder& dataModelBuilder,
110
               StaticErrors& e,
111
               UnicodeString& normalizedInputRef,
112
               UErrorCode& status)
113
7.31k
            : contentChars(unisets::get(unisets::CONTENT, status)),
114
7.31k
              whitespaceChars(unisets::get(unisets::WHITESPACE, status)),
115
7.31k
              bidiControlChars(unisets::get(unisets::BIDI, status)),
116
7.31k
              alphaChars(unisets::get(unisets::ALPHA, status)),
117
7.31k
              digitChars(unisets::get(unisets::DIGIT, status)),
118
7.31k
              nameStartChars(unisets::get(unisets::NAME_START, status)),
119
7.31k
              nameChars(unisets::get(unisets::NAME_CHAR, status)),
120
7.31k
              textChars(unisets::get(unisets::TEXT, status)),
121
7.31k
              quotedChars(unisets::get(unisets::QUOTED, status)),
122
7.31k
              escapableChars(unisets::get(unisets::ESCAPABLE, status)),
123
7.31k
            source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
124
7.31k
            (void) status;
125
7.31k
    parseError.line = 0;
126
7.31k
    parseError.offset = 0;
127
7.31k
    parseError.lengthBeforeCurrentLine = 0;
128
7.31k
    parseError.preContext[0] = '\0';
129
7.31k
    parseError.postContext[0] = '\0';
130
7.31k
  }
131
132
        bool isContentChar(UChar32) const;
133
        bool isBidiControl(UChar32) const;
134
        bool isWhitespace(UChar32) const;
135
        bool isTextChar(UChar32) const;
136
        bool isQuotedChar(UChar32) const;
137
        bool isEscapableChar(UChar32) const;
138
        bool isAlpha(UChar32) const;
139
        bool isDigit(UChar32) const;
140
        bool isNameStart(UChar32) const;
141
        bool isNameChar(UChar32) const;
142
        bool isUnquotedStart(UChar32) const;
143
        bool isLiteralStart(UChar32) const;
144
        bool isKeyStart(UChar32) const;
145
146
  static void translateParseError(const MessageParseError&, UParseError&);
147
  static void setParseError(MessageParseError&, uint32_t);
148
  void maybeAdvanceLine();
149
        Pattern parseSimpleMessage(UErrorCode&);
150
        void parseBody(UErrorCode&);
151
  void parseDeclarations(UErrorCode&);
152
        void parseUnsupportedStatement(UErrorCode&);
153
        void parseLocalDeclaration(UErrorCode&);
154
        void parseInputDeclaration(UErrorCode&);
155
        void parseSelectors(UErrorCode&);
156
        void parseVariant(UErrorCode&);
157
158
  void parseRequiredWS(UErrorCode&);
159
  void parseRequiredWhitespace(UErrorCode&);
160
  void parseOptionalBidi();
161
  void parseOptionalWhitespace();
162
  void parseToken(UChar32, UErrorCode&);
163
  void parseTokenWithWhitespace(UChar32, UErrorCode&);
164
  void parseToken(const std::u16string_view&, UErrorCode&);
165
  void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&);
166
        bool nextIs(const std::u16string_view&) const;
167
  UnicodeString parseNameChars(UnicodeString&, UErrorCode&);
168
  UnicodeString parseName(UErrorCode&);
169
        UnicodeString parseIdentifier(UErrorCode&);
170
        UnicodeString parseDigits(UErrorCode&);
171
  VariableName parseVariableName(UErrorCode&);
172
  FunctionName parseFunction(UErrorCode&);
173
  UnicodeString parseEscapeSequence(UErrorCode&);
174
  Literal parseUnquotedLiteral(UErrorCode&);
175
        Literal parseQuotedLiteral(UErrorCode&);
176
  Literal parseLiteral(UErrorCode&);
177
        template<class T>
178
        void parseAttribute(AttributeAdder<T>&, UErrorCode&);
179
        template<class T>
180
        void parseAttributes(AttributeAdder<T>&, UErrorCode&);
181
        template<class T>
182
        void parseOption(OptionAdder<T>&, UErrorCode&);
183
        template<class T>
184
        void parseOptions(OptionAdder<T>&, UErrorCode&);
185
  Operator parseAnnotation(UErrorCode&);
186
  void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
187
        Markup parseMarkup(UErrorCode&);
188
  Expression parseExpression(UErrorCode&);
189
        std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
190
  UnicodeString parseTextChar(UErrorCode&);
191
  Key parseKey(UErrorCode&);
192
  SelectorKeys parseNonEmptyKeys(UErrorCode&);
193
  void errorPattern(UErrorCode& status);
194
  Pattern parseQuotedPattern(UErrorCode&);
195
        bool isDeclarationStart();
196
197
215M
        // Returns the code point of `source` at the current position (`index`)
        // without advancing.
        UChar32 peek() const {
            return source.char32At(static_cast<int32_t>(index));
        }
198
91.4k
        // Look-ahead variant of peek(): returns the code point of `source` at the
        // position obtained by moving `i` code points forward from `index`.
        // `index` itself is not modified (the method is const).
        UChar32 peek(uint32_t i) const {
199
91.4k
            return source.char32At(source.moveIndex32(index, i));
200
91.4k
        }
201
49.0M
        // Advances the current position (`index`) past one code point of `source`.
        void next() {
            const int32_t advanced = source.moveIndex32(static_cast<int32_t>(index), 1);
            index = static_cast<uint32_t>(advanced);
        }
202
203
101M
        // True while the current position (`index`) lies before the end of `source`.
        bool inBounds() const {
            return static_cast<int32_t>(index) < source.length();
        }
204
73.7k
        // True if the position `i` code points past the current one still lies
        // before the end of `source`.
        bool inBounds(uint32_t i) const {
            const int32_t target = source.moveIndex32(index, i);
            return target < source.length();
        }
205
6.95k
        // True once the current position (`index`) equals the length of `source`,
        // i.e. there is no input left to read.
        bool allConsumed() const {
            return source.length() == static_cast<int32_t>(index);
        }
206
207
        // UnicodeSets for checking character ranges
208
        const UnicodeSet* contentChars;
209
        const UnicodeSet* whitespaceChars;
210
        const UnicodeSet* bidiControlChars;
211
        const UnicodeSet* alphaChars;
212
        const UnicodeSet* digitChars;
213
        const UnicodeSet* nameStartChars;
214
        const UnicodeSet* nameChars;
215
        const UnicodeSet* textChars;
216
        const UnicodeSet* quotedChars;
217
        const UnicodeSet* escapableChars;
218
219
  // The input string
220
  const UnicodeString &source;
221
  // The current position within the input string: a UTF-16 code unit offset that is advanced one code point (UChar32) at a time
222
  uint32_t index;
223
  // Tracks the current line and, once an error has been recorded,
224
  // the character offset of that error within the line
225
  MessageParseError parseError;
226
227
  // The structure to use for recording errors
228
  StaticErrors& errors;
229
230
  // Normalized version of the input string (optional whitespace removed)
231
  UnicodeString& normalizedInput;
232
233
  // The parent builder
234
  MFDataModel::Builder &dataModel;
235
236
    }; // class Parser
237
} // namespace message2
238
239
U_NAMESPACE_END
240
241
#endif /* #if !UCONFIG_NO_MF2 */
242
243
#endif /* #if !UCONFIG_NO_FORMATTING */
244
245
#endif /* #if !UCONFIG_NO_NORMALIZATION */
246
247
#endif /* U_SHOW_CPLUSPLUS_API */
248
249
#endif // MESSAGEFORMAT_PARSER_H
250
251
#endif // U_HIDE_DEPRECATED_API
252
// eof