/src/icu/icu4c/source/i18n/messageformat2_parser.h
Line | Count | Source |
1 | | // © 2024 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #ifndef U_HIDE_DEPRECATED_API |
7 | | |
8 | | #ifndef MESSAGEFORMAT_PARSER_H |
9 | | #define MESSAGEFORMAT_PARSER_H |
10 | | |
11 | | #include "unicode/messageformat2_data_model.h" |
12 | | #include "unicode/parseerr.h" |
13 | | #include "unicode/uniset.h" |
14 | | |
15 | | #include "messageformat2_allocation.h" |
16 | | #include "messageformat2_errors.h" |
17 | | |
18 | | #if U_SHOW_CPLUSPLUS_API |
19 | | |
20 | | #if !UCONFIG_NO_NORMALIZATION |
21 | | |
22 | | #if !UCONFIG_NO_FORMATTING |
23 | | |
24 | | #if !UCONFIG_NO_MF2 |
25 | | |
26 | | U_NAMESPACE_BEGIN |
27 | | |
28 | | namespace message2 { |
29 | | |
30 | | using namespace data_model; |
31 | | |
32 | | // Adapter used for parameterizing the options parsing code |
33 | | // over the two builders that use it (Operator::Builder and Markup::Builder): it keeps a reference to the builder and forwards addOption() to it |
34 | | template <class T> |
35 | | class OptionAdder { |
36 | | private: |
37 | | T& builder; |
38 | | public: |
39 | 43.3k | OptionAdder(T& b) : builder(b) {}icu_79::message2::OptionAdder<icu_79::message2::data_model::Operator::Builder>::OptionAdder(icu_79::message2::data_model::Operator::Builder&) Line | Count | Source | 39 | 36.5k | OptionAdder(T& b) : builder(b) {} |
icu_79::message2::OptionAdder<icu_79::message2::data_model::Markup::Builder>::OptionAdder(icu_79::message2::data_model::Markup::Builder&) Line | Count | Source | 39 | 6.76k | OptionAdder(T& b) : builder(b) {} |
|
40 | 5.48k | void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) { |
41 | 5.48k | builder.addOption(k, std::move(r), s); |
42 | 5.48k | } icu_79::message2::OptionAdder<icu_79::message2::data_model::Operator::Builder>::addOption(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&) Line | Count | Source | 40 | 1.59k | void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) { | 41 | 1.59k | builder.addOption(k, std::move(r), s); | 42 | 1.59k | } |
icu_79::message2::OptionAdder<icu_79::message2::data_model::Markup::Builder>::addOption(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&) Line | Count | Source | 40 | 3.88k | void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) { | 41 | 3.88k | builder.addOption(k, std::move(r), s); | 42 | 3.88k | } |
|
43 | | }; |
44 | | |
45 | | // Adapter used for parameterizing the attributes parsing code |
46 | | // over the two builders that use it (Expression::Builder and Markup::Builder): it keeps a reference to the builder and forwards addAttribute() to it. |
47 | | // Unfortunately the same OptionAdder class can't just be reused, |
48 | | // because duplicate options are forbidden while duplicate attributes are not |
49 | | template <class T> |
50 | | class AttributeAdder { |
51 | | private: |
52 | | T& builder; |
53 | | public: |
54 | 53.0k | AttributeAdder(T& b) : builder(b) {}icu_79::message2::AttributeAdder<icu_79::message2::data_model::Expression::Builder>::AttributeAdder(icu_79::message2::data_model::Expression::Builder&) Line | Count | Source | 54 | 46.7k | AttributeAdder(T& b) : builder(b) {} |
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Markup::Builder>::AttributeAdder(icu_79::message2::data_model::Markup::Builder&) Line | Count | Source | 54 | 6.32k | AttributeAdder(T& b) : builder(b) {} |
|
55 | 17.8k | void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) { |
56 | 17.8k | builder.addAttribute(k, std::move(r), s); |
57 | 17.8k | } icu_79::message2::AttributeAdder<icu_79::message2::data_model::Expression::Builder>::addAttribute(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&) Line | Count | Source | 55 | 11.3k | void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) { | 56 | 11.3k | builder.addAttribute(k, std::move(r), s); | 57 | 11.3k | } |
icu_79::message2::AttributeAdder<icu_79::message2::data_model::Markup::Builder>::addAttribute(icu_79::UnicodeString const&, icu_79::message2::data_model::Operand&&, UErrorCode&) Line | Count | Source | 55 | 6.51k | void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) { | 56 | 6.51k | builder.addAttribute(k, std::move(r), s); | 57 | 6.51k | } |
|
58 | | }; |
59 | | |
60 | | |
61 | | // Initialization of UnicodeSets: Key names one set per character class, and get() returns the set for a key. UNISETS_KEY_COUNT, the last enumerator, equals the number of keys before it. |
62 | | namespace unisets { |
63 | | enum Key { |
64 | | CONTENT, |
65 | | WHITESPACE, |
66 | | BIDI, |
67 | | ALPHA, |
68 | | DIGIT, |
69 | | NAME_START, |
70 | | NAME_CHAR, |
71 | | TEXT, |
72 | | QUOTED, |
73 | | ESCAPABLE, |
74 | | UNISETS_KEY_COUNT |
75 | | }; |
76 | | |
77 | | U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status); |
78 | | } |
79 | | |
80 | | // Parser class (private): everything except the destructor is private, and MessageFormatter is declared a friend |
81 | | class Parser : public UMemory { |
82 | | public: |
83 | | virtual ~Parser(); |
84 | | private: |
85 | | friend class MessageFormatter; |
86 | | |
87 | | void parse(UParseError&, UErrorCode&); |
88 | | |
89 | | /* |
90 | | Use an internal "parse error" structure to make it easier to translate |
91 | | absolute offsets to line offsets. |
92 | | This is translated back to a `UParseError` at the end of parsing. |
93 | | */ |
94 | | typedef struct MessageParseError { |
95 | | // The line on which the error occurred |
96 | | uint32_t line; |
97 | | // The offset, relative to the erroneous line, on which the error occurred |
98 | | uint32_t offset; |
99 | | // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0. |
100 | | // It includes newline characters, because the index does too. |
101 | | uint32_t lengthBeforeCurrentLine; |
102 | | |
103 | | // This parser doesn't yet use the last two fields; the constructor only stores a terminator in their first element. |
104 | | UChar preContext[U_PARSE_CONTEXT_LEN]; |
105 | | UChar postContext[U_PARSE_CONTEXT_LEN]; |
106 | | } MessageParseError; |
107 | | |
108 | | Parser(const UnicodeString &input, |
109 | | MFDataModel::Builder& dataModelBuilder, |
110 | | StaticErrors& e, |
111 | | UnicodeString& normalizedInputRef, |
112 | | UErrorCode& status) |
113 | 7.31k | : contentChars(unisets::get(unisets::CONTENT, status)), |
114 | 7.31k | whitespaceChars(unisets::get(unisets::WHITESPACE, status)), |
115 | 7.31k | bidiControlChars(unisets::get(unisets::BIDI, status)), |
116 | 7.31k | alphaChars(unisets::get(unisets::ALPHA, status)), |
117 | 7.31k | digitChars(unisets::get(unisets::DIGIT, status)), |
118 | 7.31k | nameStartChars(unisets::get(unisets::NAME_START, status)), |
119 | 7.31k | nameChars(unisets::get(unisets::NAME_CHAR, status)), |
120 | 7.31k | textChars(unisets::get(unisets::TEXT, status)), |
121 | 7.31k | quotedChars(unisets::get(unisets::QUOTED, status)), |
122 | 7.31k | escapableChars(unisets::get(unisets::ESCAPABLE, status)), |
123 | 7.31k | source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) { |
124 | 7.31k | (void) status; |
125 | 7.31k | parseError.line = 0; |
126 | 7.31k | parseError.offset = 0; |
127 | 7.31k | parseError.lengthBeforeCurrentLine = 0; |
128 | 7.31k | parseError.preContext[0] = '\0'; |
129 | 7.31k | parseError.postContext[0] = '\0'; |
130 | 7.31k | } |
131 | | |
132 | | bool isContentChar(UChar32) const; |
133 | | bool isBidiControl(UChar32) const; |
134 | | bool isWhitespace(UChar32) const; |
135 | | bool isTextChar(UChar32) const; |
136 | | bool isQuotedChar(UChar32) const; |
137 | | bool isEscapableChar(UChar32) const; |
138 | | bool isAlpha(UChar32) const; |
139 | | bool isDigit(UChar32) const; |
140 | | bool isNameStart(UChar32) const; |
141 | | bool isNameChar(UChar32) const; |
142 | | bool isUnquotedStart(UChar32) const; |
143 | | bool isLiteralStart(UChar32) const; |
144 | | bool isKeyStart(UChar32) const; |
145 | | |
146 | | static void translateParseError(const MessageParseError&, UParseError&); |
147 | | static void setParseError(MessageParseError&, uint32_t); |
148 | | void maybeAdvanceLine(); |
149 | | Pattern parseSimpleMessage(UErrorCode&); |
150 | | void parseBody(UErrorCode&); |
151 | | void parseDeclarations(UErrorCode&); |
152 | | void parseUnsupportedStatement(UErrorCode&); |
153 | | void parseLocalDeclaration(UErrorCode&); |
154 | | void parseInputDeclaration(UErrorCode&); |
155 | | void parseSelectors(UErrorCode&); |
156 | | void parseVariant(UErrorCode&); |
157 | | |
158 | | void parseRequiredWS(UErrorCode&); |
159 | | void parseRequiredWhitespace(UErrorCode&); |
160 | | void parseOptionalBidi(); |
161 | | void parseOptionalWhitespace(); |
162 | | void parseToken(UChar32, UErrorCode&); |
163 | | void parseTokenWithWhitespace(UChar32, UErrorCode&); |
164 | | void parseToken(const std::u16string_view&, UErrorCode&); |
165 | | void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&); |
166 | | bool nextIs(const std::u16string_view&) const; |
167 | | UnicodeString parseNameChars(UnicodeString&, UErrorCode&); |
168 | | UnicodeString parseName(UErrorCode&); |
169 | | UnicodeString parseIdentifier(UErrorCode&); |
170 | | UnicodeString parseDigits(UErrorCode&); |
171 | | VariableName parseVariableName(UErrorCode&); |
172 | | FunctionName parseFunction(UErrorCode&); |
173 | | UnicodeString parseEscapeSequence(UErrorCode&); |
174 | | Literal parseUnquotedLiteral(UErrorCode&); |
175 | | Literal parseQuotedLiteral(UErrorCode&); |
176 | | Literal parseLiteral(UErrorCode&); |
177 | | template<class T> |
178 | | void parseAttribute(AttributeAdder<T>&, UErrorCode&); |
179 | | template<class T> |
180 | | void parseAttributes(AttributeAdder<T>&, UErrorCode&); |
181 | | template<class T> |
182 | | void parseOption(OptionAdder<T>&, UErrorCode&); |
183 | | template<class T> |
184 | | void parseOptions(OptionAdder<T>&, UErrorCode&); |
185 | | Operator parseAnnotation(UErrorCode&); |
186 | | void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&); |
187 | | Markup parseMarkup(UErrorCode&); |
188 | | Expression parseExpression(UErrorCode&); |
189 | | std::variant<Expression, Markup> parsePlaceholder(UErrorCode&); |
190 | | UnicodeString parseTextChar(UErrorCode&); |
191 | | Key parseKey(UErrorCode&); |
192 | | SelectorKeys parseNonEmptyKeys(UErrorCode&); |
193 | | void errorPattern(UErrorCode& status); |
194 | | Pattern parseQuotedPattern(UErrorCode&); |
195 | | bool isDeclarationStart(); |
196 | | |
197 | 215M | UChar32 peek() const { return source.char32At(index) ; } |
198 | 91.4k | UChar32 peek(uint32_t i) const { |
199 | 91.4k | return source.char32At(source.moveIndex32(index, i)); |
200 | 91.4k | } |
201 | 49.0M | void next() { index = source.moveIndex32(index, 1); } |
202 | | |
203 | 101M | bool inBounds() const { return (int32_t) index < source.length(); } |
204 | 73.7k | bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); } |
205 | 6.95k | bool allConsumed() const { return (int32_t) index == source.length(); } |
206 | | |
207 | | // UnicodeSets for checking character ranges; each pointer is obtained from unisets::get() in the constructor |
208 | | const UnicodeSet* contentChars; |
209 | | const UnicodeSet* whitespaceChars; |
210 | | const UnicodeSet* bidiControlChars; |
211 | | const UnicodeSet* alphaChars; |
212 | | const UnicodeSet* digitChars; |
213 | | const UnicodeSet* nameStartChars; |
214 | | const UnicodeSet* nameChars; |
215 | | const UnicodeSet* textChars; |
216 | | const UnicodeSet* quotedChars; |
217 | | const UnicodeSet* escapableChars; |
218 | | |
219 | | // The input string (held by reference, not copied) |
220 | | const UnicodeString &source; |
221 | | // The current position within the input string: an offset passed to source.char32At()/moveIndex32() and compared against source.length(). NOTE(review): earlier wording said "counting in UChar32" -- confirm the unit against the UnicodeString API. |
222 | | uint32_t index; |
223 | | // Represents the current line (and when an error is indicated), |
224 | | // character offset within the line of the parse error |
225 | | MessageParseError parseError; |
226 | | |
227 | | // The structure to use for recording errors |
228 | | StaticErrors& errors; |
229 | | |
230 | | // Normalized version of the input string (optional whitespace removed) |
231 | | UnicodeString& normalizedInput; |
232 | | |
233 | | // The parent builder (the MFDataModel::Builder reference passed to the constructor) |
234 | | MFDataModel::Builder &dataModel; |
235 | | |
236 | | }; // class Parser |
237 | | } // namespace message2 |
238 | | |
239 | | U_NAMESPACE_END |
240 | | |
241 | | #endif /* #if !UCONFIG_NO_MF2 */ |
242 | | |
243 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |
244 | | |
245 | | #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
246 | | |
247 | | #endif /* U_SHOW_CPLUSPLUS_API */ |
248 | | |
249 | | #endif // MESSAGEFORMAT_PARSER_H |
250 | | |
251 | | #endif // U_HIDE_DEPRECATED_API |
252 | | // eof |