/src/icu/icu4c/source/i18n/messageformat2_serializer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2024 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_NORMALIZATION |
7 | | |
8 | | #if !UCONFIG_NO_FORMATTING |
9 | | |
10 | | #if !UCONFIG_NO_MF2 |
11 | | |
12 | | #include "unicode/messageformat2_data_model.h" |
13 | | #include "messageformat2_macros.h" |
14 | | #include "messageformat2_serializer.h" |
15 | | #include "uvector.h" // U_ASSERT |
16 | | |
17 | | U_NAMESPACE_BEGIN |
18 | | |
19 | | namespace message2 { |
20 | | |
21 | | // Generates a string representation of a data model |
22 | | // ------------------------------------------------ |
23 | | |
24 | | using namespace data_model; |
25 | | |
26 | | // Private helper methods |
27 | | |
28 | 0 | void Serializer::whitespace() { |
29 | 0 | result += SPACE; |
30 | 0 | } |
31 | | |
32 | 0 | void Serializer::emit(UChar32 c) { |
33 | 0 | result += c; |
34 | 0 | } |
35 | | |
36 | 0 | void Serializer::emit(const UnicodeString& s) { |
37 | 0 | result += s; |
38 | 0 | } |
39 | | |
40 | 0 | void Serializer::emit(const std::u16string_view& token) { |
41 | 0 | result.append(token); |
42 | 0 | } |
43 | | |
44 | 0 | void Serializer::emit(const Literal& l) { |
45 | 0 | if (l.isQuoted()) { |
46 | 0 | emit(PIPE); |
47 | 0 | } |
48 | 0 | const UnicodeString& contents = l.unquoted(); |
49 | 0 | for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) { |
50 | | // Re-escape any escaped-char characters |
51 | 0 | switch(contents[i]) { |
52 | 0 | case BACKSLASH: |
53 | 0 | case PIPE: |
54 | 0 | case LEFT_CURLY_BRACE: |
55 | 0 | case RIGHT_CURLY_BRACE: { |
56 | 0 | emit(BACKSLASH); |
57 | 0 | break; |
58 | 0 | } |
59 | 0 | default: { |
60 | 0 | break; |
61 | 0 | } |
62 | 0 | } |
63 | 0 | emit(contents[i]); |
64 | 0 | } |
65 | 0 | if (l.isQuoted()) { |
66 | 0 | emit(PIPE); |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | 0 | void Serializer::emit(const Key& k) { |
71 | 0 | if (k.isWildcard()) { |
72 | 0 | emit(ASTERISK); |
73 | 0 | return; |
74 | 0 | } |
75 | 0 | emit(k.asLiteral()); |
76 | 0 | } |
77 | | |
78 | 0 | void Serializer::emit(const SelectorKeys& k) { |
79 | 0 | const Key* ks = k.getKeysInternal(); |
80 | 0 | int32_t len = k.len; |
81 | | // It would be an error for `keys` to be empty; |
82 | | // that would mean this is the single `pattern` |
83 | | // variant, and in that case, this method shouldn't be called |
84 | 0 | U_ASSERT(len > 0); |
85 | 0 | for (int32_t i = 0; i < len; i++) { |
86 | 0 | if (i != 0) { |
87 | 0 | whitespace(); |
88 | 0 | } |
89 | 0 | emit(ks[i]); |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | 0 | void Serializer::emit(const Operand& rand) { |
94 | 0 | U_ASSERT(!rand.isNull()); |
95 | |
|
96 | 0 | if (rand.isVariable()) { |
97 | 0 | emit(DOLLAR); |
98 | 0 | emit(rand.asVariable()); |
99 | 0 | } else { |
100 | | // Literal: quoted or unquoted |
101 | 0 | emit(rand.asLiteral()); |
102 | 0 | } |
103 | 0 | } |
104 | | |
105 | 0 | void Serializer::emit(const OptionMap& options) { |
106 | | // Errors should have been checked before this point |
107 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
108 | 0 | U_ASSERT(!options.bogus); |
109 | 0 | for (int32_t i = 0; i < options.size(); i++) { |
110 | 0 | const Option& opt = options.getOption(i, localStatus); |
111 | | // No need to check error code, since we already checked |
112 | | // that !bogus |
113 | 0 | whitespace(); |
114 | 0 | emit(opt.getName()); |
115 | 0 | emit(EQUALS); |
116 | 0 | emit(opt.getValue()); |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | 0 | void Serializer::emitAttributes(const OptionMap& attributes) { |
121 | | // Errors should have been checked before this point |
122 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
123 | 0 | U_ASSERT(!attributes.bogus); |
124 | 0 | for (int32_t i = 0; i < attributes.size(); i++) { |
125 | 0 | const Option& attr = attributes.getOption(i, localStatus); |
126 | | // No need to check error code, since we already checked |
127 | | // that !bogus |
128 | 0 | whitespace(); |
129 | 0 | emit(AT); |
130 | 0 | emit(attr.getName()); |
131 | 0 | const Operand& v = attr.getValue(); |
132 | 0 | if (!v.isNull()) { |
133 | 0 | emit(EQUALS); |
134 | 0 | emit(v); |
135 | 0 | } |
136 | 0 | } |
137 | 0 | } |
138 | | |
139 | 0 | void Serializer::emit(const Expression& expr) { |
140 | 0 | emit(LEFT_CURLY_BRACE); |
141 | |
|
142 | 0 | if (!expr.isFunctionCall()) { |
143 | | // Literal or variable, no annotation |
144 | 0 | emit(expr.getOperand()); |
145 | 0 | } else { |
146 | | // Function call or reserved |
147 | 0 | if (!expr.isStandaloneAnnotation()) { |
148 | | // Must be a function call that has an operand |
149 | 0 | emit(expr.getOperand()); |
150 | 0 | whitespace(); |
151 | 0 | } |
152 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
153 | 0 | const Operator* rator = expr.getOperator(localStatus); |
154 | 0 | U_ASSERT(U_SUCCESS(localStatus)); |
155 | 0 | emit(COLON); |
156 | 0 | emit(rator->getFunctionName()); |
157 | | // No whitespace after function name, in case it has |
158 | | // no options. (when there are options, emit(OptionMap) will |
159 | | // emit the leading whitespace) |
160 | 0 | emit(rator->getOptionsInternal()); |
161 | 0 | } |
162 | 0 | emitAttributes(expr.getAttributesInternal()); |
163 | 0 | emit(RIGHT_CURLY_BRACE); |
164 | 0 | } |
165 | | |
166 | 0 | void Serializer::emit(const PatternPart& part) { |
167 | 0 | if (part.isText()) { |
168 | | // Raw text |
169 | 0 | const UnicodeString& text = part.asText(); |
170 | | // Re-escape '{'/'}'/'\''|' |
171 | 0 | for (int32_t i = 0; ((int32_t) i) < text.length(); i++) { |
172 | 0 | switch(text[i]) { |
173 | 0 | case PIPE: |
174 | 0 | case BACKSLASH: |
175 | 0 | case LEFT_CURLY_BRACE: |
176 | 0 | case RIGHT_CURLY_BRACE: { |
177 | 0 | emit(BACKSLASH); |
178 | 0 | break; |
179 | 0 | } |
180 | 0 | default: |
181 | 0 | break; |
182 | 0 | } |
183 | 0 | emit(text[i]); |
184 | 0 | } |
185 | 0 | return; |
186 | 0 | } |
187 | | // Markup |
188 | 0 | if (part.isMarkup()) { |
189 | 0 | const Markup& markup = part.asMarkup(); |
190 | 0 | emit(LEFT_CURLY_BRACE); |
191 | 0 | if (markup.isClose()) { |
192 | 0 | emit(SLASH); |
193 | 0 | } else { |
194 | 0 | emit(NUMBER_SIGN); |
195 | 0 | } |
196 | 0 | emit(markup.getName()); |
197 | 0 | emit(markup.getOptionsInternal()); |
198 | 0 | emitAttributes(markup.getAttributesInternal()); |
199 | 0 | if (markup.isStandalone()) { |
200 | 0 | emit(SLASH); |
201 | 0 | } |
202 | 0 | emit(RIGHT_CURLY_BRACE); |
203 | 0 | return; |
204 | 0 | } |
205 | | // Expression |
206 | 0 | emit(part.contents()); |
207 | 0 | } |
208 | | |
209 | 0 | void Serializer::emit(const Pattern& pat) { |
210 | 0 | int32_t len = pat.numParts(); |
211 | | // Always quote pattern, which should match the normalized input |
212 | | // if the parser is constructing it correctly |
213 | 0 | emit(LEFT_CURLY_BRACE); |
214 | 0 | emit(LEFT_CURLY_BRACE); |
215 | 0 | for (int32_t i = 0; i < len; i++) { |
216 | | // No whitespace is needed here -- see the `pattern` nonterminal in the grammar |
217 | 0 | emit(pat.getPart(i)); |
218 | 0 | } |
219 | 0 | emit(RIGHT_CURLY_BRACE); |
220 | 0 | emit(RIGHT_CURLY_BRACE); |
221 | 0 | } |
222 | | |
223 | 0 | void Serializer::serializeDeclarations() { |
224 | 0 | const Binding* bindings = dataModel.getLocalVariablesInternal(); |
225 | 0 | U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr); |
226 | |
|
227 | 0 | for (int32_t i = 0; i < dataModel.bindingsLen; i++) { |
228 | 0 | const Binding& b = bindings[i]; |
229 | 0 | if (b.isLocal()) { |
230 | | // No whitespace needed here -- see `message` in the grammar |
231 | 0 | emit(ID_LOCAL); |
232 | 0 | whitespace(); |
233 | 0 | emit(DOLLAR); |
234 | 0 | emit(b.getVariable()); |
235 | | // No whitespace needed here -- see `local-declaration` in the grammar |
236 | 0 | emit(EQUALS); |
237 | | // No whitespace needed here -- see `local-declaration` in the grammar |
238 | 0 | } else { |
239 | | // Input declaration |
240 | 0 | emit(ID_INPUT); |
241 | | // No whitespace needed here -- see `input-declaration` in the grammar |
242 | 0 | } |
243 | 0 | emit(b.getValue()); |
244 | 0 | } |
245 | 0 | } |
246 | | |
247 | 0 | void Serializer::serializeSelectors() { |
248 | 0 | U_ASSERT(!dataModel.hasPattern()); |
249 | 0 | const VariableName* selectors = dataModel.getSelectorsInternal(); |
250 | |
|
251 | 0 | emit(ID_MATCH); |
252 | 0 | for (int32_t i = 0; i < dataModel.numSelectors(); i++) { |
253 | 0 | whitespace(); |
254 | 0 | emit(DOLLAR); |
255 | 0 | emit(selectors[i]); |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | 0 | void Serializer::serializeVariants() { |
260 | 0 | U_ASSERT(!dataModel.hasPattern()); |
261 | 0 | const Variant* variants = dataModel.getVariantsInternal(); |
262 | 0 | whitespace(); |
263 | 0 | for (int32_t i = 0; i < dataModel.numVariants(); i++) { |
264 | 0 | const Variant& v = variants[i]; |
265 | 0 | emit(v.getKeys()); |
266 | | // No whitespace needed here -- see `variant` in the grammar |
267 | 0 | emit(v.getPattern()); |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | | |
272 | | // Main (public) serializer method |
273 | 0 | void Serializer::serialize() { |
274 | 0 | serializeDeclarations(); |
275 | | // Pattern message |
276 | 0 | if (dataModel.hasPattern()) { |
277 | 0 | emit(dataModel.getPattern()); |
278 | 0 | } else { |
279 | | // Selectors message |
280 | 0 | serializeSelectors(); |
281 | 0 | serializeVariants(); |
282 | 0 | } |
283 | 0 | } |
284 | | |
285 | | } // namespace message2 |
286 | | U_NAMESPACE_END |
287 | | |
288 | | #endif /* #if !UCONFIG_NO_MF2 */ |
289 | | |
290 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |
291 | | |
292 | | #endif /* #if !UCONFIG_NO_NORMALIZATION */ |