Coverage Report

Created: 2025-09-05 07:16

/src/icu/icu4c/source/i18n/messageformat2_serializer.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2024 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_NORMALIZATION
7
8
#if !UCONFIG_NO_FORMATTING
9
10
#if !UCONFIG_NO_MF2
11
12
#include "unicode/messageformat2_data_model.h"
13
#include "messageformat2_macros.h"
14
#include "messageformat2_serializer.h"
15
#include "uvector.h" // U_ASSERT
16
17
U_NAMESPACE_BEGIN
18
19
namespace message2 {
20
21
// Generates a string representation of a data model
22
// ------------------------------------------------
23
24
using namespace data_model;
25
26
// Private helper methods
27
28
0
void Serializer::whitespace() {
29
0
    result += SPACE;
30
0
}
31
32
0
// Appends the code point `c` to the serialized output held in `result`.
void Serializer::emit(UChar32 c) {
    result.append(c);
}
35
36
0
void Serializer::emit(const UnicodeString& s) {
37
0
    result += s;
38
0
}
39
40
0
void Serializer::emit(const std::u16string_view& token) {
41
0
    result.append(token);
42
0
}
43
44
0
void Serializer::emit(const Literal& l) {
45
0
    if (l.isQuoted()) {
46
0
      emit(PIPE);
47
0
    }
48
0
    const UnicodeString& contents = l.unquoted();
49
0
    for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
50
        // Re-escape any escaped-char characters
51
0
        switch(contents[i]) {
52
0
        case BACKSLASH:
53
0
        case PIPE:
54
0
        case LEFT_CURLY_BRACE:
55
0
        case RIGHT_CURLY_BRACE: {
56
0
            emit(BACKSLASH);
57
0
            break;
58
0
        }
59
0
        default: {
60
0
            break;
61
0
        }
62
0
        }
63
0
        emit(contents[i]);
64
0
    }
65
0
    if (l.isQuoted()) {
66
0
        emit(PIPE);
67
0
    }
68
0
}
69
70
0
void Serializer::emit(const Key& k) {
71
0
    if (k.isWildcard()) {
72
0
        emit(ASTERISK);
73
0
        return;
74
0
    }
75
0
    emit(k.asLiteral());
76
0
}
77
78
0
void Serializer::emit(const SelectorKeys& k) {
79
0
  const Key* ks = k.getKeysInternal();
80
0
  int32_t len = k.len;
81
  // It would be an error for `keys` to be empty;
82
  // that would mean this is the single `pattern`
83
  // variant, and in that case, this method shouldn't be called
84
0
  U_ASSERT(len > 0);
85
0
  for (int32_t i = 0; i < len; i++) {
86
0
    if (i != 0) {
87
0
      whitespace();
88
0
    }
89
0
    emit(ks[i]);
90
0
  }
91
0
}
92
93
0
void Serializer::emit(const Operand& rand) {
94
0
    U_ASSERT(!rand.isNull());
95
96
0
    if (rand.isVariable()) {
97
0
        emit(DOLLAR);
98
0
        emit(rand.asVariable());
99
0
    } else {
100
        // Literal: quoted or unquoted
101
0
        emit(rand.asLiteral());
102
0
    }
103
0
}
104
105
0
void Serializer::emit(const OptionMap& options) {
106
    // Errors should have been checked before this point
107
0
    UErrorCode localStatus = U_ZERO_ERROR;
108
0
    U_ASSERT(!options.bogus);
109
0
    for (int32_t i = 0; i < options.size(); i++) {
110
0
        const Option& opt = options.getOption(i, localStatus);
111
        // No need to check error code, since we already checked
112
        // that !bogus
113
0
        whitespace();
114
0
        emit(opt.getName());
115
0
        emit(EQUALS);
116
0
        emit(opt.getValue());
117
0
    }
118
0
}
119
120
0
void Serializer::emitAttributes(const OptionMap& attributes) {
121
    // Errors should have been checked before this point
122
0
    UErrorCode localStatus = U_ZERO_ERROR;
123
0
    U_ASSERT(!attributes.bogus);
124
0
    for (int32_t i = 0; i < attributes.size(); i++) {
125
0
        const Option& attr = attributes.getOption(i, localStatus);
126
        // No need to check error code, since we already checked
127
        // that !bogus
128
0
        whitespace();
129
0
        emit(AT);
130
0
        emit(attr.getName());
131
0
        const Operand& v = attr.getValue();
132
0
        if (!v.isNull()) {
133
0
            emit(EQUALS);
134
0
            emit(v);
135
0
        }
136
0
    }
137
0
}
138
139
0
 void Serializer::emit(const Expression& expr) {
140
0
    emit(LEFT_CURLY_BRACE);
141
142
0
    if (!expr.isFunctionCall()) {
143
        // Literal or variable, no annotation
144
0
        emit(expr.getOperand());
145
0
    } else {
146
        // Function call or reserved
147
0
        if (!expr.isStandaloneAnnotation()) {
148
          // Must be a function call that has an operand
149
0
          emit(expr.getOperand());
150
0
          whitespace();
151
0
        }
152
0
        UErrorCode localStatus = U_ZERO_ERROR;
153
0
        const Operator* rator = expr.getOperator(localStatus);
154
0
        U_ASSERT(U_SUCCESS(localStatus));
155
0
        emit(COLON);
156
0
        emit(rator->getFunctionName());
157
        // No whitespace after function name, in case it has
158
        // no options. (when there are options, emit(OptionMap) will
159
        // emit the leading whitespace)
160
0
        emit(rator->getOptionsInternal());
161
0
    }
162
0
    emitAttributes(expr.getAttributesInternal());
163
0
    emit(RIGHT_CURLY_BRACE);
164
0
}
165
166
0
void Serializer::emit(const PatternPart& part) {
167
0
    if (part.isText()) {
168
        // Raw text
169
0
        const UnicodeString& text = part.asText();
170
        // Re-escape '{'/'}'/'\''|'
171
0
        for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
172
0
          switch(text[i]) {
173
0
          case PIPE:
174
0
          case BACKSLASH:
175
0
          case LEFT_CURLY_BRACE:
176
0
          case RIGHT_CURLY_BRACE: {
177
0
            emit(BACKSLASH);
178
0
            break;
179
0
          }
180
0
          default:
181
0
            break;
182
0
          }
183
0
          emit(text[i]);
184
0
        }
185
0
        return;
186
0
    }
187
    // Markup
188
0
    if (part.isMarkup()) {
189
0
        const Markup& markup = part.asMarkup();
190
0
        emit(LEFT_CURLY_BRACE);
191
0
        if (markup.isClose()) {
192
0
            emit(SLASH);
193
0
            } else {
194
0
            emit(NUMBER_SIGN);
195
0
        }
196
0
        emit(markup.getName());
197
0
        emit(markup.getOptionsInternal());
198
0
        emitAttributes(markup.getAttributesInternal());
199
0
        if (markup.isStandalone()) {
200
0
            emit(SLASH);
201
0
        }
202
0
        emit(RIGHT_CURLY_BRACE);
203
0
        return;
204
0
    }
205
    // Expression
206
0
    emit(part.contents());
207
0
}
208
209
0
void Serializer::emit(const Pattern& pat) {
210
0
    int32_t len = pat.numParts();
211
    // Always quote pattern, which should match the normalized input
212
    // if the parser is constructing it correctly
213
0
    emit(LEFT_CURLY_BRACE);
214
0
    emit(LEFT_CURLY_BRACE);
215
0
    for (int32_t i = 0; i < len; i++) {
216
        // No whitespace is needed here -- see the `pattern` nonterminal in the grammar
217
0
        emit(pat.getPart(i));
218
0
    }
219
0
    emit(RIGHT_CURLY_BRACE);
220
0
    emit(RIGHT_CURLY_BRACE);
221
0
}
222
223
0
void Serializer::serializeDeclarations() {
224
0
    const Binding* bindings = dataModel.getLocalVariablesInternal();
225
0
    U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr);
226
227
0
    for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
228
0
        const Binding& b = bindings[i];
229
0
        if (b.isLocal()) {
230
            // No whitespace needed here -- see `message` in the grammar
231
0
            emit(ID_LOCAL);
232
0
            whitespace();
233
0
            emit(DOLLAR);
234
0
            emit(b.getVariable());
235
            // No whitespace needed here -- see `local-declaration` in the grammar
236
0
            emit(EQUALS);
237
            // No whitespace needed here -- see `local-declaration` in the grammar
238
0
        } else {
239
            // Input declaration
240
0
            emit(ID_INPUT);
241
            // No whitespace needed here -- see `input-declaration` in the grammar
242
0
        }
243
0
        emit(b.getValue());
244
0
    }
245
0
}
246
247
0
void Serializer::serializeSelectors() {
248
0
    U_ASSERT(!dataModel.hasPattern());
249
0
    const VariableName* selectors = dataModel.getSelectorsInternal();
250
251
0
    emit(ID_MATCH);
252
0
    for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
253
0
        whitespace();
254
0
        emit(DOLLAR);
255
0
        emit(selectors[i]);
256
0
    }
257
0
}
258
259
0
void Serializer::serializeVariants() {
260
0
    U_ASSERT(!dataModel.hasPattern());
261
0
    const Variant* variants = dataModel.getVariantsInternal();
262
0
    whitespace();
263
0
    for (int32_t i = 0; i < dataModel.numVariants(); i++) {
264
0
        const Variant& v = variants[i];
265
0
        emit(v.getKeys());
266
        // No whitespace needed here -- see `variant` in the grammar
267
0
        emit(v.getPattern());
268
0
    }
269
0
}
270
271
272
// Main (public) serializer method
273
0
void Serializer::serialize() {
274
0
    serializeDeclarations();
275
    // Pattern message
276
0
    if (dataModel.hasPattern()) {
277
0
      emit(dataModel.getPattern());
278
0
    } else {
279
      // Selectors message
280
0
      serializeSelectors();
281
0
      serializeVariants();
282
0
    }
283
0
}
284
285
} // namespace message2
286
U_NAMESPACE_END
287
288
#endif /* #if !UCONFIG_NO_MF2 */
289
290
#endif /* #if !UCONFIG_NO_FORMATTING */
291
292
#endif /* #if !UCONFIG_NO_NORMALIZATION */