/src/icu/icu4c/source/i18n/messageformat2_evaluation.cpp
Line | Count | Source |
1 | | // © 2024 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_NORMALIZATION |
7 | | |
8 | | #if !UCONFIG_NO_FORMATTING |
9 | | |
10 | | #if !UCONFIG_NO_MF2 |
11 | | |
12 | | #include "messageformat2_allocation.h" |
13 | | #include "messageformat2_evaluation.h" |
14 | | #include "messageformat2_function_registry_internal.h" |
15 | | #include "messageformat2_macros.h" |
16 | | #include "uvector.h" // U_ASSERT |
17 | | |
18 | | U_NAMESPACE_BEGIN |
19 | | |
20 | | // Auxiliary data structures used during formatting a message |
21 | | |
22 | | namespace message2 { |
23 | | |
24 | | using namespace data_model; |
25 | | |
26 | | // Functions |
27 | | // ------------- |
28 | | |
29 | 0 | ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { |
30 | 0 | name = std::move(other.name); |
31 | 0 | value = std::move(other.value); |
32 | 0 | sourceIsLiteral = other.sourceIsLiteral; |
33 | 0 | } |
34 | | |
35 | 0 | ResolvedFunctionOption::~ResolvedFunctionOption() {} |
36 | | |
37 | | |
38 | 0 | const ResolvedFunctionOption* FunctionOptions::getResolvedFunctionOptions(int32_t& len) const { |
39 | 0 | len = functionOptionsLen; |
40 | 0 | U_ASSERT(len == 0 || options != nullptr); |
41 | 0 | return options; |
42 | 0 | } |
43 | | |
44 | 0 | FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { |
45 | 0 | CHECK_ERROR(status); |
46 | |
|
47 | 0 | functionOptionsLen = optionsVector.size(); |
48 | 0 | options = moveVectorToArray<ResolvedFunctionOption>(optionsVector, status); |
49 | 0 | } |
50 | | |
51 | | // Returns false if option doesn't exist |
52 | 0 | UBool FunctionOptions::wasSetFromLiteral(const UnicodeString& key) const { |
53 | 0 | if (options == nullptr) { |
54 | 0 | U_ASSERT(functionOptionsLen == 0); |
55 | 0 | } |
56 | 0 | for (int32_t i = 0; i < functionOptionsLen; i++) { |
57 | 0 | const ResolvedFunctionOption& opt = options[i]; |
58 | 0 | if (opt.getName() == key) { |
59 | 0 | return opt.isLiteral(); |
60 | 0 | } |
61 | 0 | } |
62 | 0 | return false; |
63 | 0 | } |
64 | | |
65 | 0 | UBool FunctionOptions::getFunctionOption(std::u16string_view key, Formattable& option) const { |
66 | 0 | if (options == nullptr) { |
67 | 0 | U_ASSERT(functionOptionsLen == 0); |
68 | 0 | } |
69 | 0 | for (int32_t i = 0; i < functionOptionsLen; i++) { |
70 | 0 | const ResolvedFunctionOption& opt = options[i]; |
71 | 0 | if (opt.getName() == key) { |
72 | 0 | option = opt.getValue(); |
73 | 0 | return true; |
74 | 0 | } |
75 | 0 | } |
76 | 0 | return false; |
77 | 0 | } |
78 | | |
79 | 0 | UnicodeString FunctionOptions::getStringFunctionOption(std::u16string_view key) const { |
80 | 0 | Formattable option; |
81 | 0 | if (getFunctionOption(key, option)) { |
82 | 0 | if (option.getType() == UFMT_STRING) { |
83 | 0 | UErrorCode localErrorCode = U_ZERO_ERROR; |
84 | 0 | UnicodeString val = option.getString(localErrorCode); |
85 | 0 | U_ASSERT(U_SUCCESS(localErrorCode)); |
86 | 0 | return val; |
87 | 0 | } |
88 | 0 | } |
89 | | // For anything else, including non-string values, return "". |
90 | | // Alternately, could try to stringify the non-string option. |
91 | | // (Currently, no tests require that.) |
92 | 0 | return {}; |
93 | 0 | } |
94 | | |
95 | 0 | FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept { |
96 | 0 | functionOptionsLen = other.functionOptionsLen; |
97 | 0 | options = other.options; |
98 | 0 | other.functionOptionsLen = 0; |
99 | 0 | other.options = nullptr; |
100 | 0 | return *this; |
101 | 0 | } |
102 | | |
103 | 0 | FunctionOptions::FunctionOptions(FunctionOptions&& other) { |
104 | 0 | *this = std::move(other); |
105 | 0 | } |
106 | | |
107 | 0 | FunctionOptions::~FunctionOptions() { |
108 | 0 | if (options != nullptr) { |
109 | 0 | delete[] options; |
110 | 0 | options = nullptr; |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | 0 | static bool containsOption(const UVector& opts, const ResolvedFunctionOption& opt) { |
115 | 0 | for (int32_t i = 0; i < opts.size(); i++) { |
116 | 0 | if (static_cast<ResolvedFunctionOption*>(opts[i])->getName() |
117 | 0 | == opt.getName()) { |
118 | 0 | return true; |
119 | 0 | } |
120 | 0 | } |
121 | 0 | return false; |
122 | 0 | } |
123 | | |
124 | | // Options in `this` take precedence |
125 | | // `this` can't be used after mergeOptions is called |
126 | | FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, |
127 | 0 | UErrorCode& status) { |
128 | 0 | UVector mergedOptions(status); |
129 | 0 | mergedOptions.setDeleter(uprv_deleteUObject); |
130 | |
|
131 | 0 | if (U_FAILURE(status)) { |
132 | 0 | return {}; |
133 | 0 | } |
134 | | |
135 | | // Create a new vector consisting of the options from this `FunctionOptions` |
136 | 0 | for (int32_t i = 0; i < functionOptionsLen; i++) { |
137 | 0 | mergedOptions.adoptElement(create<ResolvedFunctionOption>(std::move(options[i]), status), |
138 | 0 | status); |
139 | 0 | } |
140 | | |
141 | | // Add each option from `other` that doesn't appear in this `FunctionOptions` |
142 | 0 | for (int i = 0; i < other.functionOptionsLen; i++) { |
143 | | // Note: this is quadratic in the length of `options` |
144 | 0 | if (!containsOption(mergedOptions, other.options[i])) { |
145 | 0 | mergedOptions.adoptElement(create<ResolvedFunctionOption>(std::move(other.options[i]), |
146 | 0 | status), |
147 | 0 | status); |
148 | 0 | } |
149 | 0 | } |
150 | |
|
151 | 0 | delete[] options; |
152 | 0 | options = nullptr; |
153 | 0 | functionOptionsLen = 0; |
154 | |
|
155 | 0 | return FunctionOptions(std::move(mergedOptions), status); |
156 | 0 | } |
157 | | |
158 | | // PrioritizedVariant |
159 | | // ------------------ |
160 | | |
161 | 0 | UBool PrioritizedVariant::operator<(const PrioritizedVariant& other) const { |
162 | 0 | if (priority < other.priority) { |
163 | 0 | return true; |
164 | 0 | } |
165 | 0 | return false; |
166 | 0 | } |
167 | | |
168 | 0 | PrioritizedVariant::~PrioritizedVariant() {} |
169 | | |
170 | | // ---------------- Environments and closures |
171 | | |
172 | 0 | Environment* Environment::create(const VariableName& var, Closure&& c, Environment* parent, UErrorCode& errorCode) { |
173 | 0 | NULL_ON_ERROR(errorCode); |
174 | 0 | Environment* result = new NonEmptyEnvironment(var, std::move(c), parent); |
175 | 0 | if (result == nullptr) { |
176 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
177 | 0 | return nullptr; |
178 | 0 | } |
179 | 0 | return result; |
180 | 0 | } |
181 | | |
182 | 0 | Environment* Environment::create(UErrorCode& errorCode) { |
183 | 0 | NULL_ON_ERROR(errorCode); |
184 | 0 | Environment* result = new EmptyEnvironment(); |
185 | 0 | if (result == nullptr) { |
186 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
187 | 0 | return nullptr; |
188 | 0 | } |
189 | 0 | return result; |
190 | 0 | } |
191 | | |
192 | 0 | const Closure& EmptyEnvironment::lookup(const VariableName& v) const { |
193 | 0 | (void) v; |
194 | 0 | U_ASSERT(false); |
195 | 0 | UPRV_UNREACHABLE_EXIT; |
196 | 0 | } |
197 | | |
198 | 0 | const Closure& NonEmptyEnvironment::lookup(const VariableName& v) const { |
199 | 0 | if (v == var) { |
200 | 0 | return rhs; |
201 | 0 | } |
202 | 0 | return parent->lookup(v); |
203 | 0 | } |
204 | | |
205 | 0 | bool EmptyEnvironment::has(const VariableName& v) const { |
206 | 0 | (void) v; |
207 | 0 | return false; |
208 | 0 | } |
209 | | |
210 | 0 | bool NonEmptyEnvironment::has(const VariableName& v) const { |
211 | 0 | if (v == var) { |
212 | 0 | return true; |
213 | 0 | } |
214 | 0 | return parent->has(v); |
215 | 0 | } |
216 | | |
217 | 0 | Environment::~Environment() {} |
218 | 0 | NonEmptyEnvironment::~NonEmptyEnvironment() {} |
219 | | EmptyEnvironment::~EmptyEnvironment() {} |
220 | | |
221 | 0 | Closure::~Closure() {} |
222 | | |
223 | | // MessageContext methods |
224 | | |
225 | 0 | void MessageContext::checkErrors(UErrorCode& status) const { |
226 | 0 | CHECK_ERROR(status); |
227 | 0 | errors.checkErrors(status); |
228 | 0 | } |
229 | | |
230 | | const Formattable* MessageContext::getGlobal(const VariableName& v, |
231 | 0 | UErrorCode& errorCode) const { |
232 | 0 | return arguments.getArgument(v, errorCode); |
233 | 0 | } |
234 | | |
235 | | MessageContext::MessageContext(const MessageArguments& args, |
236 | | const StaticErrors& e, |
237 | 0 | UErrorCode& status) : arguments(args), errors(e, status) {} |
238 | | |
239 | 0 | MessageContext::~MessageContext() {} |
240 | | |
241 | | // InternalValue |
242 | | // ------------- |
243 | | |
244 | 0 | bool InternalValue::isFallback() const { |
245 | 0 | return std::holds_alternative<FormattedPlaceholder>(argument) |
246 | 0 | && std::get_if<FormattedPlaceholder>(&argument)->isFallback(); |
247 | 0 | } |
248 | | |
249 | 0 | bool InternalValue::hasNullOperand() const { |
250 | 0 | return std::holds_alternative<FormattedPlaceholder>(argument) |
251 | 0 | && std::get_if<FormattedPlaceholder>(&argument)->isNullOperand(); |
252 | 0 | } |
253 | | |
254 | 0 | FormattedPlaceholder InternalValue::takeArgument(UErrorCode& errorCode) { |
255 | 0 | if (U_FAILURE(errorCode)) { |
256 | 0 | return {}; |
257 | 0 | } |
258 | | |
259 | 0 | if (std::holds_alternative<FormattedPlaceholder>(argument)) { |
260 | 0 | return std::move(*std::get_if<FormattedPlaceholder>(&argument)); |
261 | 0 | } |
262 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
263 | 0 | return {}; |
264 | 0 | } |
265 | | |
266 | 0 | const UnicodeString& InternalValue::getFallback() const { |
267 | 0 | if (std::holds_alternative<FormattedPlaceholder>(argument)) { |
268 | 0 | return std::get_if<FormattedPlaceholder>(&argument)->getFallback(); |
269 | 0 | } |
270 | 0 | return (*std::get_if<InternalValue*>(&argument))->getFallback(); |
271 | 0 | } |
272 | | |
273 | 0 | const Selector* InternalValue::getSelector(UErrorCode& errorCode) const { |
274 | 0 | if (U_FAILURE(errorCode)) { |
275 | 0 | return nullptr; |
276 | 0 | } |
277 | | |
278 | 0 | if (selector == nullptr) { |
279 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
280 | 0 | } |
281 | 0 | return selector; |
282 | 0 | } |
283 | | |
284 | 0 | InternalValue::InternalValue(FormattedPlaceholder&& arg) { |
285 | 0 | argument = std::move(arg); |
286 | 0 | selector = nullptr; |
287 | 0 | formatter = nullptr; |
288 | 0 | } |
289 | | |
290 | | InternalValue::InternalValue(InternalValue* operand, |
291 | | FunctionOptions&& opts, |
292 | | const FunctionName& functionName, |
293 | | const Formatter* f, |
294 | 0 | const Selector* s) { |
295 | 0 | argument = operand; |
296 | 0 | options = std::move(opts); |
297 | 0 | name = functionName; |
298 | 0 | selector = s; |
299 | 0 | formatter = f; |
300 | 0 | U_ASSERT(selector != nullptr || formatter != nullptr); |
301 | 0 | } |
302 | | |
303 | | // `this` cannot be used after calling this method |
304 | | void InternalValue::forceSelection(DynamicErrors& errs, |
305 | | const UnicodeString* keys, |
306 | | int32_t keysLen, |
307 | | UnicodeString* prefs, |
308 | | int32_t& prefsLen, |
309 | 0 | UErrorCode& errorCode) { |
310 | 0 | if (U_FAILURE(errorCode)) { |
311 | 0 | return; |
312 | 0 | } |
313 | | |
314 | 0 | if (!canSelect()) { |
315 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
316 | 0 | return; |
317 | 0 | } |
318 | | // Find the argument and complete set of options by traversing `argument` |
319 | 0 | FunctionOptions opts; |
320 | 0 | InternalValue* p = this; |
321 | 0 | FunctionName selectorName = name; |
322 | |
|
323 | 0 | bool operandSelect = false; |
324 | 0 | while (std::holds_alternative<InternalValue*>(p->argument)) { |
325 | 0 | if (p->name != selectorName) { |
326 | | // Can only compose calls to the same selector |
327 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
328 | 0 | return; |
329 | 0 | } |
330 | | // Very special case to detect something like: |
331 | | // .local $sel = {1 :integer select=exact} .local $bad = {$sel :integer} .match $bad 1 {{ONE}} * {{operand select {$bad}}} |
332 | | // This can be done better once function composition is fully implemented. |
333 | 0 | if (p != this && |
334 | 0 | !p->options.getStringFunctionOption(options::SELECT).isEmpty() |
335 | 0 | && (selectorName == functions::NUMBER || selectorName == functions::INTEGER)) { |
336 | | // In this case, we want to call the selector normally but emit a |
337 | | // `bad-option` error, possibly with the outcome of normal-looking output (with relaxed |
338 | | // error handling) and an error (with strict error handling). |
339 | 0 | operandSelect = true; |
340 | 0 | } |
341 | | // First argument to mergeOptions takes precedence |
342 | 0 | opts = opts.mergeOptions(std::move(p->options), errorCode); |
343 | 0 | if (U_FAILURE(errorCode)) { |
344 | 0 | return; |
345 | 0 | } |
346 | 0 | InternalValue* next = *std::get_if<InternalValue*>(&p->argument); |
347 | 0 | p = next; |
348 | 0 | } |
349 | 0 | FormattedPlaceholder arg = std::move(*std::get_if<FormattedPlaceholder>(&p->argument)); |
350 | | |
351 | | // This condition can't be checked in the selector. |
352 | | // Effectively, there are two different kinds of "bad option" errors: |
353 | | // one that can be recovered from (used for select=$var) and one that |
354 | | // can't (used for bad digit size options and other cases). |
355 | | // The checking of the recoverable error has to be done here; otherwise, |
356 | | // the "bad option" signaled by the selector implementation would cause |
357 | | // fallback output to be used when formatting the `*` pattern. |
358 | 0 | bool badSelectOption = !checkSelectOption(); |
359 | |
|
360 | 0 | selector->selectKey(std::move(arg), std::move(opts), |
361 | 0 | keys, keysLen, |
362 | 0 | prefs, prefsLen, errorCode); |
363 | 0 | if (errorCode == U_MF_SELECTOR_ERROR) { |
364 | 0 | errorCode = U_ZERO_ERROR; |
365 | 0 | errs.setSelectorError(selectorName, errorCode); |
366 | 0 | } else if (errorCode == U_MF_BAD_OPTION) { |
367 | 0 | errorCode = U_ZERO_ERROR; |
368 | 0 | errs.setBadOption(selectorName, errorCode); |
369 | 0 | } else if (operandSelect || badSelectOption) { |
370 | 0 | errs.setRecoverableBadOption(selectorName, errorCode); |
371 | | // In this case, only the `*` variant should match |
372 | 0 | prefsLen = 0; |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | 0 | bool InternalValue::checkSelectOption() const { |
377 | 0 | if (name != UnicodeString("number") && name != UnicodeString("integer")) { |
378 | 0 | return true; |
379 | 0 | } |
380 | | |
381 | | // Per the spec, if the "select" option is present, it must have been |
382 | | // set from a literal |
383 | | |
384 | 0 | Formattable opt; |
385 | | // Returns false if the `select` option is present and it was not set from a literal |
386 | | |
387 | | // OK if the option wasn't present |
388 | 0 | if (!options.getFunctionOption(UnicodeString("select"), opt)) { |
389 | 0 | return true; |
390 | 0 | } |
391 | | // Otherwise, return true if the option was set from a literal |
392 | 0 | return options.wasSetFromLiteral(UnicodeString("select")); |
393 | 0 | } |
394 | | |
395 | 0 | FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) { |
396 | 0 | if (U_FAILURE(errorCode)) { |
397 | 0 | return {}; |
398 | 0 | } |
399 | | |
400 | 0 | if (formatter == nullptr && selector == nullptr) { |
401 | 0 | U_ASSERT(std::holds_alternative<FormattedPlaceholder>(argument)); |
402 | 0 | return std::move(*std::get_if<FormattedPlaceholder>(&argument)); |
403 | 0 | } |
404 | 0 | if (formatter == nullptr) { |
405 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
406 | 0 | return {}; |
407 | 0 | } |
408 | | |
409 | 0 | FormattedPlaceholder arg; |
410 | |
|
411 | 0 | if (std::holds_alternative<FormattedPlaceholder>(argument)) { |
412 | 0 | arg = std::move(*std::get_if<FormattedPlaceholder>(&argument)); |
413 | 0 | } else { |
414 | 0 | arg = (*std::get_if<InternalValue*>(&argument))->forceFormatting(errs, |
415 | 0 | errorCode); |
416 | 0 | } |
417 | |
|
418 | 0 | if (U_FAILURE(errorCode)) { |
419 | 0 | return {}; |
420 | 0 | } |
421 | | |
422 | 0 | if (arg.isFallback()) { |
423 | 0 | return arg; |
424 | 0 | } |
425 | | |
426 | | // The fallback for a nullary function call is the function name |
427 | 0 | UnicodeString fallback; |
428 | 0 | if (arg.isNullOperand()) { |
429 | 0 | fallback = u":"; |
430 | 0 | fallback += name; |
431 | 0 | } else { |
432 | 0 | fallback = arg.getFallback(); |
433 | 0 | } |
434 | | |
435 | | // Very special case for :number select=foo and :integer select=foo |
436 | | // This check can't be done inside the function implementation because |
437 | | // it doesn't have a way to both signal an error and return usable output, |
438 | | // and the spec stipulates that fallback output shouldn't be used in the |
439 | | // case of a bad `select` option to a formatting call. |
440 | 0 | bool badSelect = !checkSelectOption(); |
441 | | |
442 | | // Call the function with the argument |
443 | 0 | FormattedPlaceholder result = formatter->format(std::move(arg), std::move(options), errorCode); |
444 | 0 | if (U_SUCCESS(errorCode) && errorCode == U_USING_DEFAULT_WARNING) { |
445 | | // Ignore this warning |
446 | 0 | errorCode = U_ZERO_ERROR; |
447 | 0 | } |
448 | 0 | if (U_FAILURE(errorCode)) { |
449 | 0 | if (errorCode == U_MF_OPERAND_MISMATCH_ERROR) { |
450 | 0 | errorCode = U_ZERO_ERROR; |
451 | 0 | errs.setOperandMismatchError(name, errorCode); |
452 | 0 | } else if (errorCode == U_MF_BAD_OPTION) { |
453 | 0 | errorCode = U_ZERO_ERROR; |
454 | 0 | errs.setBadOption(name, errorCode); |
455 | 0 | } else { |
456 | 0 | errorCode = U_ZERO_ERROR; |
457 | | // Convey any other error generated by the formatter |
458 | | // as a formatting error |
459 | 0 | errs.setFormattingError(name, errorCode); |
460 | 0 | } |
461 | 0 | } |
462 | | // Ignore the output if any error occurred |
463 | | // We don't ignore the output in the case of a Bad Option Error, |
464 | | // because of the select=bad case where we want both an error |
465 | | // and non-fallback output. |
466 | 0 | if (errs.hasFormattingError() || errs.hasBadOptionError()) { |
467 | 0 | return FormattedPlaceholder(fallback); |
468 | 0 | } |
469 | 0 | if (badSelect) { |
470 | | // In this case, we want to set an error but not replace |
471 | | // the output with a fallback |
472 | 0 | errs.setRecoverableBadOption(name, errorCode); |
473 | 0 | } |
474 | 0 | return result; |
475 | 0 | } |
476 | | |
477 | 0 | InternalValue& InternalValue::operator=(InternalValue&& other) noexcept { |
478 | 0 | argument = std::move(other.argument); |
479 | 0 | other.argument = nullptr; |
480 | 0 | options = std::move(other.options); |
481 | 0 | name = other.name; |
482 | 0 | selector = other.selector; |
483 | 0 | formatter = other.formatter; |
484 | 0 | other.selector = nullptr; |
485 | 0 | other.formatter = nullptr; |
486 | |
|
487 | 0 | return *this; |
488 | 0 | } |
489 | | |
490 | 0 | InternalValue::~InternalValue() { |
491 | 0 | delete selector; |
492 | 0 | selector = nullptr; |
493 | 0 | delete formatter; |
494 | 0 | formatter = nullptr; |
495 | 0 | if (std::holds_alternative<InternalValue*>(argument)) { |
496 | 0 | delete *std::get_if<InternalValue*>(&argument); |
497 | 0 | argument = nullptr; |
498 | 0 | } |
499 | 0 | } |
500 | | |
501 | | } // namespace message2 |
502 | | U_NAMESPACE_END |
503 | | |
504 | | #endif /* #if !UCONFIG_NO_MF2 */ |
505 | | |
506 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |
507 | | |
508 | | #endif /* #if !UCONFIG_NO_NORMALIZATION */ |