Coverage Report

Created: 2026-06-23 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/unicode/messageformat2.h
Line
Count
Source
1
// © 2024 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#ifndef MESSAGEFORMAT2_H
7
#define MESSAGEFORMAT2_H
8
9
#if U_SHOW_CPLUSPLUS_API
10
11
#if !UCONFIG_NO_NORMALIZATION
12
13
#if !UCONFIG_NO_FORMATTING
14
15
#if !UCONFIG_NO_MF2
16
17
/**
18
 * \file
19
 * \brief C++ API: Formats messages using the draft MessageFormat 2.0.
20
 */
21
22
#include "unicode/messageformat2_arguments.h"
23
#include "unicode/messageformat2_data_model.h"
24
#include "unicode/messageformat2_function_registry.h"
25
#include "unicode/normalizer2.h"
26
#include "unicode/unistr.h"
27
28
#ifndef U_HIDE_DEPRECATED_API
29
30
U_NAMESPACE_BEGIN
31
32
namespace message2 {
33
34
    class Environment;
35
    class MessageContext;
36
    class StaticErrors;
37
    class InternalValue;
38
    class BaseValue;
39
40
    /**
41
     * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.
42
     *
43
     * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
44
     * description of the syntax with examples and use cases</a> and the corresponding
45
     * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
46
     *
47
     * The MessageFormatter class is mutable and movable. It is not copyable.
48
     * (It is mutable because if it has a custom function registry, the registry may include
49
     * `FormatterFactory` objects implementing custom formatters, which are allowed to contain
50
     * mutable state.)
51
     *
52
     * @internal ICU 75 technology preview
53
     * @deprecated This API is for technology preview only.
54
     */
55
    class U_I18N_API_CLASS MessageFormatter : public UObject {
56
        // Note: This class does not currently inherit from the existing
57
        // `Format` class.
58
    public:
59
        /**
60
         * Move assignment operator:
61
         * The source MessageFormatter will be left in a valid but undefined state.
62
         *
63
         * @internal ICU 75 technology preview
64
         * @deprecated This API is for technology preview only.
65
         */
66
        U_I18N_API MessageFormatter& operator=(MessageFormatter&&) noexcept;
67
        /**
68
         * Destructor.
69
         *
70
         * @internal ICU 75 technology preview
71
         * @deprecated This API is for technology preview only.
72
         */
73
        U_I18N_API virtual ~MessageFormatter();
74
75
        /**
76
         * Formats the message to a string, using the data model that was previously set or parsed,
77
         * and the given `arguments` object.
78
         *
79
         * This method is non-const due to the function registry being non-const,
80
         * which is in turn due to the values (`Function` objects in the map) having mutable state.
81
         * In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
82
         * state within the objects that represent custom functions.
83
         *
84
         * @param arguments Reference to message arguments
85
         * @param status    Input/output error code used to indicate syntax errors, data model
86
         *                  errors, resolution errors, formatting errors, selection errors, as well
87
         *                  as other errors (such as memory allocation failures). Partial output
88
         *                  is still provided in the presence of most error types.
89
         * @return          The string result of formatting the message with the given arguments.
90
         *
91
         * @internal ICU 75 technology preview
92
         * @deprecated This API is for technology preview only.
93
         */
94
        U_I18N_API UnicodeString formatToString(const MessageArguments& arguments, UErrorCode& status);
95
96
        /**
97
         * Not yet implemented; formats the message to a `FormattedMessage` object,
98
         * using the data model that was previously set or parsed,
99
         * and the given `arguments` object.
100
         *
101
         * @param arguments Reference to message arguments
102
         * @param status    Input/output error code used to indicate syntax errors, data model
103
         *                  errors, resolution errors, formatting errors, selection errors, as well
104
         *                  as other errors (such as memory allocation failures). Partial output
105
         *                  is still provided in the presence of most error types.
106
         * @return          The `FormattedMessage` representing the formatted message.
107
         *
108
         * @internal ICU 75 technology preview
109
         * @deprecated This API is for technology preview only.
110
         */
111
0
        U_I18N_API FormattedMessage format(const MessageArguments& arguments, UErrorCode& status) const {
112
0
            (void) arguments; // Unused: formatting to a FormattedMessage is not yet implemented
113
0
            if (U_SUCCESS(status)) { // Leave an incoming failure code untouched
114
0
                status = U_UNSUPPORTED_ERROR;
115
0
            }
116
0
            return FormattedMessage(status); // Built from the resulting status code
117
0
        }
118
119
        /**
120
         * Accesses the locale that this `MessageFormatter` object was created with.
121
         *
122
         * @return A reference to the locale.
123
         *
124
         * @internal ICU 75 technology preview
125
         * @deprecated This API is for technology preview only.
126
         */
127
0
        U_I18N_API const Locale& getLocale() const { return locale; } // Inline accessor for the `locale` member
128
129
        /**
130
         * Serializes the data model as a string in MessageFormat 2.0 syntax.
131
         *
132
         * @return           A string representation of the data model.
133
         *                   The string is a valid MessageFormat 2.0 message.
134
         *
135
         * @internal ICU 75 technology preview
136
         * @deprecated This API is for technology preview only.
137
         */
138
        U_I18N_API UnicodeString getPattern() const;
139
140
        /**
141
         * Accesses the data model referred to by this
142
         * `MessageFormatter` object.
143
         *
144
         * @return A reference to the data model.
145
         *
146
         * @internal ICU 75 technology preview
147
         * @deprecated This API is for technology preview only.
148
         */
149
        U_I18N_API const MFDataModel& getDataModel() const;
150
151
        /**
152
         * Used in conjunction with the
153
         * MessageFormatter::Builder::setErrorHandlingBehavior() method.
154
         *
155
         * @internal ICU 76 technology preview
156
         * @deprecated This API is for technology preview only.
157
         */
158
        typedef enum UMFErrorHandlingBehavior {
159
            /**
160
             * Suppress errors and return best-effort output.
161
             *
162
             * @internal ICU 76 technology preview
163
             * @deprecated This API is for technology preview only.
164
             */
165
            U_MF_BEST_EFFORT = 0,
166
            /**
167
             * Signal all MessageFormat errors using the UErrorCode
168
             * argument.
169
             *
170
             * @internal ICU 76 technology preview
171
             * @deprecated This API is for technology preview only.
172
             */
173
            U_MF_STRICT
174
        } UMFErrorHandlingBehavior;
175
176
        /**
177
         * Used in conjunction with the
178
         * MessageFormatter::Builder::setBidiContext() method.
179
         *
180
         * @internal ICU 79 technology preview
181
         * @deprecated This API is for technology preview only.
182
         */
183
        typedef enum UMFBidiContext {
184
            /**
185
             * Denotes a left-to-right message.
186
             *
187
             * @internal ICU 79 technology preview
188
             * @deprecated This API is for technology preview only.
189
             */
190
            U_MF_BIDI_CONTEXT_LTR = 0,
191
            /**
192
             * Denotes a right-to-left message.
193
             *
194
             * @internal ICU 79 technology preview
195
             * @deprecated This API is for technology preview only.
196
             */
197
            U_MF_BIDI_CONTEXT_RTL,
198
            /**
199
             * Indicates that the message directionality should be
200
             * inferred from the locale.
201
             *
202
             * @internal ICU 79 technology preview
203
             * @deprecated This API is for technology preview only.
204
             */
205
            U_MF_BIDI_CONTEXT_AUTO,
206
            U_MF_BIDI_CONTEXT_DEFAULT = U_MF_BIDI_CONTEXT_AUTO /**< Default context: an alias for U_MF_BIDI_CONTEXT_AUTO. */
207
        } UMFBidiContext;
208
        /**
209
         * The mutable Builder class allows each part of the MessageFormatter to be initialized
210
         * separately; calling its `build()` method yields an immutable MessageFormatter.
211
         *
212
         * Not copyable or movable.
213
         */
214
        class U_I18N_API_CLASS Builder : public UObject {
215
        private:
216
            friend class MessageFormatter;
217
218
            // The pattern to be parsed to generate the formatted message
219
            UnicodeString pattern;
220
            bool hasPattern = false;
221
            bool hasDataModel = false;
222
            // The data model to be used to generate the formatted message
223
            // Initialized either by `setDataModel()`, or by the parser
224
            // through a call to `setPattern()`
225
            MFDataModel dataModel;
226
            // Normalized representation of the pattern;
227
            // ignored if `setPattern()` wasn't called
228
            UnicodeString normalizedInput;
229
            // Errors (internal representation of parse errors)
230
            // Ignored if `setPattern()` wasn't called
231
            StaticErrors* errors;
232
            Locale locale;
233
            // Not owned
234
            const MFFunctionRegistry* customMFFunctionRegistry;
235
            // Error behavior; see comment in `MessageFormatter` class
236
            bool signalErrors = false;
237
            // Bidi isolation strategy
238
            UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_DEFAULT;
239
            // Message directionality
240
            MessageFormatter::UMFBidiContext msgdir = U_MF_BIDI_CONTEXT_DEFAULT;
241
            // Bidi isolation style
242
            UMFBidiIsolationStyle bidiStyle = U_MF_BIDI_STYLE_DEFAULT;
243
244
            void clearState();
245
        public:
246
            /**
247
             * Sets the locale to use for formatting.
248
             *
249
             * @param locale The desired locale.
250
             * @return       A reference to the builder.
251
             *
252
             * @internal ICU 75 technology preview
253
             * @deprecated This API is for technology preview only.
254
             */
255
            U_I18N_API Builder& setLocale(const Locale& locale);
256
            /**
257
             * Sets the pattern (contents of the message) and parses it
258
             * into a data model. If a data model was
259
             * previously set, it is removed.
260
             *
261
             * @param pattern A string in MessageFormat 2.0 syntax.
262
             * @param parseError Struct to receive information on the position
263
             *                   of an error within the pattern.
264
             * @param status    Input/output error code. If the
265
             *                  pattern cannot be parsed, set to failure code.
266
             * @return       A reference to the builder.
267
             *
268
             * @internal ICU 75 technology preview
269
             * @deprecated This API is for technology preview only.
270
             */
271
            U_I18N_API Builder& setPattern(const UnicodeString& pattern,
272
                                           UParseError& parseError,
273
                                           UErrorCode& status);
274
            /**
275
             * Sets a custom function registry.
276
             *
277
             * @param functionRegistry Reference to the function registry to use.
278
             *        `functionRegistry` is not copied,
279
             *        and the caller must ensure its lifetime contains
280
             *        the lifetime of the `MessageFormatter` object built by this
281
             *        builder.
282
             * @return       A reference to the builder.
283
             *
284
             * @internal ICU 75 technology preview
285
             * @deprecated This API is for technology preview only.
286
             */
287
            U_I18N_API Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
288
            /**
289
             * Sets a data model. If a pattern was previously set, it is removed.
290
             *
291
             * @param dataModel Data model to format. Passed by move.
292
             * @return       A reference to the builder.
293
             *
294
             * @internal ICU 75 technology preview
295
             * @deprecated This API is for technology preview only.
296
             */
297
            U_I18N_API Builder& setDataModel(MFDataModel&& dataModel);
298
            /**
299
             * Set the error handling behavior for this formatter.
300
             *
301
             * "Strict" error behavior means that formatting methods
302
             * will set their UErrorCode arguments to signal MessageFormat
303
             * data model, resolution, and runtime errors. Syntax errors are
304
             * always signaled.
305
             *
306
             * "Best effort" error behavior means that MessageFormat errors are
307
             * suppressed:  formatting methods will _not_ set their
308
             * UErrorCode arguments to signal MessageFormat data model,
309
             * resolution, or runtime errors. Best-effort output
310
             * will be returned. Syntax errors are always signaled.
311
             * This is the default behavior.
312
             *
313
             * @param type An enum with type UMFErrorHandlingBehavior;
314
             *             if type == `U_MF_STRICT`, then
315
             *             errors are handled strictly.
316
             *             If type == `U_MF_BEST_EFFORT`, then
317
             *             best-effort output is returned.
318
             *
319
             * The default is to suppress all MessageFormat errors
320
             * and return best-effort output.
321
             *
322
             * @return       A reference to the builder.
323
             *
324
             * @internal ICU 76 technology preview
325
             * @deprecated This API is for technology preview only.
326
             */
327
            U_I18N_API Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
328
            /**
329
             * Set the bidi isolation behavior for this formatter.
330
             *
331
             * "OFF" means that no bidi isolation will be performed.
332
             * "AUTO" means that the default bidi isolation strategy
333
             * as described in the MF2 specification
334
             * ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text )
335
             * will be applied.
336
             *
337
             * @param strategy An enum with type UMFBidiIsolationStrategy;
338
             *                 that specifies how bidi isolation marks are inserted into
339
             *                 the formatting result. The default is U_MF_BIDI_AUTO.
340
             *
341
             * @return       A reference to the builder.
342
             *
343
             * @internal ICU 79 technology preview
344
             * @deprecated This API is for technology preview only.
345
             */
346
            U_I18N_API Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy);
347
            /**
348
             * Set the bidi isolation style for this formatter.
349
             *
350
             * "CONTROL" means that bidi control characters will be inserted into
351
             * the formatted result.
352
             * "HTML_SPAN" means that HTML markup will be inserted into
353
             * the formatted result.
354
             *
355
             * @param style An enum with type UMFBidiIsolationStyle
356
             *                 that specifies how bidi isolation is applied to
357
             *                 the formatting result. The default is
358
             *                 U_MF_BIDI_STYLE_CONTROL.
359
             *
360
             * @return       A reference to the builder.
361
             *
362
             * @internal ICU 79 technology preview
363
             * @deprecated This API is for technology preview only.
364
             */
365
            U_I18N_API Builder& setBidiIsolationStyle(UMFBidiIsolationStyle style);
366
            /**
367
             * Set the directionality context of the input message.
368
             *
369
             * "LTR" means left-to-right and "RTL" means right-to-left.
370
             * "AUTO" means to infer the context from the locale
371
             * (either what was set with setLocale(), or the default locale
372
             * if setLocale() was never called on the builder.)
373
             *
374
             * @param dir An enum with type UMFBidiContext
375
             *                 that specifies the directionality of the message.
376
             *                 The default is U_MF_BIDI_CONTEXT_AUTO.
377
             *
378
             * @return       A reference to the builder.
379
             *
380
             * @internal ICU 79 technology preview
381
             * @deprecated This API is for technology preview only.
382
             */
383
            U_I18N_API Builder& setBidiContext(UMFBidiContext dir);
384
            /**
385
             * Constructs a new immutable MessageFormatter using the pattern or data model
386
             * that was previously set, and the locale (if it was previously set)
387
             * or default locale (otherwise).
388
             *
389
             * The builder object (`this`) can still be used after calling `build()`.
390
             *
391
             * @param status    Input/output error code.  If neither the pattern
392
             *                  nor the data model is set, set to failure code.
393
             * @return          The new MessageFormatter object
394
             *
395
             * @internal ICU 75 technology preview
396
             * @deprecated This API is for technology preview only.
397
             */
398
            U_I18N_API MessageFormatter build(UErrorCode& status) const;
399
            /**
400
             * Default constructor.
401
             * Returns a Builder with the default locale and with no
402
             * data model or pattern set. Either `setPattern()`
403
             * or `setDataModel()` has to be called before calling `build()`.
404
             *
405
             * @param status    Input/output error code.
406
             *
407
             * @internal ICU 75 technology preview
408
             * @deprecated This API is for technology preview only.
409
             */
410
            U_I18N_API Builder(UErrorCode& status);
411
            /**
412
             * Destructor.
413
             *
414
             * @internal ICU 75 technology preview
415
             * @deprecated This API is for technology preview only.
416
             */
417
            U_I18N_API virtual ~Builder();
418
        }; // class MessageFormatter::Builder
419
420
        // TODO(ICU-23428): Shouldn't be public; only used for testing
421
        /**
422
         * Returns a string consisting of the input with optional spaces removed.
423
         *
424
         * @return        A normalized string representation of the input
425
         *
426
         * @internal ICU 75 technology preview
427
         * @deprecated This API is for technology preview only.
428
         */
429
0
        U_I18N_API const UnicodeString& getNormalizedPattern() const { return normalizedInput; } // Inline accessor for the `normalizedInput` member
430
431
    private:
432
        friend class Builder;
433
        friend class Checker;
434
        friend class MessageArguments;
435
        friend class MessageContext;
436
437
        MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
438
439
        MessageFormatter() = delete; // default constructor not implemented
440
441
        // Do not define default assignment operator
442
        const MessageFormatter &operator=(const MessageFormatter &) = delete;
443
444
        // Selection methods
445
446
        // Takes a vector of FormattedPlaceholders
447
        void resolveSelectors(MessageContext&, Environment& env, UErrorCode&, UVector&) const;
448
        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
449
        void filterVariants(const UVector&, UVector&, UErrorCode&) const;
450
        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
451
        void sortVariants(const UVector&, UVector&, UErrorCode&) const;
452
        // Takes a vector of strings (input) and a vector of strings (output)
453
        void matchSelectorKeys(const UVector&, MessageContext&, InternalValue&& rv, UVector&, UErrorCode&) const;
454
        // Takes a vector of FormattedPlaceholders (input),
455
        // and a vector of vectors of strings (output)
456
        void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
457
458
        bool checkSelectOption(const FunctionValue&) const;
459
460
        // Formatting methods
461
        [[nodiscard]] InternalValue evalLiteral(const UnicodeString&, const data_model::Literal&, UErrorCode&) const;
462
        [[nodiscard]] UnicodeString& bidiIsolate(UMFBidiOption, UMFDirectionality, UnicodeString&) const;
463
        void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
464
        FunctionContext makeFunctionContext(const FunctionOptions&) const;
465
        [[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&,
466
                                           MessageContext&, UErrorCode&) const;
467
        [[nodiscard]] InternalValue& evalExpression(const UnicodeString&, Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
468
        [[nodiscard]] FunctionOptions resolveOptions(Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
469
        [[nodiscard]] InternalValue& evalOperand(const UnicodeString&, Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
470
        bool operandToStringWithBadOptionError(MessageContext&, Environment&, const Operand&, UnicodeString&, UErrorCode&) const;
471
        void validateUOptionsOnMarkup(MessageContext&, Environment&, const Markup&, UErrorCode&) const;
472
        [[nodiscard]] InternalValue& evalVariableReference(const UnicodeString&, Environment&, const data_model::VariableName&, MessageContext&, UErrorCode&) const;
473
        [[nodiscard]] InternalValue evalArgument(const UnicodeString&, const data_model::VariableName&, MessageContext&, UErrorCode&) const;
474
        void formatSelectors(MessageContext& context, Environment& env, UErrorCode &status, UnicodeString& result) const;
475
476
        // Function registry methods
477
0
        bool hasCustomMFFunctionRegistry() const { // True iff the custom registry pointer is non-null
478
0
            return (customMFFunctionRegistry != nullptr);
479
0
        }
480
481
        // Precondition: custom function registry exists
482
        // Note: this accessor is const and returns a const reference, but the registry is
483
        // not deeply immutable (a FormatterFactory can have mutable state)
484
        const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
485
486
        bool isCustomFunction(const FunctionName&) const;
487
        bool isBuiltInFunction(const FunctionName&) const;
488
0
        bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } // True if `fn` is a built-in or a custom function
489
        void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const;
490
        // Result is not adopted
491
        Function* lookupFunction(const FunctionName&, UErrorCode&) const;
492
        bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
493
494
        // Checking for resolution errors
495
        void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
496
        void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
497
        void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
498
        void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
499
500
        void initErrors(UErrorCode&);
501
        void clearErrors() const;
502
        void cleanup() noexcept;
503
504
        // The locale this MessageFormatter was created with
505
        /* const */ Locale locale;
506
507
        // Registry for built-in functions
508
        MFFunctionRegistry standardMFFunctionRegistry;
509
        // Registry for custom functions; may be null if no custom registry supplied
510
        // Note: this is *not* owned by the MessageFormatter object
511
        // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
512
        // while also not requiring the function registry to be deeply-copyable. Making the
513
        // function registry copyable would impose a requirement on any implementations
514
        // of the FormatterFactory and SelectorFactory interfaces to implement a custom
515
        // clone() method, which is necessary to avoid sharing between copies of the
516
        // function registry (and thus double-frees)
517
        // Not deeply immutable (the values in the function registry are mutable,
518
        // as a FormatterFactory can have mutable state)
519
        const MFFunctionRegistry* customMFFunctionRegistry;
520
521
        // Data model, representing the parsed message
522
        MFDataModel dataModel;
523
524
        // Normalized version of the input string (optional whitespace removed)
525
        UnicodeString normalizedInput;
526
527
        // Errors -- only used while parsing and checking for data model errors; then
528
        // the MessageContext keeps track of errors
529
        // Must be a raw pointer to avoid including the internal header file
530
        // defining StaticErrors
531
        // Owned by `this`
532
        StaticErrors* errors = nullptr;
533
534
        // Error handling behavior.
535
        // If true, then formatting methods set their UErrorCode arguments
536
        // to signal MessageFormat errors, and no useful output is returned.
537
        // If false, then MessageFormat errors are not signaled and the
538
        // formatting methods return best-effort output.
539
        // The default is false.
540
        bool signalErrors = false;
541
542
        // Bidi isolation strategy.
543
        UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_DEFAULT;
544
545
        // Message directionality
546
        // Inferred from locale by default
547
        UMFDirectionality msgdir = U_MF_DIRECTIONALITY_DEFAULT;
548
549
        // Bidi isolation style
550
        UMFBidiIsolationStyle bidiIsolationStyle = U_MF_BIDI_STYLE_DEFAULT;
551
552
    }; // class MessageFormatter
553
554
} // namespace message2
555
556
U_NAMESPACE_END
557
558
#endif // U_HIDE_DEPRECATED_API
559
560
#endif /* #if !UCONFIG_NO_MF2 */
561
562
#endif /* #if !UCONFIG_NO_FORMATTING */
563
564
#endif /* #if !UCONFIG_NO_NORMALIZATION */
565
566
#endif /* U_SHOW_CPLUSPLUS_API */
567
568
#endif // MESSAGEFORMAT2_H
569
570
// eof