/src/icu/icu4c/source/i18n/plurrule_impl.h
Line | Count | Source |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2007-2016, International Business Machines Corporation and |
6 | | * others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * |
9 | | * File PLURRULE_IMPL.H |
10 | | * |
11 | | ******************************************************************************* |
12 | | */ |
13 | | |
14 | | |
15 | | #ifndef PLURRULE_IMPL |
16 | | #define PLURRULE_IMPL |
17 | | |
18 | | // Internal definitions for the PluralRules implementation. |
19 | | |
20 | | #include "unicode/utypes.h" |
21 | | |
22 | | #if !UCONFIG_NO_FORMATTING |
23 | | |
24 | | #include "unicode/format.h" |
25 | | #include "unicode/locid.h" |
26 | | #include "unicode/parseerr.h" |
27 | | #include "unicode/strenum.h" |
28 | | #include "unicode/ures.h" |
29 | | #include "uvector.h" |
30 | | #include "hash.h" |
31 | | #include "uassert.h" |
32 | | |
33 | | /** |
34 | | * A decimal counterpart of UPLRULES_NO_UNIQUE_VALUE, used in PluralRulesTest |
35 | | * for parsing of samples. Expands to DecimalQuantity::fromExponentString(u"-0.00123456777", ERROR_CODE), passing ERROR_CODE through unchanged. |
36 | | */ |
37 | | #define UPLRULES_NO_UNIQUE_VALUE_DECIMAL(ERROR_CODE) (DecimalQuantity::fromExponentString(u"-0.00123456777", ERROR_CODE)) |
38 | | |
39 | | class PluralRulesTest; |
40 | | |
41 | | U_NAMESPACE_BEGIN |
42 | | |
43 | | class AndConstraint; |
44 | | class RuleChain; |
45 | | class DigitInterval; |
46 | | class PluralRules; |
47 | | class VisibleDigits; |
48 | | |
49 | | namespace pluralimpl { |
50 | | |
51 | | // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. |
52 | | /* Named char16_t constants: each holds the code unit of the character its name describes (ASCII punctuation, digits, a few capitals, plus U+2026 ELLIPSIS). */ |
53 | | static const char16_t DOT = static_cast<char16_t>(0x002E); |
54 | | static const char16_t SINGLE_QUOTE = static_cast<char16_t>(0x0027); |
55 | | static const char16_t SLASH = static_cast<char16_t>(0x002F); |
56 | | static const char16_t BACKSLASH = static_cast<char16_t>(0x005C); |
57 | | static const char16_t SPACE = static_cast<char16_t>(0x0020); |
58 | | static const char16_t EXCLAMATION = static_cast<char16_t>(0x0021); |
59 | | static const char16_t QUOTATION_MARK = static_cast<char16_t>(0x0022); |
60 | | static const char16_t NUMBER_SIGN = static_cast<char16_t>(0x0023); |
61 | | static const char16_t PERCENT_SIGN = static_cast<char16_t>(0x0025); |
62 | | static const char16_t ASTERISK = static_cast<char16_t>(0x002A); |
63 | | static const char16_t COMMA = static_cast<char16_t>(0x002C); |
64 | | static const char16_t HYPHEN = static_cast<char16_t>(0x002D); |
65 | | static const char16_t U_ZERO = static_cast<char16_t>(0x0030); |
66 | | static const char16_t U_ONE = static_cast<char16_t>(0x0031); |
67 | | static const char16_t U_TWO = static_cast<char16_t>(0x0032); |
68 | | static const char16_t U_THREE = static_cast<char16_t>(0x0033); |
69 | | static const char16_t U_FOUR = static_cast<char16_t>(0x0034); |
70 | | static const char16_t U_FIVE = static_cast<char16_t>(0x0035); |
71 | | static const char16_t U_SIX = static_cast<char16_t>(0x0036); |
72 | | static const char16_t U_SEVEN = static_cast<char16_t>(0x0037); |
73 | | static const char16_t U_EIGHT = static_cast<char16_t>(0x0038); |
74 | | static const char16_t U_NINE = static_cast<char16_t>(0x0039); |
75 | | static const char16_t COLON = static_cast<char16_t>(0x003A); |
76 | | static const char16_t SEMI_COLON = static_cast<char16_t>(0x003B); |
77 | | static const char16_t EQUALS = static_cast<char16_t>(0x003D); |
78 | | static const char16_t AT = static_cast<char16_t>(0x0040); |
79 | | static const char16_t CAP_A = static_cast<char16_t>(0x0041); |
80 | | static const char16_t CAP_B = static_cast<char16_t>(0x0042); |
81 | | static const char16_t CAP_R = static_cast<char16_t>(0x0052); |
82 | | static const char16_t CAP_Z = static_cast<char16_t>(0x005A); |
83 | | static const char16_t LOWLINE = static_cast<char16_t>(0x005F); |
84 | | static const char16_t LEFTBRACE = static_cast<char16_t>(0x007B); |
85 | | static const char16_t RIGHTBRACE = static_cast<char16_t>(0x007D); |
86 | | static const char16_t TILDE = static_cast<char16_t>(0x007E); |
87 | | static const char16_t ELLIPSIS = static_cast<char16_t>(0x2026); |
88 | | /* Lowercase ASCII letters a-w, y and z; no constant for 'x' (0x0078) is defined here. */ |
89 | | static const char16_t LOW_A = static_cast<char16_t>(0x0061); |
90 | | static const char16_t LOW_B = static_cast<char16_t>(0x0062); |
91 | | static const char16_t LOW_C = static_cast<char16_t>(0x0063); |
92 | | static const char16_t LOW_D = static_cast<char16_t>(0x0064); |
93 | | static const char16_t LOW_E = static_cast<char16_t>(0x0065); |
94 | | static const char16_t LOW_F = static_cast<char16_t>(0x0066); |
95 | | static const char16_t LOW_G = static_cast<char16_t>(0x0067); |
96 | | static const char16_t LOW_H = static_cast<char16_t>(0x0068); |
97 | | static const char16_t LOW_I = static_cast<char16_t>(0x0069); |
98 | | static const char16_t LOW_J = static_cast<char16_t>(0x006a); |
99 | | static const char16_t LOW_K = static_cast<char16_t>(0x006B); |
100 | | static const char16_t LOW_L = static_cast<char16_t>(0x006C); |
101 | | static const char16_t LOW_M = static_cast<char16_t>(0x006D); |
102 | | static const char16_t LOW_N = static_cast<char16_t>(0x006E); |
103 | | static const char16_t LOW_O = static_cast<char16_t>(0x006F); |
104 | | static const char16_t LOW_P = static_cast<char16_t>(0x0070); |
105 | | static const char16_t LOW_Q = static_cast<char16_t>(0x0071); |
106 | | static const char16_t LOW_R = static_cast<char16_t>(0x0072); |
107 | | static const char16_t LOW_S = static_cast<char16_t>(0x0073); |
108 | | static const char16_t LOW_T = static_cast<char16_t>(0x0074); |
109 | | static const char16_t LOW_U = static_cast<char16_t>(0x0075); |
110 | | static const char16_t LOW_V = static_cast<char16_t>(0x0076); |
111 | | static const char16_t LOW_W = static_cast<char16_t>(0x0077); |
112 | | static const char16_t LOW_Y = static_cast<char16_t>(0x0079); |
113 | | static const char16_t LOW_Z = static_cast<char16_t>(0x007A); |
114 | | |
115 | | } /* namespace pluralimpl */ |
116 | | |
117 | | |
118 | | static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; /* 2^31 - 1, the largest int32_t value; presumably marks an open upper bound of a range - confirm at the use sites. */ |
119 | | /** Token kinds used by PluralRuleParser (returned by its charType()/getKeyType(), stored in its type/prevType members) and by AndConstraint::digitsType. */ |
120 | | enum tokenType { |
121 | | none, |
122 | | tNumber, |
123 | | tComma, |
124 | | tSemiColon, |
125 | | tSpace, |
126 | | tColon, |
127 | | tAt, // '@' |
128 | | tDot, |
129 | | tDot2, |
130 | | tEllipsis, |
131 | | tKeyword, |
132 | | tAnd, |
133 | | tOr, |
134 | | tMod, // 'mod' or '%' |
135 | | tNot, // 'not' only. |
136 | | tIn, // 'in' only. |
137 | | tEqual, // '=' only. |
138 | | tNotEqual, // '!=' |
139 | | tTilde, |
140 | | tWithin, |
141 | | tIs, |
142 | | tVariableN, |
143 | | tVariableI, |
144 | | tVariableF, |
145 | | tVariableV, |
146 | | tVariableT, |
147 | | tVariableE, |
148 | | tVariableC, |
149 | | tDecimal, |
150 | | tInteger, |
151 | | tEOF |
152 | | }; |
153 | | |
154 | | /** Parser for plural rule text: parse() takes the rule string, a destination PluralRules and an in/out UErrorCode. */ |
155 | | class PluralRuleParser: public UMemory { |
156 | | public: |
157 | | PluralRuleParser(); |
158 | | virtual ~PluralRuleParser(); |
159 | | |
160 | | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); |
161 | | void getNextToken(UErrorCode &status); |
162 | | void checkSyntax(UErrorCode &status); |
163 | | static int32_t getNumberValue(const UnicodeString &token); |
164 | | |
165 | | private: |
166 | | static tokenType getKeyType(const UnicodeString& token, tokenType type); |
167 | | static tokenType charType(char16_t ch); |
168 | | static UBool isValidKeyword(const UnicodeString& token); |
169 | | |
170 | | const UnicodeString *ruleSrc; // The rules string (held by pointer; ownership is not visible from this header). |
171 | | int32_t ruleIndex; // String index in the input rules, the current parse position. |
172 | | UnicodeString token; // Token most recently scanned. |
173 | | tokenType type; /* presumably the kind of `token` - confirm in the implementation */ |
174 | | tokenType prevType; /* presumably the kind of the token scanned before it - confirm */ |
175 | | |
176 | | // The items currently being parsed & built. |
177 | | // Note: currentChain may not be the last RuleChain in the |
178 | | // list because the "other" chain is forced to the end. |
179 | | AndConstraint *curAndConstraint; |
180 | | RuleChain *currentChain; |
181 | | |
182 | | int32_t rangeLowIdx; // Indices in the UVector of ranges of the |
183 | | int32_t rangeHiIdx; // low and hi values currently being parsed. |
184 | | /* Parse-state labels. No data member of this enum type is declared in the class as shown here. */ |
185 | | enum EParseState { |
186 | | kKeyword, |
187 | | kExpr, |
188 | | kValue, |
189 | | kRangeList, |
190 | | kSamples |
191 | | }; |
192 | | }; |
193 | | /** Operand selectors accepted by IFixedDecimal::getPluralOperand(). */ |
194 | | enum PluralOperand { |
195 | | /** |
196 | | * The double value of the entire number. |
197 | | */ |
198 | | PLURAL_OPERAND_N, |
199 | | |
200 | | /** |
201 | | * The integer value, with the fraction digits truncated off. |
202 | | */ |
203 | | PLURAL_OPERAND_I, |
204 | | |
205 | | /** |
206 | | * All visible fraction digits as an integer, including trailing zeros. |
207 | | */ |
208 | | PLURAL_OPERAND_F, |
209 | | |
210 | | /** |
211 | | * Visible fraction digits as an integer, not including trailing zeros. |
212 | | */ |
213 | | PLURAL_OPERAND_T, |
214 | | |
215 | | /** |
216 | | * Number of visible fraction digits. |
217 | | */ |
218 | | PLURAL_OPERAND_V, |
219 | | |
220 | | /** |
221 | | * Number of visible fraction digits, not including trailing zeros. |
222 | | */ |
223 | | PLURAL_OPERAND_W, |
224 | | |
225 | | /** |
226 | | * Suppressed exponent for scientific notation (exponent needed in |
227 | | * scientific notation to approximate i). |
228 | | */ |
229 | | PLURAL_OPERAND_E, |
230 | | |
231 | | /** |
232 | | * This operand is currently treated as an alias for `PLURAL_OPERAND_E`. |
233 | | * In the future, it will represent: |
234 | | * |
235 | | * Suppressed exponent for compact notation (exponent needed in |
236 | | * compact notation to approximate i). |
237 | | */ |
238 | | PLURAL_OPERAND_C, |
239 | | |
240 | | /** |
241 | | * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. |
242 | | * |
243 | | * <p>Returns the integer value, but will fail if the number has fraction digits. |
244 | | * That is, using "j" instead of "i" is like implicitly adding "v is 0". |
245 | | * |
246 | | * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches |
247 | | * "3" but not "3.1" or "3.0". |
248 | | */ |
249 | | PLURAL_OPERAND_J |
250 | | }; |
251 | | |
252 | | /** |
253 | | * Converts from the tokenType enum to PluralOperand. Asserts that the given |
254 | | * tokenType can be mapped to a PluralOperand; `tt` is the token type to convert, and the mapped operand is returned. |
255 | | */ |
256 | | PluralOperand tokenTypeToPluralOperand(tokenType tt); |
257 | | |
258 | | /** |
259 | | * An interface to FixedDecimal, allowing for other implementations. All four query methods are pure virtual. |
260 | | * @internal |
261 | | */ |
262 | | class U_I18N_API IFixedDecimal { |
263 | | public: |
264 | | virtual ~IFixedDecimal(); |
265 | | |
266 | | /** |
267 | | * Returns the value corresponding to the specified operand (one of the PluralOperand values, e.g. n, i, f, t, v, or w). |
268 | | * If the operand is 'n', returns a double; otherwise, returns an integer (the declared return type is double in every case, so integer results are carried in a double). |
269 | | */ |
270 | | virtual double getPluralOperand(PluralOperand operand) const = 0; |
271 | | /** Whether the number is NaN (per the method name; the exact semantics are defined by implementations). */ |
272 | | virtual bool isNaN() const = 0; |
273 | | /** Whether the number is infinite (per the method name; the exact semantics are defined by implementations). */ |
274 | | virtual bool isInfinite() const = 0; |
275 | | |
276 | | /** Whether the number has no nonzero fraction digits. */ |
277 | | virtual bool hasIntegerValue() const = 0; |
278 | | }; |
279 | | |
280 | | /** |
281 | | * class FixedDecimal serves to communicate the properties |
282 | | * of a formatted number from a decimal formatter to PluralRules::select() |
283 | | * It implements IFixedDecimal and exposes its state as public data members. |
284 | | * see DecimalFormat::getFixedDecimal() |
285 | | * @internal |
286 | | */ |
287 | | class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { |
288 | | public: |
289 | | /** |
290 | | * @param n the number, e.g. 12.345 |
291 | | * @param v The number of visible fraction digits, e.g. 3 |
292 | | * @param f The fraction digits, e.g. 345 |
293 | | * @param e The exponent, e.g. 7 in 1.2e7, for scientific notation |
294 | | * @param c Currently: an alias for param `e`. |
295 | | */ |
296 | | FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c); |
297 | | FixedDecimal(double n, int32_t v, int64_t f, int32_t e); |
298 | | FixedDecimal(double n, int32_t v, int64_t f); |
299 | | FixedDecimal(double n, int32_t); /* second parameter is unnamed in this declaration; presumably v as documented above - confirm */ |
300 | | explicit FixedDecimal(double n); |
301 | | FixedDecimal(); |
302 | | ~FixedDecimal() override; |
303 | | FixedDecimal(const UnicodeString &s, UErrorCode &ec); |
304 | | FixedDecimal(const FixedDecimal &other); |
305 | | |
306 | | static FixedDecimal createWithExponent(double n, int32_t v, int32_t e); |
307 | | |
308 | | double getPluralOperand(PluralOperand operand) const override; |
309 | | bool isNaN() const override; |
310 | | bool isInfinite() const override; |
311 | | bool hasIntegerValue() const override; |
312 | | |
313 | | bool isNanOrInfinity() const; // used in decimfmtimpl.cpp |
314 | | |
315 | | int32_t getVisibleFractionDigitCount() const; |
316 | | |
317 | | void init(double n, int32_t v, int64_t f, int32_t e, int32_t c); |
318 | | void init(double n, int32_t v, int64_t f, int32_t e); |
319 | | void init(double n, int32_t v, int64_t f); |
320 | | void init(double n); |
321 | | UBool quickInit(double n); // Try a fast-path only initialization, |
322 | | // return true if successful. |
323 | | void adjustForMinFractionDigits(int32_t min); |
324 | | static int64_t getFractionalDigits(double n, int32_t v); |
325 | | static int32_t decimals(double n); |
326 | | |
327 | | FixedDecimal& operator=(const FixedDecimal& other) = default; |
328 | | bool operator==(const FixedDecimal &other) const; |
329 | | |
330 | | UnicodeString toString() const; |
331 | | |
332 | | double doubleValue() const; |
333 | | int64_t longValue() const; |
334 | | /* Public data members. NOTE(review): their meanings are only suggested by their names and the constructor @param docs above - confirm in the implementation. */ |
335 | | double source; |
336 | | int32_t visibleDecimalDigitCount; |
337 | | int64_t decimalDigits; |
338 | | int64_t decimalDigitsWithoutTrailingZeros; |
339 | | int64_t intValue; |
340 | | int32_t exponent; |
341 | | UBool _hasIntegerValue; |
342 | | UBool isNegative; |
343 | | UBool _isNaN; |
344 | | UBool _isInfinite; |
345 | | }; |
346 | | /** A single constraint node. Nodes are chained through `next`, and OrConstraint::childNode points to such a chain. */ |
347 | | class AndConstraint : public UMemory { |
348 | | public: |
349 | | typedef enum RuleOp { |
350 | | NONE, |
351 | | MOD |
352 | | } RuleOp; |
353 | | RuleOp op = AndConstraint::NONE; |
354 | | int32_t opNum = -1; // for mod expressions, the right operand of the mod. |
355 | | int32_t value = -1; // valid for 'is' rules only. |
356 | | UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. |
357 | | UBool negated = false; // true for negated rules. |
358 | | UBool integerOnly = false; // true for 'within' rules. NOTE(review): the name suggests integer-only matching, which would fit 'in' rather than 'within' - confirm against the parser. |
359 | | tokenType digitsType = none; // n | i | v | f constraint (presumably one of the tVariable* token kinds; defaults to none). |
360 | | AndConstraint *next = nullptr; |
361 | | // Internal error status, used for errors that occur during the copy constructor. |
362 | | UErrorCode fInternalStatus = U_ZERO_ERROR; |
363 | | |
364 | 192k | AndConstraint() = default; |
365 | | AndConstraint(const AndConstraint& other); |
366 | | virtual ~AndConstraint(); |
367 | | AndConstraint* add(UErrorCode& status); |
368 | | // UBool isFulfilled(double number); |
369 | | UBool isFulfilled(const IFixedDecimal &number); |
370 | | }; |
371 | | /** A node in a singly linked list of OrConstraint (via `next`); childNode points to this node's AndConstraint chain. Note that add() returns an AndConstraint*. */ |
372 | | class OrConstraint : public UMemory { |
373 | | public: |
374 | | AndConstraint *childNode = nullptr; |
375 | | OrConstraint *next = nullptr; |
376 | | // Internal error status, used for errors that occur during the copy constructor. |
377 | | UErrorCode fInternalStatus = U_ZERO_ERROR; |
378 | | |
379 | 124k | OrConstraint() = default; |
380 | | OrConstraint(const OrConstraint& other); |
381 | | virtual ~OrConstraint(); |
382 | | AndConstraint* add(UErrorCode& status); |
383 | | // UBool isFulfilled(double number); |
384 | | UBool isFulfilled(const IFixedDecimal &number); |
385 | | }; |
386 | | /** One link in a singly linked list (via fNext) that holds a keyword (fKeyword), a pointer to its OrConstraint list (ruleHeader) and the sample strings for that keyword. */ |
387 | | class RuleChain : public UMemory { |
388 | | public: |
389 | | UnicodeString fKeyword; |
390 | | RuleChain *fNext = nullptr; |
391 | | OrConstraint *ruleHeader = nullptr; |
392 | | UnicodeString fDecimalSamples; // Samples strings from rule source |
393 | | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. |
394 | | UBool fDecimalSamplesUnbounded = false; |
395 | | UBool fIntegerSamplesUnbounded = false; |
396 | | // Internal error status, used for errors that occur during the copy constructor. |
397 | | UErrorCode fInternalStatus = U_ZERO_ERROR; |
398 | | |
399 | 118k | RuleChain() = default; |
400 | | RuleChain(const RuleChain& other); |
401 | | virtual ~RuleChain(); |
402 | | |
403 | | UnicodeString select(const IFixedDecimal &number) const; |
404 | | void dumpRules(UnicodeString& result); |
405 | | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; |
406 | | UBool isKeyword(const UnicodeString& keyword) const; |
407 | | }; |
408 | | /** StringEnumeration subclass constructed from a RuleChain; it keeps a position (pos) and a UVector named fKeywordNames. */ |
409 | | class PluralKeywordEnumeration : public StringEnumeration { |
410 | | public: |
411 | | PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); |
412 | | virtual ~PluralKeywordEnumeration(); |
413 | | static UClassID U_EXPORT2 getStaticClassID(); |
414 | | virtual UClassID getDynamicClassID() const override; |
415 | | virtual const UnicodeString* snext(UErrorCode& status) override; |
416 | | virtual void reset(UErrorCode& status) override; |
417 | | virtual int32_t count(UErrorCode& status) const override; |
418 | | private: |
419 | | int32_t pos; |
420 | | UVector fKeywordNames; |
421 | | }; |
422 | | |
423 | | /** StringEnumeration subclass exported with U_I18N_API; holds two UResourceBundle pointers and a UErrorCode, fOpenStatus (presumably the result of opening them in the constructor - confirm). */ |
424 | | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { |
425 | | public: |
426 | | PluralAvailableLocalesEnumeration(UErrorCode &status); |
427 | | virtual ~PluralAvailableLocalesEnumeration(); |
428 | | virtual const char* next(int32_t *resultLength, UErrorCode& status) override; |
429 | | virtual void reset(UErrorCode& status) override; |
430 | | virtual int32_t count(UErrorCode& status) const override; |
431 | | private: |
432 | | UErrorCode fOpenStatus; /* unlike the two pointers below, this member has no default initializer */ |
433 | | UResourceBundle *fLocales = nullptr; |
434 | | UResourceBundle *fRes = nullptr; |
435 | | }; |
436 | | |
437 | | U_NAMESPACE_END |
438 | | |
439 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |
440 | | |
441 | | #endif // PLURRULE_IMPL |
442 | | //eof |