Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/plural.py: 36%
290 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:16 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:16 +0000
1"""
2 babel.plural
3 ~~~~~~~~~~~~
5 CLDR Plural support. See UTS #35.
7 :copyright: (c) 2013-2023 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
12import decimal
13import re
14from collections.abc import Iterable, Mapping
15from typing import TYPE_CHECKING, Any, Callable
17if TYPE_CHECKING:
18 from typing_extensions import Literal
# Every plural category tag accepted by `PluralRule`. The tuple order is the
# order used when rendering `PluralRule.__repr__` and when assigning gettext
# plural indices in `to_gettext`.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
# Tag produced by the generated evaluators when no explicit rule matches.
_fallback_tag = 'other'
def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for ‘c’. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    >>> extract_operands(decimal.Decimal('1.50'))
    (Decimal('1.50'), 1, 2, 1, 50, 5, 0, 0)
    >>> extract_operands(decimal.Decimal('0.05'))
    (Decimal('0.05'), 0, 2, 2, 5, 5, 0, 0)

    :param source: A real number (``int``, ``float`` or ``decimal.Decimal``)
    :return: A n-i-v-w-f-t-c-e tuple; ``n`` is an ``int`` for integral input
             and a ``decimal.Decimal`` otherwise. ``c`` and ``e`` are always 0.
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # Integral floats carry no visible fraction digits.
            n = i
        else:
            # Go through the string representation rather than passing the
            # float straight to `Decimal`: the direct conversion is lossless
            # and would expose the binary representation error as dozens of
            # spurious fraction digits. Callers who want that behaviour can
            # pass in a pre-converted `Decimal` of the desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # The coefficient omits leading zeros, so e.g. Decimal('0.05')
            # has digits == (5,) with exponent -2. Left-pad to `-exp` digits
            # so the leading zeros of the fraction are counted as visible.
            trailing = ''.join(str(d) for d in dec_tuple.digits[exp:]).rjust(-exp, '0')
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    c = e = 0  # TODO: c and e are not supported
    return n, i, v, w, f, t, c, e
class PluralRule:
    """Callable set of pluralization rules for one language.

    The constructor accepts either a list of ``(tag, expr)`` tuples or a dict
    of `CLDR rules`_. Calling the resulting object with a positive or negative
    number (integer or float) returns the tag of the plural form to use:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other, where other is an implicit default. Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Parse ``rules`` and store the resulting syntax trees.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or defined more than once
        """
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        seen = set()
        self.abstract: list[tuple[str, Any]] = []
        for tag, expression in sorted(pairs):
            if tag not in _plural_tags:
                raise ValueError(f"unknown tag {tag!r}")
            if tag in seen:
                raise ValueError(f"tag {tag!r} defined twice")
            seen.add(tag)
            tree = _Parser(expression).ast
            # Expressions consisting only of samples parse to nothing and
            # therefore contribute no rule.
            if tree:
                self.abstract.append((tag, tree))

    def __repr__(self) -> str:
        compiled = self.rules
        parts = [f"{tag}: {compiled[tag]}" for tag in _plural_tags if tag in compiled]
        args = ", ".join(parts)
        return f"<{type(self).__name__} {args!r}>"

    @classmethod
    def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
        """Return a `PluralRule` for ``rules``.

        An existing `PluralRule` object is handed back unchanged; anything
        else is passed to the constructor.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        return rules if isinstance(rules, PluralRule) else cls(rules)

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        compiler = _UnicodeCompiler()
        return {tag: compiler.compile(tree) for tag, tree in self.abstract}

    @property
    def tags(self) -> frozenset[str]:
        """The tags that have an explicit rule.

        The implicit default ``'other'`` is only included when a rule was
        explicitly given for it.
        """
        return frozenset(entry[0] for entry in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled evaluator is
        # rebuilt lazily on the first call.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # Compile the rules to a Python function on first use and cache it
        # in the `_func` slot.
        try:
            evaluate = self._func
        except AttributeError:
            evaluate = self._func = to_python(self)
        return evaluate(n)
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render a list/dict of rules or a `PluralRule` object as a JavaScript
    function expression. The generated function needs no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: the generated function will probably evaluate
    expressions involved in range operations multiple times. This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compiler = _JavaScriptCompiler()
    # One "<condition> ? '<tag>' : " link per rule, chained into a single
    # conditional expression that ends in the fallback tag.
    branches = ''.join(
        f"{compiler.compile(ast)} ? {tag!r} : "
        for tag, ast in PluralRule.parse(rule).abstract
    )
    return f"(function(n) {{ return {branches}{_fallback_tag!r}; }})"
def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
    """Compile a list/dict of rules or a `PluralRule` object into a regular
    Python function. This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    # Names the generated source is allowed to reference.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    compiler = _PythonCompiler()
    lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    # str(tag) coerces the tag to the native string type; tags come from a
    # small ASCII-only set, so this is always safe.
    lines.extend(
        f" if ({compiler.compile(ast)}): return {str(tag)!r}"
        for tag, ast in PluralRule.parse(rule).abstract
    )
    lines.append(f" return {_fallback_tag!r}")
    # The source is assembled solely from the parsed syntax tree, never from
    # the raw rule text, before being executed to define `evaluate`.
    exec(compile('\n'.join(lines), '<rule>', 'exec'), namespace)
    return namespace['evaluate']
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render the plural rule as a gettext expression. The gettext expression
    is technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    parsed = PluralRule.parse(rule)

    # The fallback tag always takes part, even without an explicit rule.
    used_tags = parsed.tags | {_fallback_tag}
    compiler = _GettextCompiler()
    # A tag's gettext index is its position among the used tags, taken in
    # the order of `_plural_tags`.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]

    pieces = [f"nplurals={len(used_tags)}; plural=("]
    pieces.extend(
        f"{compiler.compile(ast)} ? {ordered_tags.index(tag)} : "
        for tag, ast in parsed.abstract
    )
    pieces.append(f"{ordered_tags.index(_fallback_tag)});")
    return ''.join(pieces)
def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Integer range list test. This is the callback for the "in" operator
    of the UTS #35 pluralization rule language. Non-integral numbers never
    match:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    if num != int(num):
        return False
    return within_range_list(num, range_list)
def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Float range test. This is the callback for the "within" operator
    of the UTS #35 pluralization rule language. Range bounds are inclusive:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if lower <= num <= upper:
            return True
    return False
def cldr_modulo(a: float, b: float) -> float:
    """Javaish modulo. Unlike Python's ``%``, the result takes the sign of
    the dividend rather than that of the divisor:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    negative_dividend = a < 0
    # Work on magnitudes and restore the dividend's sign afterwards.
    dividend = a * -1 if negative_dividend else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if negative_dividend else remainder
class RuleError(Exception):
    """Error raised when a pluralization rule cannot be tokenized or parsed."""
# Operand names that may appear on the left-hand side of a relation. The
# tokenizer recognises them as 'word' tokens and the parser rejects any other
# word in operand position. Entries marked with * are fraction-related.
_VARS = {
    'n',  # absolute value of the source number.
    'i',  # integer digits of n.
    'v',  # number of visible fraction digits in n, with trailing zeros.*
    'w',  # number of visible fraction digits in n, without trailing zeros.*
    'f',  # visible fraction digits in n, with trailing zeros.*
    't',  # visible fraction digits in n, without trailing zeros.*
    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    'e',  # currently, synonym for `c`. however, may be redefined in the future.
}
# Tokenizer table of (token type, pattern) pairs. `tokenize_rule` tries the
# patterns in this order at the current position and takes the first match;
# a token type of None means the matched text (whitespace) is discarded.
_RULES: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    # Keywords plus the single-letter operands from `_VARS`.
    ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
]
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a CLDR plural rule into ``(type, text)`` tokens.

    Everything from the first ``@`` onwards (the samples) is ignored. The
    tokens are returned in reverse order so that consumers can take the next
    token with ``list.pop()``.

    :param s: the rule text
    :raise RuleError: if a character matches none of the token patterns
    """
    text = s.split('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor = 0
    length = len(text)
    while cursor < length:
        for kind, pattern in _RULES:
            hit = pattern.match(text, cursor)
            if hit is None:
                continue
            cursor = hit.end()
            # Patterns without a token type (whitespace) are skipped.
            if kind:
                tokens.append((kind, hit.group()))
            break
        else:
            raise RuleError('malformed CLDR pluralization rule. '
                            'Got unexpected %r' % text[cursor])
    tokens.reverse()
    return tokens
def test_next_token(
    tokens: list[tuple[str, str]],
    type_: str,
    value: str | None = None,
) -> list[tuple[str, str]] | bool:
    """Check whether the next token has the given type (and text).

    ``tokens`` is a reversed token list, so the next token is its last
    element. When ``tokens`` is empty the (falsy) list itself is returned;
    otherwise the result is a bool.

    :param tokens: reversed list of ``(type, text)`` tokens
    :param type_: required token type
    :param value: required token text, or ``None`` to accept any text
    """
    if not tokens:
        return tokens
    upcoming = tokens[-1]
    if upcoming[0] != type_:
        return False
    return value is None or upcoming[1] == value
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Consume and return the next token if it matches, else return ``None``.

    :param tokens: reversed list of ``(type, text)`` tokens; the matching
                   token is popped from its end
    :param type_: required token type
    :param value: required token text, or ``None`` to accept any text
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the syntax-tree node for an integer literal."""
    return ('value', (value,))
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the syntax-tree node for an operand; it carries no arguments."""
    return (name, ())
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the syntax-tree node wrapping a list of ranges."""
    return ('range_list', range_list)
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap a syntax-tree node in a ``'not'`` node."""
    return ('not', (rv,))
class _Parser:
    """Internal parser. This class translates a single rule into an abstract
    tree of tuples. It implements the following grammar::

        condition         = and_condition ('or' and_condition)*
                            ('@integer' samples)?
                            ('@decimal' samples)?
        and_condition     = relation ('and' relation)*
        relation          = is_relation | in_relation | within_relation
        is_relation       = expr 'is' ('not')? value
        in_relation       = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation   = expr ('not')? 'within' range_list
        expr              = operand (('mod' | '%') value)?
        operand           = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list        = (range | value) (',' range_list)*
        value             = digit+
        digit             = 0|1|2|3|4|5|6|7|8|9
        range             = value'..'value
        samples           = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange       = decimalValue '~' decimalValue
        decimalValue      = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        self.tokens = tokenize_rule(string)
        if self.tokens:
            self.ast = self.condition()
            if self.tokens:
                raise RuleError(f"Expected end of rule, got {self.tokens[-1][1]!r}")
        else:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, raising `RuleError` when it
        does not match ``type_`` (and ``value``, if given).
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            # Describe the expectation by its text when one was requested,
            # otherwise by its token type.
            term = repr(value if value is not None else (type_ or value))
        if not self.tokens:
            raise RuleError(f"expected {term} but end of rule reached")
        raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")

    def condition(self):
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            operator = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return operator, (left, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(left)
        node = ('relation', (method, left, self.range_list()))
        return negate(node) if negated else node

    def newfangled_relation(self, left):
        """Parse the operator form of a relation: ``=`` or ``!=``."""
        is_equality = skip_token(self.tokens, 'symbol', '=')
        if not is_equality and not skip_token(self.tokens, 'symbol', '!='):
            raise RuleError('Expected "=" or "!=" or legacy relation')
        node = ('relation', ('in', left, self.range_list()))
        return node if is_equality else negate(node)

    def range_or_value(self):
        # A single value is represented as a range whose two ends are equal.
        low = self.value()
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        ranges = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            ranges.append(self.range_or_value())
        return range_list_node(ranges)

    def expr(self):
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        # 'mod' and '%' are two spellings of the same operator.
        if skip_token(self.tokens, 'word', 'mod') or skip_token(self.tokens, 'symbol', '%'):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        _, digits = self.expect('value')
        return value_node(int(digits))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles both operands and substitutes them into
    the two ``%s`` placeholders of ``tmpl``.
    """
    def compile_binary(self, left, right):
        return tmpl % (self.compile(left), self.compile(right))

    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its single operand and substitutes it
    into ``tmpl``.
    """
    def compile_unary(self, x):
        return tmpl % self.compile(x)

    return compile_unary
def compile_zero(x):
    """Compile an operand to the constant ``'0'``; used as a compiler method
    for operands the target format cannot express.
    """
    return '0'
class _Compiler:
    """Base class of the compilers, which transform the parsed expressions
    into multiple output formats.
    """

    def compile(self, arg):
        """Compile one ``(op, args)`` node by dispatching to ``compile_<op>``."""
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operands compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        # Relations are rendered differently by every target format.
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python source."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        # Renders e.g. "IN(n, [(1, 3),(5, 5)])"; the upper-cased method is
        # the name of the helper the generated code calls.
        pairs = [
            f"({self.compile(low)}, {self.compile(high)})"
            for low, high in range_list[1]
        ]
        ranges = ",".join(pairs)
        return f"{method.upper()}({self.compile(expr)}, [{ranges}])"
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression.

    The only operand rendered as a variable is ``n``: ``i`` is rendered as
    ``n`` and every other operand as the constant ``0``.
    """

    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # `extract_operands` always reports c and e as 0; emitting them as bare
    # identifiers would produce an expression that references unknown names.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Render a relation as an ``||``-joined chain of comparisons.

        ``method`` is accepted for interface compatibility but not used: "in"
        and "within" are rendered identically.
        """
        compiled_expr = self.compile(expr)
        clauses = []
        for low, high in range_list[1]:
            if low == high:
                # A single value needs just an equality test.
                clauses.append(f"({compiled_expr} == {self.compile(low)})")
            else:
                lower = self.compile(low)
                upper = self.compile(high)
                clauses.append(f"({compiled_expr} >= {lower} && {compiled_expr} <= {upper})")
        return f"({' || '.join(clauses)})"
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    compile_i = lambda x: 'parseInt(n, 10)'
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # The function built by `to_javascript` only receives `n`, so the
    # compact-exponent operands cannot be emitted as variables;
    # `extract_operands` always reports them as 0.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Render a relation; "in" additionally requires an integral value."""
        code = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method == 'in':
            compiled_expr = self.compile(expr)
            # "in" only matches integers: the value must equal its own
            # integer part.
            code = f"(parseInt({compiled_expr}, 10) == {compiled_expr} && {code})"
        return code
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one. We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        # Negation is folded into the relation text ("n not in ...") rather
        # than being rendered as a separate operator.
        _, operands = relation
        return self.compile_relation(*operands, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        rendered = [
            self.compile(low) if low == high
            else f"{self.compile(low)}..{self.compile(high)}"
            for low, high in range_list[1]
        ]
        negation = ' not' if negated else ''
        ranges = ','.join(rendered)
        return f"{self.compile(expr)}{negation} {method} {ranges}"