Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/plural.py: 36%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
    babel.plural
    ~~~~~~~~~~~~
5 CLDR Plural support. See UTS #35.
7 :copyright: (c) 2013-2025 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
12import decimal
13import re
14from collections.abc import Iterable, Mapping
15from typing import Any, Callable, Literal
# Plural category tags accepted as rule keys. The order of this tuple is also
# the order used when numbering gettext plural indices and when rendering
# ``PluralRule.__repr__``.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
# Tag produced by the generated evaluators when no explicit rule matches.
_fallback_tag = 'other'
def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for 'c'. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    >>> extract_operands(decimal.Decimal('0.05'))
    (Decimal('0.05'), 0, 2, 2, 5, 5, 0, 0)
    >>> extract_operands(decimal.Decimal('1.50'))
    (Decimal('1.50'), 1, 2, 1, 50, 5, 0, 0)

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t-c-e tuple. ``n`` is an ``int`` for integral input
             (including integral floats) and a ``Decimal`` otherwise; ``c``
             and ``e`` are always ``0``.
    :rtype: tuple[decimal.Decimal|int, int, int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            n = i
        else:
            # Go through ``str()`` rather than ``Decimal(float)``: the latter
            # converts the binary float losslessly, which can need 53 or more
            # digits and is rarely what the caller means. Callers that do want
            # that behaviour can pass in a pre-converted `Decimal` instead.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            digits = ''.join(str(d) for d in dec_tuple.digits[exp:])
            # ``as_tuple().digits`` carries no leading zeros (``0.05`` is
            # ``digits=(5,), exponent=-2``), so restore the zeros that sit
            # between the decimal point and the first significant digit.
            # Without this, ``v`` and ``w`` under-count for such values.
            trailing = '0' * (-exp - len(digits)) + digits
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    c = e = 0  # TODO: c and e are not supported
    return n, i, v, w, f, t, c, e
class PluralRule:
    """A callable set of language pluralization rules.

    Instances are built from a list of ``(tag, expr)`` tuples or from a dict
    of `CLDR rules`_. Calling an instance with a positive or negative number
    (integer or float) returns the tag of the plural form to use:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other, where other is an implicit default. Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise ValueError: if a tag is unknown or is defined more than once
        :raise RuleError: if the expression is malformed
        """
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        seen: set[str] = set()
        self.abstract: list[tuple[str, Any]] = []
        for tag, expression in sorted(pairs):
            if tag not in _plural_tags:
                raise ValueError(f"unknown tag {tag!r}")
            if tag in seen:
                raise ValueError(f"tag {tag!r} defined twice")
            seen.add(tag)
            tree = _Parser(expression).ast
            # Expressions consisting only of samples parse to no tree at all
            # and therefore contribute no rule.
            if tree:
                self.abstract.append((tag, tree))

    def __repr__(self) -> str:
        compiled = self.rules
        parts = [f"{tag}: {compiled[tag]}" for tag in _plural_tags if tag in compiled]
        joined = ", ".join(parts)
        return f"<{type(self).__name__} {joined!r}>"

    @classmethod
    def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
        """Return a `PluralRule` for the given rules.

        A `PluralRule` argument is handed back unchanged; anything else is
        passed to the constructor.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        return rules if isinstance(rules, PluralRule) else cls(rules)

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        compiler = _UnicodeCompiler()
        return {tag: compiler.compile(ast) for tag, ast in self.abstract}

    @property
    def tags(self) -> frozenset[str]:
        """The set of tags explicitly defined in this rule.

        The implicit default ``'other'`` is only included when the rule
        defines it explicitly.
        """
        return frozenset(entry[0] for entry in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled evaluator in
        # ``_func`` is rebuilt lazily on the first call.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # ``_func`` is a slot that stays unset until the first call.
        try:
            evaluate = self._func
        except AttributeError:
            evaluate = self._func = to_python(self)
        return evaluate(n)
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render a list/dict of rules or a `PluralRule` as a JavaScript function.

    The generated function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved in range operations multiple times. This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compile_js = _JavaScriptCompiler().compile
    # One ``cond ? 'tag' : `` fragment per rule; the chain is closed below by
    # the fallback tag.
    branches = ''.join(
        f"{compile_js(ast)} ? {tag!r} : "
        for tag, ast in PluralRule.parse(rule).abstract
    )
    return f"(function(n) {{ return {branches}{_fallback_tag!r}; }})"
def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
    """Compile a list/dict of rules or a `PluralRule` into a plain Python function.

    This is useful in situations where you need a real function and don't
    care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    # Names the generated source is allowed to reference.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    compile_expr = _PythonCompiler().compile
    lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    # The str() call coerces the tag to the native string type; tags come
    # from a small ASCII-only set, so that is safe.
    lines.extend(
        f" if ({compile_expr(ast)}): return {str(tag)!r}"
        for tag, ast in PluralRule.parse(rule).abstract
    )
    lines.append(f" return {_fallback_tag!r}")
    # Executing the module-level code defines ``evaluate`` inside ``namespace``.
    exec(compile('\n'.join(lines), '<rule>', 'exec'), namespace)
    return namespace['evaluate']
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render the plural rule as a gettext expression.

    The gettext expression is technically limited to integers and returns
    indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    parsed = PluralRule.parse(rule)

    # The fallback tag always takes part, whether or not it has its own rule.
    active_tags = parsed.tags | {_fallback_tag}
    compile_expr = _GettextCompiler().compile
    # Indices follow the canonical tag order, restricted to the tags in use.
    ordered_tags = [tag for tag in _plural_tags if tag in active_tags]

    branches = ''.join(
        f"{compile_expr(ast)} ? {ordered_tags.index(tag)} : "
        for tag, ast in parsed.abstract
    )
    return (
        f"nplurals={len(active_tags)}; plural=("
        f"{branches}{ordered_tags.index(_fallback_tag)});"
    )
def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Integer range list test.

    This is the callback for the "in" operator of the UTS #35 pluralization
    rule language. A number matches only if it has no fractional part and
    lies within one of the inclusive ranges:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    # Non-integral values never satisfy "in", whatever the ranges are.
    if num != int(num):
        return False
    return within_range_list(num, range_list)
def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Float range test.

    This is the callback for the "within" operator of the UTS #35
    pluralization rule language. Both ends of every range are inclusive:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if lower <= num <= upper:
            return True
    return False
def cldr_modulo(a: float, b: float) -> float:
    """Javaish modulo.

    The result takes the sign of the dividend rather than the sign of the
    divisor, unlike Python's ``%`` operator:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    negative_dividend = a < 0
    # Work on magnitudes, then put the dividend's sign back on the result.
    dividend = a * -1 if negative_dividend else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if negative_dividend else remainder
class RuleError(Exception):
    """Raised if a rule is malformed.

    The tokenizer and the parser in this module raise it when a CLDR
    pluralization rule expression cannot be tokenized or parsed.
    """
# Operand names that the parser accepts at the start of a relation.
_VARS = {
    'n',  # absolute value of the source number.
    'i',  # integer digits of n.
    'v',  # number of visible fraction digits in n, with trailing zeros.*
    'w',  # number of visible fraction digits in n, without trailing zeros.*
    'f',  # visible fraction digits in n, with trailing zeros.*
    't',  # visible fraction digits in n, without trailing zeros.*
    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    'e',  # currently, synonym for `c`. however, may be redefined in the future.
}

# Token patterns, tried in this order at each position by `tokenize_rule`.
# The entry named ``None`` (whitespace) is consumed without producing a token.
# The single-letter operands are spliced into a regex character class, so the
# arbitrary iteration order of the `_VARS` set does not affect matching.
_RULES: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)),  # U+2026: ELLIPSIS
]
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a CLDR plural rule into ``(type, text)`` tokens.

    Everything from the first ``@`` onwards (the samples) is dropped before
    tokenizing. The tokens are returned in *reverse* order so that consumers
    can take the next token with ``list.pop()``.

    :param s: the rule expression
    :return: the tokens, last token of the rule first
    :raise RuleError: if no token pattern matches at some position
    """
    s = s.split('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor = 0
    length = len(s)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(s, cursor)
            if found is None:
                continue
            cursor = found.end()
            # Patterns without a token name (whitespace) are only skipped.
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            raise RuleError(f"malformed CLDR pluralization rule. Got unexpected {s[cursor]!r}")
    tokens.reverse()
    return tokens
364def test_next_token(
365 tokens: list[tuple[str, str]],
366 type_: str,
367 value: str | None = None,
368) -> list[tuple[str, str]] | bool:
369 return tokens and tokens[-1][0] == type_ and \
370 (value is None or tokens[-1][1] == value)
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Consume and return the next token if it matches, else return ``None``.

    On a match the token is removed from the end of ``tokens``; otherwise
    ``tokens`` is left untouched.

    :param tokens: the remaining tokens, next token last
    :param type_: the token type to look for
    :param value: if given, the exact token text that is also required
    """
    matches = test_next_token(tokens, type_, value)
    return tokens.pop() if matches else None
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the AST node for an integer literal: ``('value', (value,))``."""
    args = (value,)
    return ('value', args)
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the AST node for an operand: ``(name, ())``."""
    no_args = ()
    return (name, no_args)
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the AST node for a range list: ``('range_list', range_list)``.

    The given ``range_list`` object is stored as is, not copied.
    """
    return ('range_list', range_list)
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap an AST node in a negation: ``('not', (rv,))``."""
    operands = (rv,)
    return ('not', operands)
class _Parser:
    """Internal parser. Translates a single rule into an abstract tree of
    tuples. It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The expression is parsed on instantiation; the result is stored in the
    attribute `ast`, which is ``None`` when the rule yields no tokens.
    """

    def __init__(self, string):
        # ``tokens`` is in reverse order: the next token is always the last
        # list element, and parsing consumes it with ``pop()``.
        self.tokens = tokenize_rule(string)
        # A pattern made only of samples produces no tokens at all, in
        # which case there is nothing to parse.
        self.ast = self.condition() if self.tokens else None
        if self.tokens:
            leftover = self.tokens[-1][1]
            raise RuleError(f"Expected end of rule, got {leftover!r}")

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, or raise `RuleError`.

        ``term`` overrides how the expected token is named in the message.
        """
        found = skip_token(self.tokens, type_, value)
        if found is None:
            if term is None:
                # Name the expectation by its text when one was requested,
                # otherwise by its token type.
                term = repr(value is None and type_ or value)
            if self.tokens:
                raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")
            raise RuleError(f"expected {term} but end of rule reached")
        return found

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*``, left-associative."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*``, left-associative."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        """Parse one ``is`` / ``in`` / ``within`` / ``=`` / ``!=`` relation."""
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            op = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return op, (left, self.value())

        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            # ``not`` may only precede the keyword forms, never ``=`` / ``!=``.
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(left)

        node = ('relation', (method, left, self.range_list()))
        return negate(node) if negated else node

    def newfangled_relation(self, left):
        """Parse the operator forms ``= range_list`` and ``!= range_list``."""
        if skip_token(self.tokens, 'symbol', '='):
            return 'relation', ('in', left, self.range_list())
        if skip_token(self.tokens, 'symbol', '!='):
            return negate(('relation', ('in', left, self.range_list())))
        raise RuleError('Expected "=" or "!=" or legacy relation')

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(start, end)`` pair.

        A lone value yields a pair with the same node on both sides.
        """
        start = self.value()
        end = self.value() if skip_token(self.tokens, 'ellipsis') else start
        return start, end

    def range_list(self):
        """Parse a comma-separated list of ranges and values."""
        items = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            items.append(self.range_or_value())
        return range_list_node(items)

    def expr(self):
        """Parse an operand, optionally followed by ``mod``/``%`` and a value."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        # ``mod`` and ``%`` are two spellings of the same operator.
        if skip_token(self.tokens, 'word', 'mod') or skip_token(self.tokens, 'symbol', '%'):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        """Parse an integer literal into a value node."""
        _, text = self.expect('value')
        return value_node(int(text))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles both operands with ``self.compile`` and
    interpolates the two results into ``tmpl`` using ``%`` formatting.
    """
    def compile_binary(self, left, right):
        operands = (self.compile(left), self.compile(right))
        return tmpl % operands

    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its single operand with ``self.compile``
    and interpolates the result into ``tmpl`` using ``%`` formatting.
    """
    def compile_unary(self, x):
        return tmpl % self.compile(x)

    return compile_unary
def compile_zero(x):
    """Compile any operand to the literal ``'0'``.

    Compilers assign this to the operands their output format does not
    support.
    """
    return '0'
class _Compiler:
    """Base class of the compilers, which transform the parsed expressions
    into multiple output formats.

    ``compile`` dispatches on the node's operator name to the matching
    ``compile_<op>`` method; subclasses override those methods to change the
    output syntax.
    """

    def compile(self, arg):
        """Compile the AST node ``arg``, an ``(op, args)`` pair, to a string."""
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operands compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Compile an ``in``/``within`` relation; subclasses must implement it."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit a call ``IN(expr, [(a, b), ...])`` or ``WITHIN(expr, [...])``.

        The upper-cased ``method`` is used as the name of the function called
        in the generated code.
        """
        # ``range_list`` is a ``('range_list', pairs)`` node; the pairs are
        # its second element.
        pairs = [
            f"({self.compile(low)}, {self.compile(high)})"
            for low, high in range_list[1]
        ]
        joined = ",".join(pairs)
        return f"{method.upper()}({self.compile(expr)}, [{joined}])"
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression.

    A gettext plural expression only has the variable ``n`` available, so
    every other operand is mapped onto something expressible with ``n``
    alone: ``i`` becomes ``n`` and the fraction operands become ``0``.
    """

    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # ``c`` and ``e`` must not leak into the output as free variables.
    # `extract_operands` always reports them as 0, so emit that constant.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile a relation into ``||``-joined comparisons.

        A single value becomes ``(expr == value)`` and a range becomes
        ``(expr >= low && expr <= high)``. ``method`` is not consulted:
        ``in`` and ``within`` produce the same output here.
        """
        compiled_expr = self.compile(expr)
        clauses = []
        # ``range_list`` is a ``('range_list', pairs)`` node.
        for low, high in range_list[1]:
            if low == high:
                clauses.append(f"({compiled_expr} == {self.compile(low)})")
            else:
                lower, upper = self.compile(low), self.compile(high)
                clauses.append(f"({compiled_expr} >= {lower} && {compiled_expr} <= {upper})")
        return f"({' || '.join(clauses)})"
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # The generated function only receives ``n``; a bare ``c`` or ``e`` would
    # be an undefined identifier in JavaScript. `extract_operands` always
    # reports them as 0, so emit that constant.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile a relation, adding an integer check for ``in``.

        The comparisons come from `_GettextCompiler`. For the ``in`` method
        they are additionally guarded by ``parseInt(expr, 10) == expr``.
        """
        code = super().compile_relation(method, expr, range_list)
        if method == 'in':
            compiled_expr = self.compile(expr)
            code = f"(parseInt({compiled_expr}, 10) == {compiled_expr} && {code})"
        return code
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one. We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Render a negated relation as ``expr not in ...`` / ``expr not within ...``."""
        # ``relation`` is a ``('relation', (method, expr, range_list))`` node.
        method, expr, range_list = relation[1]
        return self.compile_relation(method, expr, range_list, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Render ``expr [not] method a,b..c``.

        Ranges whose two ends are equal are written as a single value.
        """
        rendered = [
            self.compile(low) if low == high
            else f"{self.compile(low)}..{self.compile(high)}"
            for low, high in range_list[1]
        ]
        negation = ' not' if negated else ''
        return f"{self.compile(expr)}{negation} {method} {','.join(rendered)}"