Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/plural.py: 36%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
    babel.plural
    ~~~~~~~~~~~~

    CLDR Plural support.  See UTS #35.

    :copyright: (c) 2013-2025 by the Babel Team.
    :license: BSD, see LICENSE for more details.
9"""
11from __future__ import annotations
13import decimal
14import re
15from collections.abc import Iterable, Mapping
16from typing import Any, Callable, Literal
# Every plural category tag accepted by `PluralRule`.  The order of this
# tuple is also the order used to assign gettext plural indices.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
# Tag produced when none of the explicit rules matches.
_fallback_tag = 'other'
def extract_operands(
    source: float | decimal.Decimal,
) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for ‘c’. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    Leading zeros of the fraction count as visible digits, so
    ``Decimal('0.05')`` yields ``v == 2`` and ``f == 5``.

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t-c-e tuple
    :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # Whole-valued floats carry no visible fraction digits.
            n = i
        else:
            # Convert through the string representation instead of passing
            # the float to `Decimal` directly: the direct conversion is
            # lossless and would expose the binary expansion of the float
            # (often 53 or more digits), which is almost never what the
            # caller means.  Callers wanting that behavior can pass in a
            # pre-converted `Decimal` of the desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # The coefficient digits omit leading zeros (0.05 is stored as
            # digits=(5,), exponent=-2), so left-pad to exactly -exp digits
            # to recover the fraction as it is written.
            trailing = ''.join(str(d) for d in dec_tuple.digits[exp:]).zfill(-exp)
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    c = e = 0  # TODO: c and e are not supported
    return n, i, v, w, f, t, c, e
class PluralRule:
    """A callable set of language pluralization rules.

    The constructor accepts a list of ``(tag, expr)`` tuples or a dict of
    `CLDR rules`_.  Calling the resulting object with a positive or negative
    number (integer or float) returns the tag of the plural form to use:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or defined more than once
        """
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        seen = set()
        self.abstract: list[tuple[str, Any]] = []
        for tag, expression in sorted(pairs):
            if tag not in _plural_tags:
                raise ValueError(f"unknown tag {tag!r}")
            if tag in seen:
                raise ValueError(f"tag {tag!r} defined twice")
            seen.add(tag)
            tree = _Parser(expression).ast
            # Expressions that parse to nothing contribute no rule.
            if tree:
                self.abstract.append((tag, tree))

    def __repr__(self) -> str:
        compiled = self.rules
        parts = [f"{tag}: {compiled[tag]}" for tag in _plural_tags if tag in compiled]
        args = ", ".join(parts)
        return f"<{type(self).__name__} {args!r}>"

    @classmethod
    def parse(
        cls,
        rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule,
    ) -> PluralRule:
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        return rules if isinstance(rules, PluralRule) else cls(rules)

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        compiler = _UnicodeCompiler()
        return {tag: compiler.compile(tree) for tag, tree in self.abstract}

    @property
    def tags(self) -> frozenset[str]:
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it.
        """
        return frozenset(entry[0] for entry in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled function is rebuilt
        # on the first call.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # Compile lazily on first use; `_func` is an unset slot until then.
        try:
            func = self._func
        except AttributeError:
            func = self._func = to_python(self)
        return func(n)
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
    function.  This function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    js_compiler = _JavaScriptCompiler()
    # One "<condition> ? '<tag>' : " link per rule; the fallback tag closes
    # the ternary chain.
    branches = ''.join(
        f"{js_compiler.compile(tree)} ? {tag!r} : "
        for tag, tree in PluralRule.parse(rule).abstract
    )
    return '(function(n) { return ' + branches + repr(_fallback_tag) + '; })'
def to_python(
    rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule,
) -> Callable[[float | decimal.Decimal], str]:
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    py_compiler = _PythonCompiler()
    source_lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    for tag, tree in PluralRule.parse(rule).abstract:
        # str() coerces the tag to the native string type; tags come from a
        # small ASCII-only set, so this is safe.
        source_lines.append(f" if ({py_compiler.compile(tree)}): return {str(tag)!r}")
    source_lines.append(f" return {_fallback_tag!r}")

    # Helpers the generated source refers to by name.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    exec(compile('\n'.join(source_lines), '<rule>', 'exec'), namespace)
    return namespace['evaluate']
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    parsed = PluralRule.parse(rule)

    used_tags = parsed.tags | {_fallback_tag}
    # A tag's gettext index is its position among the used tags, taken in
    # the order of `_plural_tags`.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    gettext_compiler = _GettextCompiler()

    pieces = [f"nplurals={len(used_tags)}; plural=("]
    pieces.extend(
        f"{gettext_compiler.compile(tree)} ? {ordered_tags.index(tag)} : "
        for tag, tree in parsed.abstract
    )
    pieces.append(f"{ordered_tags.index(_fallback_tag)});")
    return ''.join(pieces)
def in_range_list(
    num: float | decimal.Decimal,
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> bool:
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language; it only matches whole
    numbers that fall inside one of the inclusive ranges:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    if num != int(num):
        # Values with a fractional part never satisfy "in".
        return False
    return within_range_list(num, range_list)
def within_range_list(
    num: float | decimal.Decimal,
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> bool:
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language; both ends of every range
    are inclusive:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if lower <= num <= upper:
            return True
    return False
def cldr_modulo(a: float, b: float) -> float:
    """Javaish modulo.  This modulo operator returns the value with the sign
    of the dividend rather than the divisor like Python does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    negative_dividend = a < 0
    # Work on magnitudes, then re-apply the dividend's sign to the result.
    dividend = a * -1 if negative_dividend else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if negative_dividend else remainder
class RuleError(Exception):
    """Raised when a plural rule expression cannot be tokenized or parsed."""
# Operand symbols that may appear as identifiers in a plural rule expression.
_VARS = {
    'n',  # absolute value of the source number.
    'i',  # integer digits of n.
    'v',  # number of visible fraction digits in n, with trailing zeros.*
    'w',  # number of visible fraction digits in n, without trailing zeros.*
    'f',  # visible fraction digits in n, with trailing zeros.*
    't',  # visible fraction digits in n, without trailing zeros.*
    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    'e',  # currently, synonym for `c`. however, may be redefined in the future.
}

# Tokenizer table of (token type, pattern) pairs, tried in order at each
# position by `tokenize_rule`.  A token type of ``None`` (whitespace) is
# matched and skipped without producing a token.
_RULES: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    ('word', re.compile(rf'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)),  # U+2026: ELLIPSIS
]
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a plural rule expression into ``(type, text)`` tokens.

    Everything from the first ``@`` onwards is ignored.  The tokens are
    returned in reverse order, so the next token to consume is the last
    element of the list.

    :param s: the rule expression
    :raise RuleError: if a character matches none of the token patterns
    """
    rule_text = s.partition('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor = 0
    length = len(rule_text)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(rule_text, cursor)
            if found is None:
                continue
            cursor = found.end()
            # Entries without a token type are skipped silently.
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            raise RuleError(f"malformed CLDR pluralization rule. Got unexpected {rule_text[cursor]!r}")
    tokens.reverse()
    return tokens
378def test_next_token(
379 tokens: list[tuple[str, str]],
380 type_: str,
381 value: str | None = None,
382) -> list[tuple[str, str]] | bool:
383 return tokens and tokens[-1][0] == type_ and (value is None or tokens[-1][1] == value)
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Pop and return the next token if it matches, otherwise return ``None``.

    The token list is only modified when the next token matches.
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the AST node for an integer literal."""
    args = (value,)
    return ('value', args)
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the AST node for an operand identifier; it has no arguments."""
    return (name, ())
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the AST node wrapping a list of ranges; the list is stored as given."""
    return ('range_list', range_list)
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap an AST node in a ``'not'`` node."""
    return ('not', (rv,))
class _Parser:
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples. It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        self.tokens = tokenize_rule(string)
        # A pattern made only of samples yields no tokens at all; such a
        # rule has no AST.
        self.ast = self.condition() if self.tokens else None
        if self.tokens:
            raise RuleError(f"Expected end of rule, got {self.tokens[-1][1]!r}")

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, or raise `RuleError` if it
        does not match ``type_`` (and ``value`` when given).

        ``term`` overrides how the expected token is named in the error.
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            # Name the expected token by its text when given, else by type.
            wanted = value if value is not None else (type_ or None)
            term = repr(wanted)
        if self.tokens:
            raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")
        raise RuleError(f"expected {term} but end of rule reached")

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*``, left-associative."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*``, left-associative."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        """Parse an ``is``, ``in``, ``within``, ``=`` or ``!=`` relation."""
        operand = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            if skip_token(self.tokens, 'word', 'not'):
                return 'isnot', (operand, self.value())
            return 'is', (operand, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            # 'not' may only precede the word operators, never '=' / '!='.
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(operand)
        node = ('relation', (method, operand, self.range_list()))
        if negated:
            return negate(node)
        return node

    def newfangled_relation(self, left):
        """Parse the symbolic ``=`` / ``!=`` form of an ``in`` relation."""
        if skip_token(self.tokens, 'symbol', '='):
            negated = False
        elif skip_token(self.tokens, 'symbol', '!='):
            negated = True
        else:
            raise RuleError('Expected "=" or "!=" or legacy relation')
        node = ('relation', ('in', left, self.range_list()))
        if negated:
            return negate(node)
        return node

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(low, high)`` pair;
        a single value becomes a range with equal ends.
        """
        low = self.value()
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        """Parse a comma-separated list of ranges and single values."""
        items = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            items.append(self.range_or_value())
        return range_list_node(items)

    def expr(self):
        """Parse an operand, optionally followed by ``mod``/``%`` and a value."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        name = word[1]
        # 'mod' and '%' are two spellings of the same operator.
        if skip_token(self.tokens, 'word', 'mod') or skip_token(self.tokens, 'symbol', '%'):
            return 'mod', ((name, ()), self.value())
        return ident_node(name)

    def value(self):
        """Parse an integer literal into a value node."""
        token = self.expect('value')
        return value_node(int(token[1]))
531def _binary_compiler(tmpl):
532 """Compiler factory for the `_Compiler`."""
533 return lambda self, left, right: tmpl % (self.compile(left), self.compile(right))
536def _unary_compiler(tmpl):
537 """Compiler factory for the `_Compiler`."""
538 return lambda self, x: tmpl % self.compile(x)
def compile_zero(x):
    """Compile any operand to the constant ``'0'``; the argument is ignored."""
    return '0'
class _Compiler:
    """The compilers are able to transform the expressions into multiple
    output formats.

    This base class emits C-like operators; `compile_relation` must be
    supplied by subclasses.
    """

    def compile(self, arg):
        """Compile one ``(op, args)`` AST node by dispatching to ``compile_<op>``."""
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operand identifiers compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit a call named after the upper-cased ``method`` (``IN`` or
        ``WITHIN``) with the compiled operand and a list of
        ``(low, high)`` pairs.
        """
        pairs = [
            f"({self.compile(low)}, {self.compile(high)})"
            for low, high in range_list[1]
        ]
        ranges = ",".join(pairs)
        return f"{method.upper()}({self.compile(expr)}, [{ranges}])"
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression.

    The expression's only variable is ``n``, so every other operand is
    replaced: ``i`` compiles to ``n``, and the fraction operands
    (``v``, ``w``, ``f``, ``t``) and the exponent operands (``c``, ``e``)
    compile to ``0``.
    """

    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # `extract_operands` always reports c = e = 0; emitting the bare names
    # would leave undefined identifiers in the generated expression.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile a range-list relation into ``||``-joined comparisons.

        ``method`` is not consulted here: ``in`` and ``within`` produce the
        same expression.
        """
        clauses = []
        expr = self.compile(expr)
        for low, high in range_list[1]:
            if low == high:
                clauses.append(f"({expr} == {self.compile(low)})")
            else:
                lower = self.compile(low)
                upper = self.compile(high)
                clauses.append(f"({expr} >= {lower} && {expr} <= {upper})")
        return f"({' || '.join(clauses)})"
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript.

    The generated function only receives ``n``, so the fraction operands
    (``v``, ``w``, ``f``, ``t``) and the exponent operands (``c``, ``e``)
    compile to ``0``.
    """

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # `c` and `e` are not variables of the generated function; emitting the
    # bare names would reference undeclared identifiers.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile a range-list relation; ``in`` additionally requires the
        operand to be a whole number.
        """
        code = _GettextCompiler.compile_relation(self, method, expr, range_list)
        if method == 'in':
            expr = self.compile(expr)
            code = f"(parseInt({expr}, 10) == {expr} && {code})"
        return code
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Compile a negated relation node as ``<expr> not <method> <ranges>``."""
        method, expr, range_list = relation[1]
        return self.compile_relation(method, expr, range_list, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Compile a relation; a range with equal ends is written as a
        single value, any other range as ``low..high``.
        """
        ranges = [
            self.compile(low) if low == high
            else f"{self.compile(low)}..{self.compile(high)}"
            for low, high in range_list[1]
        ]
        negation = ' not' if negated else ''
        return f"{self.compile(expr)}{negation} {method} {','.join(ranges)}"