1"""
2babel.messages.catalog
3~~~~~~~~~~~~~~~~~~~~~~
4
5Data structures for message catalogs.
6
7:copyright: (c) 2013-2025 by the Babel Team.
8:license: BSD, see LICENSE for more details.
9"""
10
11from __future__ import annotations
12
13import datetime
14import re
15from collections.abc import Iterable, Iterator
16from copy import copy
17from difflib import SequenceMatcher
18from email import message_from_string
19from heapq import nlargest
20from string import Formatter
21from typing import TYPE_CHECKING
22
23from babel import __version__ as VERSION
24from babel.core import Locale, UnknownLocaleError
25from babel.dates import format_datetime
26from babel.messages.plurals import get_plural
27from babel.util import LOCALTZ, _cmp
28
29if TYPE_CHECKING:
30 from typing_extensions import TypeAlias
31
32 _MessageID: TypeAlias = str | tuple[str, ...] | list[str]
33
34__all__ = [
35 'DEFAULT_HEADER',
36 'PYTHON_FORMAT',
37 'Catalog',
38 'Message',
39 'TranslationError',
40]
41
42
def get_close_matches(word, possibilities, n=3, cutoff=0.6):
    """Return up to ``n`` entries of ``possibilities`` that best match ``word``.

    This mirrors ``difflib.get_close_matches`` except that the
    ``SequenceMatcher`` is created with ``autojunk=False``, which works
    around https://github.com/python/cpython/issues/90825.

    :param word: the sequence to find matches for
    :param possibilities: an iterable of candidate sequences
    :param n: the maximum number of matches to return; must be > 0
    :param cutoff: the minimum similarity ratio, in ``[0.0, 1.0]``
    :return: the matching candidates, best match first
    """
    if not n > 0:  # pragma: no cover
        raise ValueError(f"n must be > 0: {n!r}")
    if not 0.0 <= cutoff <= 1.0:  # pragma: no cover
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")

    matcher = SequenceMatcher(autojunk=False)  # the only deviation from difflib.py
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Try the cheap upper bounds first; bail out as soon as one of them
        # rules the candidate out.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        score = matcher.ratio()
        if score >= cutoff:
            scored.append((score, candidate))

    # Keep the n highest-scoring pairs, then drop the scores.
    return [candidate for _score, candidate in nlargest(n, scored)]
69
70
# Matches a single printf-style (``%``) placeholder. The three capture groups
# are, in order: the optional mapping key given in parentheses, the
# flag/width/precision/length-modifier part, and the conversion character.
PYTHON_FORMAT = re.compile(
    r'''
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
''',
    re.VERBOSE,
)
84
85
86def _has_python_brace_format(string: str) -> bool:
87 if "{" not in string:
88 return False
89 fmt = Formatter()
90 try:
91 # `fmt.parse` returns 3-or-4-tuples of the form
92 # `(literal_text, field_name, format_spec, conversion)`;
93 # if `field_name` is set, this smells like brace format
94 field_name_seen = False
95 for t in fmt.parse(string):
96 if t[1] is not None:
97 field_name_seen = True
98 # We cannot break here, as we need to consume the whole string
99 # to ensure that it is a valid format string.
100 except ValueError:
101 return False
102 return field_name_seen
103
104
105def _parse_datetime_header(value: str) -> datetime.datetime:
106 match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
107
108 dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
109
110 # Separate the offset into a sign component, hours, and # minutes
111 tzoffset = match.group('tzoffset')
112 if tzoffset is not None:
113 plus_minus_s, rest = tzoffset[0], tzoffset[1:]
114 hours_offset_s, mins_offset_s = rest[:2], rest[2:]
115
116 # Make them all integers
117 plus_minus = int(f"{plus_minus_s}1")
118 hours_offset = int(hours_offset_s)
119 mins_offset = int(mins_offset_s)
120
121 # Calculate net offset
122 net_mins_offset = hours_offset * 60
123 net_mins_offset += mins_offset
124 net_mins_offset *= plus_minus
125
126 # Create an offset object
127 tzoffset = datetime.timezone(
128 offset=datetime.timedelta(minutes=net_mins_offset),
129 name=f'Etc/GMT{net_mins_offset:+d}',
130 )
131
132 # Store the offset in a datetime object
133 dt = dt.replace(tzinfo=tzoffset)
134
135 return dt
136
137
class Message:
    """Representation of a single message in a catalog."""

    def __init__(
        self,
        id: _MessageID,
        string: _MessageID | None = '',
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> None:
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        # A pluralizable message without a translation gets an empty
        # (singular, plural) pair instead of an empty string.
        if not string and self.pluralizable:
            string = ('', '')
        self.string = string
        # dict.fromkeys() removes duplicates while keeping the first-seen order.
        self.locations = list(dict.fromkeys(locations)) if locations else []
        self.flags = set(flags)
        # The format flags are derived from the message ID; whatever the
        # caller passed for these two flags is overridden.
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        if id and self.python_brace_format:
            self.flags.add('python-brace-format')
        else:
            self.flags.discard('python-brace-format')
        self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else []
        self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else []
        # previous_id is always stored as a list, whatever form was passed in.
        if previous_id:
            if isinstance(previous_id, str):
                self.previous_id = [previous_id]
            else:
                self.previous_id = list(previous_id)
        else:
            self.previous_id = []
        self.lineno = lineno
        self.context = context

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"

    def __cmp__(self, other: object) -> int:
        """Compare Messages, taking into account plural ids"""

        def values_to_compare(obj):
            if isinstance(obj, Message) and obj.pluralizable:
                return obj.id[0], obj.context or ''
            return obj.id, obj.context or ''

        return _cmp(values_to_compare(self), values_to_compare(other))

    @staticmethod
    def _is_comparable(other: object) -> bool:
        """Return whether ``other`` exposes what `__cmp__` reads from it."""
        return hasattr(other, 'id') and hasattr(other, 'context')

    # The rich comparisons return ``NotImplemented`` for operands that lack
    # ``id``/``context`` so that e.g. ``message == 42`` evaluates to ``False``
    # instead of raising ``AttributeError`` from inside `__cmp__`.

    def __gt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __lt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __ge__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __le__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __eq__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) != 0

    def is_identical(self, other: Message) -> bool:
        """Checks whether messages are identical, taking into account all
        properties.
        """
        assert isinstance(other, Message)
        return self.__dict__ == other.__dict__

    def clone(self) -> Message:
        """Return a new `Message` built from shallow copies of this message's
        attributes.
        """
        return Message(
            id=copy(self.id),
            string=copy(self.string),
            locations=copy(self.locations),
            flags=copy(self.flags),
            auto_comments=copy(self.auto_comments),
            user_comments=copy(self.user_comments),
            previous_id=copy(self.previous_id),
            lineno=self.lineno,  # immutable (int/None)
            context=self.context,  # immutable (str/None)
        )

    def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a list of `TranslationError` objects.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        from babel.messages.checkers import checkers

        errors: list[TranslationError] = []
        for checker in checkers:
            try:
                checker(catalog, self)
            except TranslationError as e:
                errors.append(e)
        return errors

    @property
    def fuzzy(self) -> bool:
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self) -> bool:
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self) -> bool:
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        if isinstance(ids, (list, tuple)):
            for id in ids:  # Explicit loop for performance reasons.
                if PYTHON_FORMAT.search(id):
                    return True
            return False
        return bool(PYTHON_FORMAT.search(ids))

    @property
    def python_brace_format(self) -> bool:
        """Whether the message contains Python brace-style (``str.format``)
        parameters.

        >>> Message('Hello, {name}!').python_brace_format
        True
        >>> Message(('One apple', '{count} apples')).python_brace_format
        True

        :type:  `bool`"""
        ids = self.id
        if isinstance(ids, (list, tuple)):
            for id in ids:  # Explicit loop for performance reasons.
                if _has_python_brace_format(id):
                    return True
            return False
        return _has_python_brace_format(ids)
328
329
class TranslationError(Exception):
    """Raised by translation checkers when they come across an invalid
    message translation."""
333
334
# Default header comment for a catalog. The upper-case words PROJECT, YEAR
# and ORGANIZATION are placeholders that `Catalog` substitutes when its
# header comment is read, and "Translations template" is replaced by
# "<locale name> translations" when the catalog has a locale.
DEFAULT_HEADER = """\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
341
342
def parse_separated_header(value: str) -> dict[str, str]:
    """Split a ``;``-separated header value into a name-to-value mapping.

    The value is parsed the way a ``Content-Type`` header is: it is stored
    on a scratch email message and its parameters are read back.

    :param value: the raw header value, e.g. ``text/plain; charset=utf-8``
    :return: a dictionary mapping each parameter name to its value
    """
    # Adapted from https://peps.python.org/pep-0594/#cgi
    from email.message import Message as EmailMessage

    scratch = EmailMessage()
    scratch['content-type'] = value
    return {name: param for name, param in scratch.get_params()}
350
351
352def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
353 if isinstance(s, str):
354 return s
355 if isinstance(s, bytes):
356 return s.decode(encoding, errors)
357 return str(s)
358
359
class Catalog:
    """Representation of a message catalog."""

    def __init__(
        self,
        locale: Locale | str | None = None,
        domain: str | None = None,
        header_comment: str | None = DEFAULT_HEADER,
        project: str | None = None,
        version: str | None = None,
        copyright_holder: str | None = None,
        msgid_bugs_address: str | None = None,
        creation_date: datetime.datetime | str | None = None,
        revision_date: datetime.datetime | datetime.time | float | str | None = None,
        last_translator: str | None = None,
        language_team: str | None = None,
        charset: str | None = None,
        fuzzy: bool = True,
    ) -> None:
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        self._header_comment = header_comment
        self._messages: dict[str | tuple[str, str], Message] = {}

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are tagged with the local timezone.
        if creation_date is None:
            creation_date = datetime.datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        # Dictionary of obsolete messages
        self.obsolete: dict[str | tuple[str, str], Message] = {}
        # Filled in lazily by `num_plurals` / `plural_expr`, or from a parsed
        # Plural-Forms header.
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale: Locale | str | None) -> None:
        """Store the locale and its identifier.

        A string that cannot be parsed as a known locale keeps its identifier
        but leaves the `Locale` object unset.

        :raise TypeError: if ``locale`` is not a `Locale`, a string or `None`
        """
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = str(locale)
            self._locale = locale
            return

        if isinstance(locale, str):
            self._locale_identifier = str(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError(
            f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}",
        )

    def _get_locale(self) -> Locale | None:
        return self._locale

    def _get_locale_identifier(self) -> str | None:
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self) -> str:
        """Return the header comment with its placeholders substituted."""
        comment = self._header_comment
        if comment is None:
            # `None` is documented as "use the default header"; without this
            # fallback the `.replace()` chain below fails on `None`.
            comment = DEFAULT_HEADER
        year = datetime.datetime.now(LOCALTZ).strftime('%Y')
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        comment = (
            comment.replace('PROJECT', self.project)
            .replace('VERSION', self.version)
            .replace('YEAR', year)
            .replace('ORGANIZATION', self.copyright_holder)
        )
        locale_name = self.locale.english_name if self.locale else self.locale_identifier
        if locale_name:
            comment = comment.replace("Translations template", f"{locale_name} translations")
        return comment

    def _set_header_comment(self, string: str | None) -> None:
        self._header_comment = string

    header_comment = property(
        _get_header_comment,
        _set_header_comment,
        doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    :type: `unicode`
    """,
    )

    def _get_mime_headers(self) -> list[tuple[str, str]]:
        """Build the list of ``(name, value)`` MIME headers for the catalog."""
        if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
            revision_date = format_datetime(
                self.revision_date,
                'yyyy-MM-dd HH:mmZ',
                locale='en',
            )
        else:
            # A string (such as the placeholder default) is emitted as-is.
            revision_date = self.revision_date

        language_team = self.language_team
        if self.locale_identifier and 'LANGUAGE' in language_team:
            language_team = language_team.replace('LANGUAGE', str(self.locale_identifier))

        headers: list[tuple[str, str]] = [
            ("Project-Id-Version", f"{self.project} {self.version}"),
            ('Report-Msgid-Bugs-To', self.msgid_bugs_address),
            ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')),
            ('PO-Revision-Date', revision_date),
            ('Last-Translator', self.last_translator),
        ]  # fmt: skip
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        headers.append(('Language-Team', language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers += [
            ('MIME-Version', '1.0'),
            ("Content-Type", f"text/plain; charset={self.charset}"),
            ('Content-Transfer-Encoding', '8bit'),
            ("Generated-By", f"Babel {VERSION}\n"),
        ]
        return headers

    def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
        """Update catalog attributes from ``(name, value)`` header pairs.

        Header names are matched case-insensitively; unrecognised headers
        are ignored.
        """
        for name, value in headers:
            name = _force_text(name.lower(), encoding=self.charset)
            value = _force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # The last space-separated word is the version, everything
                # before it is the project name.
                parts = value.split(' ')
                self.project = ' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                # The `or None` makes sure that the locale is set to None
                # if the header's value is an empty string, which is what
                # some tools generate (instead of eliding the empty Language
                # header altogether).
                self._set_locale(value or None)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                params = parse_separated_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # The " ;" prefix gives the parser an empty leading item so
                # that every `key=value` pair is treated as a parameter.
                params = parse_separated_header(f" ;{value}")
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(
        _get_mime_headers,
        _set_mime_headers,
        doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> from datetime import datetime
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1);
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """,
    )

    @property
    def num_plurals(self) -> int:
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self) -> str:
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `str`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self) -> str:
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1);'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1);'

        :type: `str`"""
        return f"nplurals={self.num_plurals}; plural={self.plural_expr};"

    def __contains__(self, id: _MessageID) -> bool:
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self) -> int:
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self) -> Iterator[Message]:
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first message yielded is the special header entry built from the
        MIME headers.

        :rtype: ``iterator``"""
        buf = []
        for name, value in self.mime_headers:
            buf.append(f"{name}: {value}")
        flags = set()
        if self.fuzzy:
            flags |= {'fuzzy'}
        yield Message('', '\n'.join(buf), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self) -> str:
        locale = ''
        if self.locale:
            locale = f" {self.locale}"
        return f"<{type(self).__name__} {self.domain!r}{locale}>"

    def __delitem__(self, id: _MessageID) -> None:
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id: _MessageID) -> Message:
        """Return the message with the specified ID.

        This delegates to `get`, so ``None`` is returned (rather than an
        exception being raised) when there is no such message.

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id: _MessageID, message: Message) -> None:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo')
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo', locations=[('main.py', 1)])
        >>> catalog['foo'].locations
        [('main.py', 1)]
        >>> catalog['foo'] = Message('foo', locations=[('utils.py', 5)])
        >>> catalog['foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            # Merge the metadata, dropping duplicates but keeping order.
            current.locations = list(dict.fromkeys([*current.locations, *message.locations]))
            current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments]))  # fmt:skip
            current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments]))  # fmt:skip
            current.flags |= message.flags
        elif id == '':
            # special treatment for the header message
            self.mime_headers = message_from_string(message.string).items()
            self.header_comment = "\n".join(f"# {c}".rstrip() for c in message.user_comments)
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), (
                    f"Expected sequence but got {type(message.string)}"
                )
            self._messages[key] = message

    def add(
        self,
        id: _MessageID,
        string: _MessageID | None = None,
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> Message:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add('foo')
        <Message ...>
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        message = Message(
            id,
            string,
            list(locations),
            flags,
            auto_comments,
            user_comments,
            previous_id,
            lineno=lineno,
            context=context,
        )
        self[id] = message
        return message

    def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yields a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``generator`` of ``(message, errors)``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id: _MessageID, context: str | None = None) -> Message | None:
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        :return: the message, or ``None`` if there is no such message
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id: _MessageID, context: str | None = None) -> None:
        """Delete the message with the specified ID and context.

        Nothing happens if there is no such message.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        self._messages.pop(self._key_for(id, context), None)

    def update(
        self,
        template: Catalog,
        no_fuzzy_matching: bool = False,
        update_header_comment: bool = False,
        keep_user_comments: bool = True,
        update_creation_date: bool = True,
    ) -> None:
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', 'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        ('Salat', 'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to disable fuzzy matching of message IDs
        :param update_header_comment: whether to copy the header comment from the template
        :param keep_user_comments: whether to keep user comments from the old catalog
        :param update_creation_date: whether to copy the creation date from the template
        """
        # `messages` is the old content, `remaining` tracks the old entries
        # that have not been matched yet; the catalog is rebuilt from empty.
        messages = self._messages
        remaining = messages.copy()
        self._messages = {}

        # Prepare for fuzzy matching
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            for msgid in messages:
                if msgid and messages[msgid].string:
                    key = self._key_for(msgid)
                    ctxt = messages[msgid].context
                    fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
        fuzzy_matches = set()

        def _merge(
            message: Message,
            oldkey: tuple[str, str] | str,
            newkey: tuple[str, str] | str,
        ) -> None:
            # Work on a copy so that the template's message is left untouched.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Matched by similarity only: flag as fuzzy and remember the
                # old ID as the previous ID.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                assert oldmsg is not None
                if isinstance(oldmsg.id, str):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
                assert oldmsg is not None
            message.string = oldmsg.string

            if keep_user_comments and oldmsg.user_comments:
                message.user_comments = list(dict.fromkeys(oldmsg.user_comments))

            # Reconcile the shape of the translation with the (possibly
            # changed) pluralizability of the message.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([''] * (len(message.id) - 1)),
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[: len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {'fuzzy'}
            self[message.id] = message

        for message in template:
            # The header entry has an empty ID and is skipped.
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        matches = get_close_matches(
                            self._to_fuzzy_match_key(key),
                            fuzzy_candidates.keys(),
                            1,
                        )
                        if matches:
                            modified_key = matches[0]
                            newkey, newctxt = fuzzy_candidates[modified_key]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        # Old messages that were neither matched exactly nor used as a fuzzy
        # match become obsolete.
        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        if update_creation_date:
            self.creation_date = template.creation_date

    def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
        """Converts a message key to a string suitable for fuzzy matching."""
        if isinstance(key, tuple):
            matchkey = key[0]  # just the msgid, no context
        else:
            matchkey = key
        return matchkey.lower().strip()

    def _key_for(
        self,
        id: _MessageID,
        context: str | None = None,
    ) -> tuple[str, str] | str:
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key

    def is_identical(self, other: Catalog) -> bool:
        """Checks if catalogs are identical, taking into account messages and
        headers.
        """
        assert isinstance(other, Catalog)
        for key in self._messages.keys() | other._messages.keys():
            # Look the stored key up directly. Going through `get()` would
            # run it through `_key_for` again, which treats a
            # ``(msgid, msgctxt)`` key as a plural ID and drops the context.
            message_1 = self._messages.get(key)
            message_2 = other._messages.get(key)
            if message_1 is None or message_2 is None or not message_1.is_identical(message_2):
                return False
        return dict(self.mime_headers) == dict(other.mime_headers)