1"""
2 babel.messages.catalog
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Data structures for message catalogs.
6
7 :copyright: (c) 2013-2025 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
11
12import datetime
13import re
14from collections.abc import Iterable, Iterator
15from copy import copy
16from difflib import SequenceMatcher
17from email import message_from_string
18from heapq import nlargest
19from string import Formatter
20from typing import TYPE_CHECKING
21
22from babel import __version__ as VERSION
23from babel.core import Locale, UnknownLocaleError
24from babel.dates import format_datetime
25from babel.messages.plurals import get_plural
26from babel.util import LOCALTZ, _cmp
27
28if TYPE_CHECKING:
29 from typing_extensions import TypeAlias
30
31 _MessageID: TypeAlias = str | tuple[str, ...] | list[str]
32
33__all__ = [
34 'DEFAULT_HEADER',
35 'PYTHON_FORMAT',
36 'Catalog',
37 'Message',
38 'TranslationError',
39]
40
41
def get_close_matches(word, possibilities, n=3, cutoff=0.6):
    """Return up to ``n`` entries of ``possibilities`` that resemble ``word``.

    This mirrors ``difflib.get_close_matches`` with one deliberate change:
    the ``SequenceMatcher`` is created with ``autojunk=False`` to work
    around https://github.com/python/cpython/issues/90825.

    :param word: the sequence to find matches for
    :param possibilities: an iterable of candidate sequences
    :param n: the maximum number of matches to return; must be > 0
    :param cutoff: the minimum similarity ratio, in the range [0.0, 1.0]
    :return: the best matches, most similar first
    :raise ValueError: if ``n`` or ``cutoff`` is out of range
    """
    if not n > 0:  # pragma: no cover
        raise ValueError(f"n must be > 0: {n!r}")
    if not 0.0 <= cutoff <= 1.0:  # pragma: no cover
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")

    matcher = SequenceMatcher(autojunk=False)  # only change from difflib.py
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Check the cheap upper bounds first; only candidates that pass
        # both get the expensive exact ratio computed.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        score = matcher.ratio()
        if score >= cutoff:
            scored.append((score, candidate))

    # Keep the n best scorers (best first) and drop the scores.
    return [candidate for _score, candidate in nlargest(n, scored)]
66
67
# Matches one printf-style (``%``) conversion specifier inside a string.
# Capture groups, in order:
#   1. the optional mapping key given in parentheses, e.g. ``name`` in
#      ``%(name)s``;
#   2. the optional flag, minimum width (digits or ``*``), precision
#      (``.`` followed by digits or ``*``) and length modifier (h, l or L);
#   3. the conversion type character. ``%`` is part of this set, so a
#      literal ``%%`` matches as well.
# The pattern is compiled with ``re.VERBOSE``, so whitespace inside the
# pattern is not significant (hence the escaped space in the flag set).
PYTHON_FORMAT = re.compile(r'''
 \%
 (?:\(([\w]*)\))?
 (
 [-#0\ +]?(?:\*|[\d]+)?
 (?:\.(?:\*|[\d]+))?
 [hlL]?
 )
 ([diouxXeEfFgGcrs%])
''', re.VERBOSE)
78
79
80def _has_python_brace_format(string: str) -> bool:
81 if "{" not in string:
82 return False
83 fmt = Formatter()
84 try:
85 # `fmt.parse` returns 3-or-4-tuples of the form
86 # `(literal_text, field_name, format_spec, conversion)`;
87 # if `field_name` is set, this smells like brace format
88 field_name_seen = False
89 for t in fmt.parse(string):
90 if t[1] is not None:
91 field_name_seen = True
92 # We cannot break here, as we need to consume the whole string
93 # to ensure that it is a valid format string.
94 except ValueError:
95 return False
96 return field_name_seen
97
98
99def _parse_datetime_header(value: str) -> datetime.datetime:
100 match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
101
102 dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
103
104 # Separate the offset into a sign component, hours, and # minutes
105 tzoffset = match.group('tzoffset')
106 if tzoffset is not None:
107 plus_minus_s, rest = tzoffset[0], tzoffset[1:]
108 hours_offset_s, mins_offset_s = rest[:2], rest[2:]
109
110 # Make them all integers
111 plus_minus = int(f"{plus_minus_s}1")
112 hours_offset = int(hours_offset_s)
113 mins_offset = int(mins_offset_s)
114
115 # Calculate net offset
116 net_mins_offset = hours_offset * 60
117 net_mins_offset += mins_offset
118 net_mins_offset *= plus_minus
119
120 # Create an offset object
121 tzoffset = datetime.timezone(
122 offset=datetime.timedelta(minutes=net_mins_offset),
123 name=f'Etc/GMT{net_mins_offset:+d}',
124 )
125
126 # Store the offset in a datetime object
127 dt = dt.replace(tzinfo=tzoffset)
128
129 return dt
130
131
class Message:
    """Representation of a single message in a catalog."""

    def __init__(
        self,
        id: _MessageID,
        string: _MessageID | None = '',
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> None:
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        if not string and self.pluralizable:
            string = ('', '')
        self.string = string
        # `dict.fromkeys` removes duplicates while keeping first-seen order.
        self.locations = list(dict.fromkeys(locations)) if locations else []
        self.flags = set(flags)
        # The format flags are derived from the message ID; any value the
        # caller passed for them in `flags` is overridden.
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        if id and self.python_brace_format:
            self.flags.add('python-brace-format')
        else:
            self.flags.discard('python-brace-format')
        self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else []
        self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else []
        if previous_id:
            if isinstance(previous_id, str):
                self.previous_id = [previous_id]
            else:
                self.previous_id = list(previous_id)
        else:
            self.previous_id = []
        self.lineno = lineno
        self.context = context

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"

    def __cmp__(self, other: object) -> int:
        """Compare Messages, taking into account plural ids"""
        def values_to_compare(obj):
            if isinstance(obj, Message) and obj.pluralizable:
                return obj.id[0], obj.context or ''
            return obj.id, obj.context or ''
        return _cmp(values_to_compare(self), values_to_compare(other))

    @staticmethod
    def _is_comparable(other: object) -> bool:
        """Return whether ``other`` exposes the attributes `__cmp__` reads."""
        return hasattr(other, 'id') and hasattr(other, 'context')

    # The rich comparisons return `NotImplemented` for objects that are not
    # message-like, so that e.g. ``message == None`` evaluates to ``False``
    # and ``message < 1`` raises ``TypeError`` instead of failing with an
    # ``AttributeError`` from inside `__cmp__`.

    def __gt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __lt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __ge__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __le__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __eq__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) != 0

    def is_identical(self, other: Message) -> bool:
        """Checks whether messages are identical, taking into account all
        properties.
        """
        assert isinstance(other, Message)
        return self.__dict__ == other.__dict__

    def clone(self) -> Message:
        """Return a new `Message` with shallow copies of this one's fields."""
        return Message(
            id=copy(self.id),
            string=copy(self.string),
            locations=copy(self.locations),
            flags=copy(self.flags),
            auto_comments=copy(self.auto_comments),
            user_comments=copy(self.user_comments),
            previous_id=copy(self.previous_id),
            lineno=self.lineno,  # immutable (int/None)
            context=self.context,  # immutable (str/None)
        )

    def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a list of `TranslationError` objects.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        from babel.messages.checkers import checkers
        errors: list[TranslationError] = []
        for checker in checkers:
            try:
                checker(catalog, self)
            except TranslationError as e:
                errors.append(e)
        return errors

    @property
    def fuzzy(self) -> bool:
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self) -> bool:
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self) -> bool:
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        if isinstance(ids, (list, tuple)):
            for id in ids:  # Explicit loop for performance reasons.
                if PYTHON_FORMAT.search(id):
                    return True
            return False
        return bool(PYTHON_FORMAT.search(ids))

    @property
    def python_brace_format(self) -> bool:
        """Whether the message contains ``str.format``-style (brace) parameters.

        >>> Message('Hello, {name}!').python_brace_format
        True
        >>> Message(('One apple', '{count} apples')).python_brace_format
        True

        :type:  `bool`"""
        ids = self.id
        if isinstance(ids, (list, tuple)):
            for id in ids:  # Explicit loop for performance reasons.
                if _has_python_brace_format(id):
                    return True
            return False
        return _has_python_brace_format(ids)
319
320
class TranslationError(Exception):
    """Raised by a translation checker when a message's translation is
    found to be invalid."""
324
325
# Default header comment for a catalog. The upper-case words PROJECT, YEAR
# and ORGANIZATION are placeholders that `Catalog.header_comment`
# substitutes when the comment is read back.
DEFAULT_HEADER = """\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
332
333
def parse_separated_header(value: str) -> dict[str, str]:
    """Split a ``;``-separated header value into a name-to-value mapping.

    The parsing is delegated to the ``email`` package by storing the value
    as a ``Content-Type`` header and reading its parameters back.

    :param value: the raw header value, e.g. ``text/plain; charset=utf-8``
    :return: a dictionary of the parameters found in the value
    """
    # Adapted from https://peps.python.org/pep-0594/#cgi
    from email.message import Message as _EmailMessage

    carrier = _EmailMessage()
    carrier['content-type'] = value
    parameters = carrier.get_params()
    return dict(parameters)
340
341
342def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
343 if isinstance(s, str):
344 return s
345 if isinstance(s, bytes):
346 return s.decode(encoding, errors)
347 return str(s)
348
349
class Catalog:
    """Representation of a message catalog."""

    def __init__(
        self,
        locale: Locale | str | None = None,
        domain: str | None = None,
        header_comment: str | None = DEFAULT_HEADER,
        project: str | None = None,
        version: str | None = None,
        copyright_holder: str | None = None,
        msgid_bugs_address: str | None = None,
        creation_date: datetime.datetime | str | None = None,
        revision_date: datetime.datetime | datetime.time | float | str | None = None,
        last_translator: str | None = None,
        language_team: str | None = None,
        charset: str | None = None,
        fuzzy: bool = True,
    ) -> None:
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        self._header_comment = header_comment
        self._messages: dict[str | tuple[str, str], Message] = {}

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are assumed to be in the local timezone.
        if creation_date is None:
            creation_date = datetime.datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        # Dictionary of obsolete messages
        self.obsolete: dict[str | tuple[str, str], Message] = {}
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale: Locale | str | None) -> None:
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = str(locale)
            self._locale = locale
            return

        if isinstance(locale, str):
            # Keep the identifier even if it does not name a known locale.
            self._locale_identifier = str(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}")

    def _get_locale(self) -> Locale | None:
        return self._locale

    def _get_locale_identifier(self) -> str | None:
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self) -> str:
        comment = self._header_comment
        if comment is None:
            # `None` is documented to mean "use the default header"; without
            # this fallback the `.replace` calls below would fail.
            comment = DEFAULT_HEADER
        # Prefer the revision year; fall back to the current year when the
        # revision date is not a date-like object (e.g. the placeholder).
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        else:
            year = datetime.datetime.now(LOCALTZ).strftime('%Y')
        comment = comment.replace('PROJECT', self.project) \
                         .replace('VERSION', self.version) \
                         .replace('YEAR', year) \
                         .replace('ORGANIZATION', self.copyright_holder)
        locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
        if locale_name:
            comment = comment.replace("Translations template", f"{locale_name} translations")
        return comment

    def _set_header_comment(self, string: str | None) -> None:
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    :type: `str`
    """)

    def _get_mime_headers(self) -> list[tuple[str, str]]:
        if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
            revision_date = format_datetime(self.revision_date, 'yyyy-MM-dd HH:mmZ', locale='en')
        else:
            revision_date = self.revision_date

        language_team = self.language_team
        if self.locale_identifier and 'LANGUAGE' in language_team:
            language_team = language_team.replace('LANGUAGE', str(self.locale_identifier))

        headers: list[tuple[str, str]] = [
            ("Project-Id-Version", f"{self.project} {self.version}"),
            ('Report-Msgid-Bugs-To', self.msgid_bugs_address),
            ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')),
            ('PO-Revision-Date', revision_date),
            ('Last-Translator', self.last_translator),
        ]
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        headers.append(('Language-Team', language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers += [
            ('MIME-Version', '1.0'),
            ("Content-Type", f"text/plain; charset={self.charset}"),
            ('Content-Transfer-Encoding', '8bit'),
            ("Generated-By", f"Babel {VERSION}\n"),
        ]
        return headers

    def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
        for name, value in headers:
            name = _force_text(name.lower(), encoding=self.charset)
            value = _force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # The last space-separated word is the version; everything
                # before it is the project name.
                parts = value.split(' ')
                self.project = ' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                # The `or None` makes sure that the locale is set to None
                # if the header's value is an empty string, which is what
                # some tools generate (instead of eliding the empty Language
                # header altogether).
                self._set_locale(value or None)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                params = parse_separated_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # The " ;" prefix makes the first `key=value` pair parse as
                # a parameter rather than as the header's main value.
                params = parse_separated_header(f" ;{value}")
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> from datetime import datetime
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1);
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)

    @property
    def num_plurals(self) -> int:
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self) -> str:
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `str`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self) -> str:
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1);'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1);'

        :type: `str`"""
        return f"nplurals={self.num_plurals}; plural={self.plural_expr};"

    def __contains__(self, id: _MessageID) -> bool:
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self) -> int:
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self) -> Iterator[Message]:
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first message yielded is the special header entry, built from
        the MIME headers.

        :rtype: ``iterator``"""
        buf = []
        for name, value in self.mime_headers:
            buf.append(f"{name}: {value}")
        flags = set()
        if self.fuzzy:
            flags |= {'fuzzy'}
        yield Message('', '\n'.join(buf), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self) -> str:
        locale = ''
        if self.locale:
            locale = f" {self.locale}"
        return f"<{type(self).__name__} {self.domain!r}{locale}>"

    def __delitem__(self, id: _MessageID) -> None:
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id: _MessageID) -> Message:
        """Return the message with the specified ID.

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id: _MessageID, message: Message) -> None:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo')
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo', locations=[('main.py', 1)])
        >>> catalog['foo'].locations
        [('main.py', 1)]
        >>> catalog['foo'] = Message('foo', locations=[('utils.py', 5)])
        >>> catalog['foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            current.locations = list(dict.fromkeys([*current.locations, *message.locations]))
            current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments]))
            current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments]))
            current.flags |= message.flags
        elif id == '':
            # special treatment for the header message
            self.mime_headers = message_from_string(message.string).items()
            self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments])
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    f"Expected sequence but got {type(message.string)}"
            self._messages[key] = message

    def add(
        self,
        id: _MessageID,
        string: _MessageID | None = None,
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> Message:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add('foo')
        <Message ...>
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        message = Message(id, string, list(locations), flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context)
        self[id] = message
        return message

    def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yields a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``generator`` of ``(message, errors)``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id: _MessageID, context: str | None = None) -> Message | None:
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id: _MessageID, context: str | None = None) -> None:
        """Delete the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    def update(
        self,
        template: Catalog,
        no_fuzzy_matching: bool = False,
        update_header_comment: bool = False,
        keep_user_comments: bool = True,
        update_creation_date: bool = True,
    ) -> None:
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', 'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        ('Salat', 'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to disable fuzzy matching of message IDs
        :param update_header_comment: whether to copy the header comment from the template
        :param keep_user_comments: whether to keep user comments from the old catalog
        :param update_creation_date: whether to copy the creation date from the template
        """
        messages = self._messages
        remaining = messages.copy()
        self._messages = {}

        # Prepare for fuzzy matching
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            for msgid in messages:
                if msgid and messages[msgid].string:
                    key = self._key_for(msgid)
                    ctxt = messages[msgid].context
                    fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
        fuzzy_matches = set()

        def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
            # Work on a copy so the template's message is left untouched.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Fuzzy match: remember the old ID on the new message.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                assert oldmsg is not None
                if isinstance(oldmsg.id, str):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
                assert oldmsg is not None
            message.string = oldmsg.string

            if keep_user_comments and oldmsg.user_comments:
                message.user_comments = list(dict.fromkeys(oldmsg.user_comments))

            # Reconcile the shape of the translation with the (possibly
            # changed) pluralizability of the message; any mismatch marks
            # the result as fuzzy.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([''] * (len(message.id) - 1)),
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {'fuzzy'}
            self[message.id] = message

        for message in template:
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        matches = get_close_matches(
                            self._to_fuzzy_match_key(key),
                            fuzzy_candidates.keys(),
                            1,
                        )
                        if matches:
                            modified_key = matches[0]
                            newkey, newctxt = fuzzy_candidates[modified_key]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        if update_creation_date:
            self.creation_date = template.creation_date

    def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
        """Converts a message key to a string suitable for fuzzy matching."""
        if isinstance(key, tuple):
            matchkey = key[0]  # just the msgid, no context
        else:
            matchkey = key
        return matchkey.lower().strip()

    def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str:
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key

    def is_identical(self, other: Catalog) -> bool:
        """Checks if catalogs are identical, taking into account messages and
        headers.
        """
        assert isinstance(other, Catalog)
        for key in self._messages.keys() | other._messages.keys():
            # These are already storage keys, so look them up directly.
            # Going through `get()` would re-normalize a ``(msgid, context)``
            # key down to the bare msgid and miss context-specific messages.
            message_1 = self._messages.get(key)
            message_2 = other._messages.get(key)
            if (
                message_1 is None
                or message_2 is None
                or not message_1.is_identical(message_2)
            ):
                return False
        return dict(self.mime_headers) == dict(other.mime_headers)