Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/messages/catalog.py: 33%
398 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:39 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:39 +0000
1"""
2 babel.messages.catalog
3 ~~~~~~~~~~~~~~~~~~~~~~
5 Data structures for message catalogs.
7 :copyright: (c) 2013-2023 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
12import datetime
13import re
14from collections import OrderedDict
15from collections.abc import Iterable, Iterator
16from copy import copy
17from difflib import SequenceMatcher
18from email import message_from_string
19from heapq import nlargest
20from typing import TYPE_CHECKING
22from babel import __version__ as VERSION
23from babel.core import Locale, UnknownLocaleError
24from babel.dates import format_datetime
25from babel.messages.plurals import get_plural
26from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct
if TYPE_CHECKING:
    # Only needed by static type checkers; never imported at runtime.
    from typing_extensions import TypeAlias

    # A message ID is either a single string or a sequence of strings
    # (``Message`` treats list/tuple IDs as pluralizable messages).
    _MessageID: TypeAlias = str | tuple[str, ...] | list[str]

# Public names exported by this module.
__all__ = ['Message', 'Catalog', 'TranslationError']
def get_close_matches(word, possibilities, n=3, cutoff=0.6):
    """Return the best "good enough" matches for ``word``.

    This mirrors ``difflib.get_close_matches`` except that the
    ``SequenceMatcher`` is created with ``autojunk=False``, to work
    around https://github.com/python/cpython/issues/90825.

    :param word: the sequence to find close matches for
    :param possibilities: the candidate sequences to compare against
    :param n: the maximum number of matches to return; must be > 0
    :param cutoff: the minimum similarity score, in ``[0.0, 1.0]``
    :return: at most ``n`` candidates, most similar first
    :raise ValueError: if ``n`` or ``cutoff`` is out of range
    """
    if not n > 0:  # pragma: no cover
        raise ValueError(f"n must be > 0: {n!r}")
    if not 0.0 <= cutoff <= 1.0:  # pragma: no cover
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")

    matcher = SequenceMatcher(autojunk=False)  # only line changed from difflib.py
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # The two quick ratios are cheap upper bounds on ratio(); bail out
        # early when either of them already falls below the cutoff.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        score = matcher.ratio()
        if score >= cutoff:
            scored.append((score, candidate))

    # Keep the n best scorers (best first) and drop the scores.
    return [candidate for _score, candidate in nlargest(n, scored)]
# Matches a single Python %-style format specifier (verbose regex).
# Capture groups:
#   1. the optional mapping key, i.e. the ``name`` in ``%(name)s``
#   2. the optional flag, width, precision and length modifier
#   3. the conversion type character (including a literal ``%``)
PYTHON_FORMAT = re.compile(r'''
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
''', re.VERBOSE)
73def _parse_datetime_header(value: str) -> datetime.datetime:
74 match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
76 dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
78 # Separate the offset into a sign component, hours, and # minutes
79 tzoffset = match.group('tzoffset')
80 if tzoffset is not None:
81 plus_minus_s, rest = tzoffset[0], tzoffset[1:]
82 hours_offset_s, mins_offset_s = rest[:2], rest[2:]
84 # Make them all integers
85 plus_minus = int(f"{plus_minus_s}1")
86 hours_offset = int(hours_offset_s)
87 mins_offset = int(mins_offset_s)
89 # Calculate net offset
90 net_mins_offset = hours_offset * 60
91 net_mins_offset += mins_offset
92 net_mins_offset *= plus_minus
94 # Create an offset object
95 tzoffset = FixedOffsetTimezone(net_mins_offset)
97 # Store the offset in a datetime object
98 dt = dt.replace(tzinfo=tzoffset)
100 return dt
class Message:
    """A single catalog entry: a message ID, its translation and metadata."""

    def __init__(
        self,
        id: _MessageID,
        string: _MessageID | None = '',
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> None:
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        self.lineno = lineno
        self.context = context

        # An untranslated pluralizable message gets an empty singular/plural
        # pair rather than a bare empty string.
        self.string = ('', '') if (not string and self.pluralizable) else string

        self.locations = list(distinct(locations))
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))

        # The 'python-format' flag always reflects the ID's actual content,
        # regardless of what the caller passed in ``flags``.
        self.flags = set(flags)
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')

        # A lone string is wrapped; any other iterable is copied into a list.
        self.previous_id = (
            [previous_id] if isinstance(previous_id, str) else list(previous_id)
        )

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"

    def __cmp__(self, other: object) -> int:
        """Compare Messages, taking into account plural ids"""
        def comparison_key(obj):
            # Pluralizable messages are ordered by their singular form only.
            if isinstance(obj, Message) and obj.pluralizable:
                msgid = obj.id[0]
            else:
                msgid = obj.id
            return msgid, obj.context or ''

        return _cmp(comparison_key(self), comparison_key(other))

    # The rich comparisons below all delegate to ``__cmp__``.

    def __gt__(self, other: object) -> bool:
        return self.__cmp__(other) > 0

    def __lt__(self, other: object) -> bool:
        return self.__cmp__(other) < 0

    def __ge__(self, other: object) -> bool:
        return self.__cmp__(other) >= 0

    def __le__(self, other: object) -> bool:
        return self.__cmp__(other) <= 0

    def __eq__(self, other: object) -> bool:
        return self.__cmp__(other) == 0

    def __ne__(self, other: object) -> bool:
        return self.__cmp__(other) != 0

    def is_identical(self, other: Message) -> bool:
        """Return whether every attribute of both messages is equal.

        Unlike ``==``, which only looks at the ID and context, this compares
        the complete instance dictionaries.
        """
        assert isinstance(other, Message)
        return self.__dict__ == other.__dict__

    def clone(self) -> Message:
        """Return a new `Message` built from shallow copies of this
        message's attributes."""
        fields = (
            self.id, self.string, self.locations, self.flags,
            self.auto_comments, self.user_comments, self.previous_id,
            self.lineno, self.context,
        )
        return Message(*[copy(value) for value in fields])

    def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a list of the `TranslationError` objects raised by the checkers.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        from babel.messages.checkers import checkers

        problems: list[TranslationError] = []
        for run_check in checkers:
            try:
                run_check(catalog, self)
            except TranslationError as exc:
                # Collect the failure and keep running the other checkers.
                problems.append(exc)
        return problems

    @property
    def fuzzy(self) -> bool:
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self) -> bool:
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self) -> bool:
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        msgids = self.id if isinstance(self.id, (list, tuple)) else [self.id]
        for msgid in msgids:
            if PYTHON_FORMAT.search(msgid):
                return True
        return False
class TranslationError(Exception):
    """Raised by translation checkers when a message translation is
    found to be invalid."""
# Default header comment for a catalog.  The upper-case words PROJECT, YEAR
# and ORGANIZATION are placeholders that ``Catalog.header_comment``
# substitutes when the comment is read.
DEFAULT_HEADER = """\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
def parse_separated_header(value: str) -> dict[str, str]:
    """Split a ``;``-separated header value into a dict of its parameters.

    The value is parsed as if it were a ``Content-Type`` header by the
    standard library's email package.

    :param value: the raw header value, e.g. ``text/plain; charset=utf-8``
    :return: a mapping of parameter names to their values
    """
    # Adapted from https://peps.python.org/pep-0594/#cgi
    from email.message import Message as EmailMessage

    carrier = EmailMessage()
    carrier['content-type'] = value
    params = carrier.get_params()
    return dict(params)
class Catalog:
    """Representation of a message catalog."""

    def __init__(
        self,
        locale: str | Locale | None = None,
        domain: str | None = None,
        header_comment: str | None = DEFAULT_HEADER,
        project: str | None = None,
        version: str | None = None,
        copyright_holder: str | None = None,
        msgid_bugs_address: str | None = None,
        creation_date: datetime.datetime | str | None = None,
        revision_date: datetime.datetime | datetime.time | float | str | None = None,
        last_translator: str | None = None,
        language_team: str | None = None,
        charset: str | None = None,
        fuzzy: bool = True,
    ) -> None:
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        self._header_comment = header_comment
        # Messages keyed by msgid, or by (msgid, msgctxt) when a context is set.
        self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict()

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are interpreted as local time.
        if creation_date is None:
            creation_date = datetime.datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        # Dictionary of obsolete messages
        self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict()
        # Lazily computed (or set from the Plural-Forms header).
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale: Locale | str | None) -> None:
        """Bind the catalog to ``locale``.

        The identifier is always remembered as a string; the parsed `Locale`
        is ``None`` when the identifier is unknown to Babel.

        :raise TypeError: if ``locale`` is not a `Locale`, a string or `None`
        """
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = str(locale)
            self._locale = locale
            return

        if isinstance(locale, str):
            self._locale_identifier = str(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}")

    def _get_locale(self) -> Locale | None:
        return self._locale

    def _get_locale_identifier(self) -> str | None:
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self) -> str:
        """Return the header comment with its placeholders substituted."""
        comment = self._header_comment
        if comment is None:
            # ``None`` is documented as "use the default header".
            comment = DEFAULT_HEADER
        year = datetime.datetime.now(LOCALTZ).strftime('%Y')
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        comment = comment.replace('PROJECT', self.project) \
                         .replace('VERSION', self.version) \
                         .replace('YEAR', year) \
                         .replace('ORGANIZATION', self.copyright_holder)
        locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
        if locale_name:
            comment = comment.replace("Translations template", f"{locale_name} translations")
        return comment

    def _set_header_comment(self, string: str | None) -> None:
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    :type: `unicode`
    """)

    def _get_mime_headers(self) -> list[tuple[str, str]]:
        """Build the ``(name, value)`` pairs for the ``msgid ""`` entry."""
        headers: list[tuple[str, str]] = []
        headers.append(("Project-Id-Version", f"{self.project} {self.version}"))
        headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
        headers.append(('POT-Creation-Date',
                        format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
                                        locale='en')))
        if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
            headers.append(('PO-Revision-Date',
                            format_datetime(self.revision_date,
                                            'yyyy-MM-dd HH:mmZ', locale='en')))
        else:
            # Typically the untouched 'YEAR-MO-DA HO:MI+ZONE' placeholder.
            headers.append(('PO-Revision-Date', self.revision_date))
        headers.append(('Last-Translator', self.last_translator))
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        if self.locale_identifier and ('LANGUAGE' in self.language_team):
            headers.append(('Language-Team',
                            self.language_team.replace('LANGUAGE',
                                                       str(self.locale_identifier))))
        else:
            headers.append(('Language-Team', self.language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers.append(('MIME-Version', '1.0'))
        headers.append(("Content-Type", f"text/plain; charset={self.charset}"))
        headers.append(('Content-Transfer-Encoding', '8bit'))
        headers.append(("Generated-By", f"Babel {VERSION}\n"))
        return headers

    def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
        """Return ``s`` as ``str``, decoding bytes with ``encoding``."""
        if isinstance(s, str):
            return s
        if isinstance(s, bytes):
            return s.decode(encoding, errors)
        return str(s)

    def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
        """Update catalog attributes from ``(name, value)`` header pairs.

        Header names are matched case-insensitively; unrecognized headers
        are ignored.
        """
        for name, value in headers:
            name = self._force_text(name.lower(), encoding=self.charset)
            value = self._force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # Everything before the last space is the project name.
                parts = value.split(' ')
                self.project = ' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                self._set_locale(value)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                params = parse_separated_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # The leading " ;" makes every part parse as a parameter.
                params = parse_separated_header(f" ;{value}")
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> from datetime import datetime
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1);
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)

    @property
    def num_plurals(self) -> int:
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self) -> str:
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `str`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self) -> str:
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1);'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1);'

        :type: `str`"""
        return f"nplurals={self.num_plurals}; plural={self.plural_expr};"

    def __contains__(self, id: _MessageID) -> bool:
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self) -> int:
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self) -> Iterator[Message]:
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first message yielded is the synthesized header (``msgid ""``)
        entry built from the MIME headers.

        :rtype: ``iterator``"""
        buf = []
        for name, value in self.mime_headers:
            buf.append(f"{name}: {value}")
        flags = set()
        if self.fuzzy:
            flags |= {'fuzzy'}
        yield Message('', '\n'.join(buf), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self) -> str:
        locale = ''
        if self.locale:
            locale = f" {self.locale}"
        return f"<{type(self).__name__} {self.domain!r}{locale}>"

    def __delitem__(self, id: _MessageID) -> None:
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id: _MessageID) -> Message:
        """Return the message with the specified ID.

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id: _MessageID, message: Message) -> None:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo')
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
        >>> catalog[u'foo'].locations
        [('main.py', 1)]
        >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
        >>> catalog[u'foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            # Merge the new message's metadata into the existing entry.
            current.locations = list(distinct(current.locations +
                                              message.locations))
            current.auto_comments = list(distinct(current.auto_comments +
                                                  message.auto_comments))
            current.user_comments = list(distinct(current.user_comments +
                                                  message.user_comments))
            current.flags |= message.flags
        elif id == '':
            # special treatment for the header message
            self.mime_headers = message_from_string(message.string).items()
            self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments])
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    f"Expected sequence but got {type(message.string)}"
            self._messages[key] = message

    def add(
        self,
        id: _MessageID,
        string: _MessageID | None = None,
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> Message:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add(u'foo')
        <Message ...>
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        message = Message(id, string, list(locations), flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context)
        self[id] = message
        return message

    def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yield a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``generator`` of ``(message, errors)``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id: _MessageID, context: str | None = None) -> Message | None:
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id: _MessageID, context: str | None = None) -> None:
        """Delete the message with the specified ID and context.

        Nothing happens if no such message exists.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    def update(
        self,
        template: Catalog,
        no_fuzzy_matching: bool = False,
        update_header_comment: bool = False,
        keep_user_comments: bool = True,
    ) -> None:
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        u'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        (u'Salat', u'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to disable fuzzy matching of
                                  message IDs
        :param update_header_comment: whether to replace this catalog's header
                                      comment with the template's
        :param keep_user_comments: whether to carry over the user comments of
                                   the existing translations
        """
        messages = self._messages
        remaining = messages.copy()
        self._messages = OrderedDict()

        # Prepare for fuzzy matching
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            for msgid in messages:
                # Only translated, non-header messages are worth matching.
                if msgid and messages[msgid].string:
                    key = self._key_for(msgid)
                    ctxt = messages[msgid].context
                    fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
        fuzzy_matches = set()

        def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
            # Merge the existing translation stored under ``oldkey`` into a
            # copy of the template ``message`` and add it to the catalog.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Fuzzy match: remember which message the translation
                # was borrowed from.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                assert oldmsg is not None
                if isinstance(oldmsg.id, str):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
                assert oldmsg is not None
            message.string = oldmsg.string

            if keep_user_comments:
                message.user_comments = list(distinct(oldmsg.user_comments))

            # Reconcile singular/plural shape differences between the
            # template ID and the existing translation.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([''] * (len(message.id) - 1))
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {'fuzzy'}
            self[message.id] = message

        for message in template:
            # The header entry (empty ID) is skipped.
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        matches = get_close_matches(
                            self._to_fuzzy_match_key(key),
                            fuzzy_candidates.keys(),
                            1,
                        )
                        if matches:
                            modified_key = matches[0]
                            newkey, newctxt = fuzzy_candidates[modified_key]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        # Whatever was not matched by the template becomes obsolete.
        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        self.creation_date = template.creation_date

    def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
        """Converts a message key to a string suitable for fuzzy matching."""
        if isinstance(key, tuple):
            matchkey = key[0]  # just the msgid, no context
        else:
            matchkey = key
        return matchkey.lower().strip()

    def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str:
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key

    def is_identical(self, other: Catalog) -> bool:
        """Checks if catalogs are identical, taking into account messages and
        headers.
        """
        assert isinstance(other, Catalog)
        for key in self._messages.keys() | other._messages.keys():
            # Look the storage keys up directly: going through ``get()`` would
            # push ``(msgid, msgctxt)`` keys through ``_key_for`` again, which
            # drops the context and fetches the wrong (or no) message.
            message_1 = self._messages.get(key)
            message_2 = other._messages.get(key)
            if (
                message_1 is None
                or message_2 is None
                or not message_1.is_identical(message_2)
            ):
                return False
        return dict(self.mime_headers) == dict(other.mime_headers)