Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/messages/catalog.py: 33%

398 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:39 +0000

1""" 

2 babel.messages.catalog 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Data structures for message catalogs. 

6 

7 :copyright: (c) 2013-2023 by the Babel Team. 

8 :license: BSD, see LICENSE for more details. 

9""" 

10from __future__ import annotations 

11 

12import datetime 

13import re 

14from collections import OrderedDict 

15from collections.abc import Iterable, Iterator 

16from copy import copy 

17from difflib import SequenceMatcher 

18from email import message_from_string 

19from heapq import nlargest 

20from typing import TYPE_CHECKING 

21 

22from babel import __version__ as VERSION 

23from babel.core import Locale, UnknownLocaleError 

24from babel.dates import format_datetime 

25from babel.messages.plurals import get_plural 

26from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct 

27 

28if TYPE_CHECKING: 

29 from typing_extensions import TypeAlias 

30 

31 _MessageID: TypeAlias = str | tuple[str, ...] | list[str] 

32 

33__all__ = ['Message', 'Catalog', 'TranslationError'] 

34 

def get_close_matches(word, possibilities, n=3, cutoff=0.6):
    """Return the best "good enough" matches for *word*.

    This mirrors ``difflib.get_close_matches`` except that the
    ``SequenceMatcher`` is built with ``autojunk=False``, to work around
    https://github.com/python/cpython/issues/90825.

    :param word: the sequence to find close matches for
    :param possibilities: an iterable of candidate sequences
    :param n: the maximum number of matches to return; must be > 0
    :param cutoff: the minimum similarity ratio, in ``[0.0, 1.0]``, a
                   candidate must reach to be returned
    :return: up to *n* candidates, best match first
    :raise ValueError: if *n* or *cutoff* is out of range
    """
    if not n > 0:  # pragma: no cover
        raise ValueError(f"n must be > 0: {n!r}")
    if not 0.0 <= cutoff <= 1.0:  # pragma: no cover
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")

    matcher = SequenceMatcher(autojunk=False)  # the one deviation from difflib.py
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Try the three ratio estimates in the same order as difflib and
        # drop the candidate as soon as one falls below the cutoff.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        if matcher.ratio() < cutoff:
            continue
        scored.append((matcher.ratio(), candidate))

    # Keep the n best (score, candidate) pairs, then drop the scores.
    return [candidate for _score, candidate in nlargest(n, scored)]

59 

60 

# Matches a single printf-style (``%``) placeholder in a message string.
# Capture groups:
#   1. the optional mapping key written in parentheses, e.g. ``name`` in
#      ``%(name)s``
#   2. the optional flag, width, precision and length-modifier part
#   3. the conversion character (note that ``%`` itself is accepted, so a
#      literal ``%%`` also matches)
PYTHON_FORMAT = re.compile(r'''
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
''', re.VERBOSE)

71 

72 

def _parse_datetime_header(value: str) -> datetime.datetime:
    """Parse a PO header timestamp such as ``1990-04-01 15:30+0000``.

    :param value: a ``YYYY-MM-DD HH:MM`` timestamp, optionally followed by a
                  ``+HHMM``/``-HHMM`` UTC offset
    :return: a naive datetime when no offset is present, otherwise a datetime
             whose ``tzinfo`` is a `FixedOffsetTimezone` for that offset
    :raise ValueError: if the date/time part does not match
                       ``%Y-%m-%d %H:%M``
    """
    parsed = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
    stamp = datetime.datetime.strptime(parsed.group('datetime'), '%Y-%m-%d %H:%M')

    offset_text = parsed.group('tzoffset')
    if offset_text is None:
        return stamp

    # The offset is a sign character followed by two hour digits and two
    # minute digits; fold it into a signed number of minutes.
    sign = int(f"{offset_text[0]}1")
    hours = int(offset_text[1:3])
    minutes = int(offset_text[3:])
    total_minutes = sign * (hours * 60 + minutes)

    # Attach the offset to the parsed timestamp.
    return stamp.replace(tzinfo=FixedOffsetTimezone(total_minutes))

101 

102 

class Message:
    """Representation of a single message in a catalog.

    Rich comparisons (``==``, ``<``, ...) only look at the singular message
    ID and the context; use `is_identical` to compare every attribute.
    """

    def __init__(
        self,
        id: _MessageID,
        string: _MessageID | None = '',
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> None:
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        # A pluralizable message without a translation gets an empty
        # (singular, plural) pair rather than an empty string.
        if not string and self.pluralizable:
            string = ('', '')
        self.string = string
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        # The 'python-format' flag always reflects the message ID itself,
        # regardless of what the caller passed in ``flags``.
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # ``previous_id`` is always stored as a list of strings.
        if isinstance(previous_id, str):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno
        self.context = context

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"

    def __cmp__(self, other: object) -> int:
        """Compare Messages, taking into account plural ids.

        Returns a negative, zero or positive number. *other* must expose
        ``id`` and ``context`` attributes.
        """
        def values_to_compare(obj):
            if isinstance(obj, Message) and obj.pluralizable:
                return obj.id[0], obj.context or ''
            return obj.id, obj.context or ''
        return _cmp(values_to_compare(self), values_to_compare(other))

    @staticmethod
    def _is_comparable(other: object) -> bool:
        """Return whether *other* has the ``id`` and ``context`` attributes
        that `__cmp__` reads."""
        return hasattr(other, 'id') and hasattr(other, 'context')

    # The rich comparison methods return ``NotImplemented`` for operands that
    # `__cmp__` cannot handle, so that e.g. ``message == None`` evaluates to
    # ``False`` instead of raising ``AttributeError``.

    def __gt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __lt__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __ge__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __le__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __eq__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other: object) -> bool:
        if not self._is_comparable(other):
            return NotImplemented
        return self.__cmp__(other) != 0

    def is_identical(self, other: Message) -> bool:
        """Checks whether messages are identical, taking into account all
        properties.
        """
        assert isinstance(other, Message)
        return self.__dict__ == other.__dict__

    def clone(self) -> Message:
        """Return a new `Message` built from shallow copies of this
        message's attributes."""
        return Message(*map(copy, (self.id, self.string, self.locations,
                                   self.flags, self.auto_comments,
                                   self.user_comments, self.previous_id,
                                   self.lineno, self.context)))

    def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a list of `TranslationError` objects, one for every checker that
        raised.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        # Imported here rather than at module level.
        from babel.messages.checkers import checkers
        errors: list[TranslationError] = []
        for checker in checkers:
            try:
                checker(catalog, self)
            except TranslationError as e:
                errors.append(e)
        return errors

    @property
    def fuzzy(self) -> bool:
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self) -> bool:
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self) -> bool:
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        return any(PYTHON_FORMAT.search(id) for id in ids)

255 

256 

class TranslationError(Exception):
    """Exception thrown by translation checkers when invalid message
    translations are encountered.

    `Message.check` catches this exception from each checker and collects
    the instances into the list it returns."""

260 

261 

# Default header comment for a catalog.  The upper-case words ``PROJECT``,
# ``YEAR`` and ``ORGANIZATION`` are placeholders that the ``header_comment``
# getter of `Catalog` replaces with the catalog's actual values.
DEFAULT_HEADER = """\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""

268 

269 

def parse_separated_header(value: str) -> dict[str, str]:
    """Split a ``;``-separated header value into a dictionary.

    The value is parsed as if it were a ``Content-Type`` header: each
    ``key=value`` segment becomes one entry, and a segment without ``=`` is
    stored as a key with an empty string value.

    :param value: the raw header value
    :return: a mapping of parameter names to their values
    """
    # Adapted from https://peps.python.org/pep-0594/#cgi
    # Aliased so the name cannot be confused with a catalog message.
    from email.message import Message as EmailMessage

    carrier = EmailMessage()
    carrier['content-type'] = value
    params = carrier.get_params()
    return dict(params)

276 

277 

class Catalog:
    """Representation of a message catalog."""

    def __init__(
        self,
        locale: str | Locale | None = None,
        domain: str | None = None,
        header_comment: str | None = DEFAULT_HEADER,
        project: str | None = None,
        version: str | None = None,
        copyright_holder: str | None = None,
        msgid_bugs_address: str | None = None,
        creation_date: datetime.datetime | str | None = None,
        revision_date: datetime.datetime | datetime.time | float | str | None = None,
        last_translator: str | None = None,
        language_team: str | None = None,
        charset: str | None = None,
        fuzzy: bool = True,
    ) -> None:
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        self._header_comment = header_comment
        self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict()

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are interpreted as being in the local timezone.
        if creation_date is None:
            creation_date = datetime.datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        # Dictionary of obsolete messages
        self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict()
        # Computed on first access by `num_plurals` / `plural_expr`, or set
        # when a ``Plural-Forms`` header is assigned.
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale: Locale | str | None) -> None:
        """Store the locale and its identifier.

        An identifier string that `Locale.parse` does not know is kept as
        ``locale_identifier`` while ``locale`` itself becomes `None`.

        :raise TypeError: if *locale* is neither a `Locale`, a string nor
                          `None`
        """
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = str(locale)
            self._locale = locale
            return

        if isinstance(locale, str):
            self._locale_identifier = str(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}")

    def _get_locale(self) -> Locale | None:
        return self._locale

    def _get_locale_identifier(self) -> str | None:
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self) -> str:
        comment = self._header_comment
        if comment is None:
            # ``None`` is documented as "use the default header".
            comment = DEFAULT_HEADER
        year = datetime.datetime.now(LOCALTZ).strftime('%Y')
        # Prefer the revision year when the revision date is an actual
        # date/time object rather than the placeholder string.
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        comment = comment.replace('PROJECT', self.project) \
                         .replace('VERSION', self.version) \
                         .replace('YEAR', year) \
                         .replace('ORGANIZATION', self.copyright_holder)
        locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
        if locale_name:
            comment = comment.replace("Translations template", f"{locale_name} translations")
        return comment

    def _set_header_comment(self, string: str | None) -> None:
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    Setting the header to `None` selects the default header.

    :type: `str`
    """)

    def _get_mime_headers(self) -> list[tuple[str, str]]:
        headers: list[tuple[str, str]] = []
        headers.append(("Project-Id-Version", f"{self.project} {self.version}"))
        headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
        headers.append(('POT-Creation-Date',
                        format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
                                        locale='en')))
        if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
            headers.append(('PO-Revision-Date',
                            format_datetime(self.revision_date,
                                            'yyyy-MM-dd HH:mmZ', locale='en')))
        else:
            # Placeholder (or other string) revision dates are emitted as-is.
            headers.append(('PO-Revision-Date', self.revision_date))
        headers.append(('Last-Translator', self.last_translator))
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        if self.locale_identifier and ('LANGUAGE' in self.language_team):
            headers.append(('Language-Team',
                            self.language_team.replace('LANGUAGE',
                                                       str(self.locale_identifier))))
        else:
            headers.append(('Language-Team', self.language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers.append(('MIME-Version', '1.0'))
        headers.append(("Content-Type", f"text/plain; charset={self.charset}"))
        headers.append(('Content-Transfer-Encoding', '8bit'))
        headers.append(("Generated-By", f"Babel {VERSION}\n"))
        return headers

    def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
        """Return *s* as `str`, decoding bytes with *encoding* and passing
        any other object through ``str()``."""
        if isinstance(s, str):
            return s
        if isinstance(s, bytes):
            return s.decode(encoding, errors)
        return str(s)

    def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
        # Header names are matched case-insensitively; unknown headers are
        # ignored.
        for name, value in headers:
            name = self._force_text(name.lower(), encoding=self.charset)
            value = self._force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # The last space-separated word is taken as the version.
                parts = value.split(' ')
                self.project = ' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                self._set_locale(value)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                params = parse_separated_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # The leading " ;" makes every segment of the value parse as
                # a parameter.
                params = parse_separated_header(f" ;{value}")
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> from datetime import datetime
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1);
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)

    @property
    def num_plurals(self) -> int:
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self) -> str:
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `str`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self) -> str:
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1);'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1);'

        :type: `str`"""
        return f"nplurals={self.num_plurals}; plural={self.plural_expr};"

    def __contains__(self, id: _MessageID) -> bool:
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self) -> int:
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self) -> Iterator[Message]:
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first message yielded is the special ``msgid ""`` header entry,
        built from `mime_headers`.

        :rtype: ``iterator``"""
        buf = []
        for name, value in self.mime_headers:
            buf.append(f"{name}: {value}")
        flags = set()
        if self.fuzzy:
            flags |= {'fuzzy'}
        yield Message('', '\n'.join(buf), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self) -> str:
        locale = ''
        if self.locale:
            locale = f" {self.locale}"
        return f"<{type(self).__name__} {self.domain!r}{locale}>"

    def __delitem__(self, id: _MessageID) -> None:
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id: _MessageID) -> Message:
        """Return the message with the specified ID, or `None` if the catalog
        has no such message (this delegates to `get`).

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id: _MessageID, message: Message) -> None:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo')
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog['foo'] = Message('foo', locations=[('main.py', 1)])
        >>> catalog['foo'].locations
        [('main.py', 1)]
        >>> catalog['foo'] = Message('foo', locations=[('utils.py', 5)])
        >>> catalog['foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        Assigning to the empty ID updates the catalog's headers, header
        comment and fuzzy bit instead of storing a message.

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            current.locations = list(distinct(current.locations +
                                              message.locations))
            current.auto_comments = list(distinct(current.auto_comments +
                                                  message.auto_comments))
            current.user_comments = list(distinct(current.user_comments +
                                                  message.user_comments))
            current.flags |= message.flags
            message = current
        elif id == '':
            # special treatment for the header message
            self.mime_headers = message_from_string(message.string).items()
            self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments])
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    f"Expected sequence but got {type(message.string)}"
            self._messages[key] = message

    def add(
        self,
        id: _MessageID,
        string: _MessageID | None = None,
        locations: Iterable[tuple[str, int]] = (),
        flags: Iterable[str] = (),
        auto_comments: Iterable[str] = (),
        user_comments: Iterable[str] = (),
        previous_id: _MessageID = (),
        lineno: int | None = None,
        context: str | None = None,
    ) -> Message:
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add('foo')
        <Message ...>
        >>> catalog['foo']
        <Message 'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        :return: the newly constructed `Message` (which is not necessarily
                 the object stored in the catalog when the ID already existed)
        """
        message = Message(id, string, list(locations), flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context)
        self[id] = message
        return message

    def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yield a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``generator`` of ``(message, errors)``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id: _MessageID, context: str | None = None) -> Message | None:
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        :return: the message, or `None` if there is no such message
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id: _MessageID, context: str | None = None) -> None:
        """Delete the message with the specified ID and context.

        Nothing happens if the catalog has no such message.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    def update(
        self,
        template: Catalog,
        no_fuzzy_matching: bool = False,
        update_header_comment: bool = False,
        keep_user_comments: bool = True,
    ) -> None:
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', 'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        ('Salat', 'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to skip fuzzy matching of message IDs
        :param update_header_comment: whether to replace this catalog's header
                                      comment with the template's
        :param keep_user_comments: whether to carry over the user comments of
                                   the existing messages
        """
        messages = self._messages
        remaining = messages.copy()
        self._messages = OrderedDict()

        # Prepare for fuzzy matching
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            for msgid in messages:
                if msgid and messages[msgid].string:
                    key = self._key_for(msgid)
                    ctxt = messages[msgid].context
                    fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
        fuzzy_matches = set()

        def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
            # Combine the template message with the translation stored under
            # ``oldkey`` and store the result in the new message collection.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Matched through fuzzy matching: remember the old ID.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                assert oldmsg is not None
                if isinstance(oldmsg.id, str):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
                assert oldmsg is not None
            message.string = oldmsg.string

            if keep_user_comments:
                message.user_comments = list(distinct(oldmsg.user_comments))

            # Reconcile the shape of the translation with the (possibly
            # changed) pluralizability of the message ID.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([''] * (len(message.id) - 1))
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {'fuzzy'}
            self[message.id] = message

        for message in template:
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        matches = get_close_matches(
                            self._to_fuzzy_match_key(key),
                            fuzzy_candidates.keys(),
                            1,
                        )
                        if matches:
                            modified_key = matches[0]
                            newkey, newctxt = fuzzy_candidates[modified_key]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        self.creation_date = template.creation_date

    def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
        """Converts a message key to a string suitable for fuzzy matching."""
        if isinstance(key, tuple):
            matchkey = key[0]  # just the msgid, no context
        else:
            matchkey = key
        return matchkey.lower().strip()

    def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str:
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key

    def is_identical(self, other: Catalog) -> bool:
        """Checks if catalogs are identical, taking into account messages and
        headers.
        """
        assert isinstance(other, Catalog)
        for key in self._messages.keys() | other._messages.keys():
            # Look the stored keys up directly: going through `get` would run
            # them through `_key_for`, which reduces a ``(msgid, context)``
            # key to the bare msgid and so misses context-specific messages.
            message_1 = self._messages.get(key)
            message_2 = other._messages.get(key)
            if (
                message_1 is None
                or message_2 is None
                or not message_1.is_identical(message_2)
            ):
                return False
        return dict(self.mime_headers) == dict(other.mime_headers)