Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/messages/pofile.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

315 statements  

1""" 

2 babel.messages.pofile 

3 ~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Reading and writing of files in the ``gettext`` PO (portable object) 

6 format. 

7 

8 :copyright: (c) 2013-2024 by the Babel Team. 

9 :license: BSD, see LICENSE for more details. 

10""" 

11from __future__ import annotations 

12 

13import os 

14import re 

15from collections.abc import Iterable 

16from typing import TYPE_CHECKING 

17 

18from babel.core import Locale 

19from babel.messages.catalog import Catalog, Message 

20from babel.util import _cmp, wraptext 

21 

22if TYPE_CHECKING: 

23 from typing import IO, AnyStr 

24 

25 from _typeshed import SupportsWrite 

26 from typing_extensions import Literal 

27 

28 

def unescape(string: str) -> str:
    r"""Undo the escaping applied by `escape`.

    The first and last character of `string` (the enclosing double quotes)
    are dropped, then the escape sequences ``\\``, ``\"``, ``\t``, ``\r`` and
    ``\n`` are replaced by the characters they stand for.  Any other
    backslash sequence is left untouched.

    >>> print(unescape('"Say:\\n  \\"hello, world!\\"\\n"'))
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the quoted, escaped string to unescape
    :return: the unescaped text without the surrounding quotes
    """
    # Escape letters that stand for a control character; a backslash or a
    # double quote simply stands for itself.
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _decode(found):
        char = found.group(1)
        return control_chars.get(char, char)

    inner = string[1:-1]
    return re.sub(r'\\([\\trn"])', _decode, inner)

50 

51 

def denormalize(string: str) -> str:
    r"""Undo the transformation performed by `normalize`.

    A single-line value is simply unescaped.  A multi-line value is split
    into its lines, a leading empty ``""`` line is discarded, and the
    remaining lines are unescaped and concatenated.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"'''))
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the normalized string to convert back
    :return: the original, unescaped text
    """
    if '\n' not in string:
        return unescape(string)

    pieces = string.splitlines()
    if string.startswith('""'):
        # Drop the empty first line that introduces a multi-line value.
        pieces = pieces[1:]
    return ''.join(unescape(piece) for piece in pieces)

81 

82 

class PoFileError(Exception):
    """Raised by `PoFileParser` when it meets an invalid PO file.

    The exception text is ``"<message> on <lineno>"``; the catalog being
    filled, the offending line and the line number are kept as attributes.
    """

    def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> None:
        text = f'{message} on {lineno}'
        super().__init__(text)
        self.lineno = lineno
        self.line = line
        self.catalog = catalog

91 

92 

class _NormalizedString:
    """Accumulates the quoted string fragments that make up one PO value.

    Every fragment is stored with surrounding whitespace stripped.
    `denormalize` unescapes the fragments and joins them into the final
    text.  Ordering and equality are based on the string form of the object
    (the fragments joined with ``os.linesep``); when the other operand is
    falsy the instance always compares as greater.
    """

    def __init__(self, *args: str) -> None:
        self._strs: list[str] = []
        for fragment in args:
            self.append(fragment)

    def append(self, s: str) -> None:
        """Store `s`, stripped of leading and trailing whitespace."""
        stripped = s.strip()
        self._strs.append(stripped)

    def denormalize(self) -> str:
        """Return all fragments unescaped and concatenated."""
        return ''.join(unescape(fragment) for fragment in self._strs)

    def __bool__(self) -> bool:
        # Truthy as soon as at least one fragment has been stored.
        return len(self._strs) > 0

    def __repr__(self) -> str:
        return os.linesep.join(self._strs)

    def __cmp__(self, other: object) -> int:
        """Three-way comparison; a falsy `other` always sorts lower."""
        if other:
            return _cmp(str(self), str(other))
        return 1

    def __eq__(self, other: object) -> bool:
        return self.__cmp__(other) == 0

    def __ne__(self, other: object) -> bool:
        return self.__cmp__(other) != 0

    def __lt__(self, other: object) -> bool:
        return self.__cmp__(other) < 0

    def __le__(self, other: object) -> bool:
        return self.__cmp__(other) <= 0

    def __gt__(self, other: object) -> bool:
        return self.__cmp__(other) > 0

    def __ge__(self, other: object) -> bool:
        return self.__cmp__(other) >= 0

135 

136 

class PoFileParser:
    """Support class to read messages from a ``gettext`` PO (portable object) file
    and add them to a `Catalog`.

    The parser works line by line: `parse` hands every non-blank line either
    to the comment handler or to the message-line handler, and the pieces
    collected for the current entry are flushed into the catalog as soon as
    the next entry begins (or the input ends).

    See `read_po` for simple cases.
    """

    # Keywords that may start a message line.
    _keywords = [
        'msgid',
        'msgstr',
        'msgctxt',
        'msgid_plural',
    ]

    def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
        """
        :param catalog: the catalog that parsed messages are added to
        :param ignore_obsolete: if true, obsolete (``#~``) entries are not
                                stored in ``catalog.obsolete``
        :param abort_invalid: if true, invalid input raises `PoFileError`
                              instead of printing a warning
        """
        self.catalog = catalog
        self.ignore_obsolete = ignore_obsolete
        self.counter = 0  # number of entries processed so far
        self.offset = 0  # 0-based index of the line holding the current msgid
        self.abort_invalid = abort_invalid
        self._reset_message_state()

    def _reset_message_state(self) -> None:
        """Forget everything collected for the current entry."""
        self.messages = []
        self.translations = []
        self.locations = []
        self.flags = []
        self.user_comments = []
        self.auto_comments = []
        self.context = None
        self.obsolete = False
        self.in_msgid = False
        self.in_msgstr = False
        self.in_msgctxt = False

    def _add_message(self) -> None:
        """
        Add a message to the catalog based on the current parser state and
        clear the state ready to process the next message.

        A singular entry that has no ``msgstr`` at all is reported through
        `_invalid_pofile` and stored with an empty translation.
        """
        self.translations.sort()
        if len(self.messages) > 1:
            msgid = tuple(m.denormalize() for m in self.messages)
        else:
            msgid = self.messages[0].denormalize()
        if isinstance(msgid, (list, tuple)):
            # Plural entry: one slot per plural form, missing ones stay empty.
            string = ['' for _ in range(self.catalog.num_plurals)]
            for idx, translation in self.translations:
                if idx >= self.catalog.num_plurals:
                    self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
                    continue
                string[idx] = translation.denormalize()
            string = tuple(string)
        elif self.translations:
            string = self.translations[0][1].denormalize()
        else:
            # A msgid without any msgstr used to fail with a bare IndexError.
            self._invalid_pofile("", self.offset, "msgid is not followed by a msgstr")
            string = ''
        msgctxt = self.context.denormalize() if self.context else None
        message = Message(msgid, string, list(self.locations), set(self.flags),
                          self.auto_comments, self.user_comments, lineno=self.offset + 1,
                          context=msgctxt)
        if self.obsolete:
            if not self.ignore_obsolete:
                self.catalog.obsolete[msgid] = message
        else:
            self.catalog[msgid] = message
        self.counter += 1
        self._reset_message_state()

    def _finish_current_message(self) -> None:
        """Flush the entry being collected, if there is one."""
        if self.messages:
            self._add_message()

    def _process_message_line(self, lineno, line, obsolete=False) -> None:
        """Dispatch a non-comment line to the continuation or keyword handler."""
        if line.startswith('"'):
            self._process_string_continuation_line(line, lineno)
        else:
            self._process_keyword_line(lineno, line, obsolete)

    def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
        """Handle a line that starts with one of `_keywords`.

        :param lineno: 0-based index of the line in the input
        :param line: the stripped line (without a ``#~`` marker)
        :param obsolete: whether the line came from an obsolete entry
        """
        for keyword in self._keywords:
            try:
                if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
                    arg = line[len(keyword):]
                    break
            except IndexError:
                # The line consists of the bare keyword only.
                self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
        else:
            self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
            return

        if keyword in ['msgid', 'msgctxt']:
            # Either keyword starts a new entry, so flush the previous one.
            self._finish_current_message()

        self.obsolete = obsolete

        # The line that has the msgid is stored as the offset of the msg
        # should this be the msgctxt if it has one?
        if keyword == 'msgid':
            self.offset = lineno

        if keyword in ['msgid', 'msgid_plural']:
            self.in_msgctxt = False
            self.in_msgid = True
            self.messages.append(_NormalizedString(arg))

        elif keyword == 'msgstr':
            self.in_msgid = False
            if arg.startswith('['):
                idx_text, bracket, msg = arg[1:].partition(']')
                try:
                    # A missing "]" is treated like an unparsable index.
                    idx = int(idx_text) if bracket else -1
                except ValueError:
                    idx = -1
                if idx < 0:
                    # Do not let continuation lines attach to an earlier
                    # translation of the same entry.
                    self.in_msgstr = False
                    self._invalid_pofile(line, lineno, "msgstr has a malformed plural index")
                    return
                self.in_msgstr = True
                self.translations.append([idx, _NormalizedString(msg)])
            else:
                self.in_msgstr = True
                self.translations.append([0, _NormalizedString(arg)])

        elif keyword == 'msgctxt':
            self.in_msgctxt = True
            self.context = _NormalizedString(arg)

    def _process_string_continuation_line(self, line, lineno) -> None:
        """Append a quoted continuation line to the value currently being read."""
        if self.in_msgid:
            s = self.messages[-1]
        elif self.in_msgstr:
            s = self.translations[-1][1]
        elif self.in_msgctxt:
            s = self.context
        else:
            self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
            return
        s.append(line)

    def _process_comment(self, line) -> None:
        """Record a ``#`` comment line for the entry that follows it.

        ``#:`` lines add locations, ``#,`` lines add flags, ``#.`` lines add
        auto comments and everything else is kept as a user comment.
        """
        # A comment always belongs to the next entry, so close the current one.
        self._finish_current_message()

        if line[1:].startswith(':'):
            for location in line[2:].lstrip().split():
                pos = location.rfind(':')
                if pos >= 0:
                    try:
                        lineno = int(location[pos + 1:])
                    except ValueError:
                        # Not a "file:line" reference after all; skip it.
                        continue
                    self.locations.append((location[:pos], lineno))
                else:
                    self.locations.append((location, None))
        elif line[1:].startswith(','):
            for flag in line[2:].lstrip().split(','):
                self.flags.append(flag.strip())
        elif line[1:].startswith('.'):
            # These are called auto-comments
            comment = line[2:].strip()
            if comment:  # Just check that we're not adding empty comments
                self.auto_comments.append(comment)
        else:
            # These are called user comments
            self.user_comments.append(line[1:].strip())

    def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
        """
        Reads from the file-like object `fileobj` and adds any po file
        units found in it to the `Catalog` supplied to the constructor.

        Lines that are not `str` are decoded with the catalog's charset.
        """
        for lineno, line in enumerate(fileobj):
            line = line.strip()
            if not isinstance(line, str):
                line = line.decode(self.catalog.charset)
            if not line:
                continue
            if line.startswith('#'):
                if line[1:].startswith('~'):
                    # Obsolete entry: parse what follows the "#~" marker.
                    self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                else:
                    self._process_comment(line)
            else:
                self._process_message_line(lineno, line)

        self._finish_current_message()

        # No actual messages found, but there was some info in comments, from which
        # we'll construct an empty header message
        if not self.counter and (self.flags or self.user_comments or self.auto_comments):
            self.messages.append(_NormalizedString('""'))
            self.translations.append([0, _NormalizedString('""')])
            self._add_message()

    def _invalid_pofile(self, line, lineno, msg) -> None:
        """Report invalid input: raise `PoFileError` if `abort_invalid` is set,
        otherwise print a warning and let parsing continue.

        :param line: the offending line (may be empty)
        :param lineno: 0-based index of the offending line
        :param msg: description of the problem
        """
        assert isinstance(line, str)
        if self.abort_invalid:
            raise PoFileError(msg, self.catalog, line, lineno)
        print("WARNING:", msg)
        print(f"WARNING: Problem on line {lineno + 1}: {line!r}")

329 

330 

def read_po(
    fileobj: IO[AnyStr] | Iterable[AnyStr],
    locale: str | Locale | None = None,
    domain: str | None = None,
    ignore_obsolete: bool = False,
    charset: str | None = None,
    abort_invalid: bool = False,
) -> Catalog:
    """Parse a ``gettext`` PO (portable object) file and return its messages
    as a `Catalog`.

    The input may be a file-like object or any iterable of lines.

    >>> from io import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ... ''')
    >>> catalog = read_po(buf)
    >>> for message in catalog:
    ...     if message.id:
    ...         print((message.id, message.string))
    ('foo %(name)s', 'quux %(name)s')

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object (or iterable of lines) to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :param abort_invalid: abort read if po file is invalid
    :return: the catalog populated from `fileobj`
    """
    result = Catalog(charset=charset, domain=domain, locale=locale)
    po_parser = PoFileParser(result, ignore_obsolete, abort_invalid=abort_invalid)
    po_parser.parse(fileobj)
    return result

389 

390 

# Splits a line into wrappable chunks; the capturing group keeps the
# separators themselves in the result of ``WORD_SEP.split()``.
WORD_SEP = re.compile(
    r'('
    r'\s+|'                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'    # em-dash
    r')',
)

396 

397 

def escape(string: str) -> str:
    r"""Quote `string` for use as a double-quoted value in a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escapes and the result is wrapped in
    double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, including the surrounding quotes
    """
    # One pass over the input; each special character maps to its escape.
    replacements = str.maketrans({
        '\\': '\\\\',
        '\t': '\\t',
        '\r': '\\r',
        '\n': '\\n',
        '\"': '\\"',
    })
    escaped = string.translate(replacements)
    return f'"{escaped}"'

414 

415 

def normalize(string: str, prefix: str = '', width: int = 76) -> str:
    r"""Render `string` in the quoted form used for values in .po files.

    A value that ends up as a single line is just escaped.  Otherwise the
    result starts with an empty ``""`` line followed by one escaped,
    quoted line per (possibly wrapped) line of the input.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized representation of `string`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen <= width:
                # Short enough already, keep the line as it is.
                lines.append(line)
                continue
            # Chunks are consumed from the end of the reversed list.
            pending = WORD_SEP.split(line)
            pending.reverse()
            while pending:
                current = []
                used = 2  # the two enclosing double quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + prefixlen
                    if used + cost >= width:
                        if not current:
                            # handle long chunks by putting them on a
                            # separate line
                            current.append(pending.pop())
                        break
                    current.append(pending.pop())
                    used += cost
                lines.append(''.join(current))
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted = [(prefix + escape(line)) for line in lines]
    return '""\n' + '\n'.join(quoted)

475 

476 

def write_po(
    fileobj: SupportsWrite[bytes],
    catalog: Catalog,
    width: int = 76,
    no_location: bool = False,
    omit_header: bool = False,
    sort_output: bool = False,
    sort_by_file: bool = False,
    ignore_obsolete: bool = False,
    include_previous: bool = False,
    include_lineno: bool = True,
) -> None:
    r"""Serialize `catalog` as a ``gettext`` PO (portable object) template
    and write it to `fileobj`.

    The text produced by `generate_po` is encoded with the catalog's
    charset (unencodable characters are backslash-escaped) before it is
    written.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from io import BytesIO
    >>> buf = BytesIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print(buf.getvalue().decode("utf8"))
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    :param include_lineno: include line number in the location comment
    """
    # Sorting by message takes precedence over sorting by location.
    if sort_output:
        sort_by = "message"
    elif sort_by_file:
        sort_by = "location"
    else:
        sort_by = None

    po_lines = generate_po(
        catalog,
        ignore_obsolete=ignore_obsolete,
        include_lineno=include_lineno,
        include_previous=include_previous,
        no_location=no_location,
        omit_header=omit_header,
        sort_by=sort_by,
        width=width,
    )
    for text in po_lines:
        if isinstance(text, str):
            text = text.encode(catalog.charset, 'backslashreplace')
        fileobj.write(text)

552 

553 

def generate_po(
    catalog: Catalog,
    *,
    ignore_obsolete: bool = False,
    include_lineno: bool = True,
    include_previous: bool = False,
    no_location: bool = False,
    omit_header: bool = False,
    sort_by: Literal["message", "location"] | None = None,
    width: int = 76,
) -> Iterable[str]:
    r"""Yield the text of a ``gettext`` PO (portable object) file piece by piece.

    Each active message is emitted with its comments, locations, flags and
    (optionally) previous msgid, followed by a blank line.  Obsolete
    messages follow as ``#~`` entries unless `ignore_obsolete` is set.

    See `write_po()` for a more detailed description.
    """
    # xgettext always wraps comments even if --no-wrap is passed;
    # provide the same behaviour
    comment_width = width if width and width > 0 else 76

    def _format_comment(comment, prefix=''):
        # One "#<prefix> text" line per wrapped line of the comment.
        for wrapped in wraptext(comment, comment_width):
            yield f"#{prefix} {wrapped.strip()}\n"

    def _format_message(message, prefix=''):
        def _norm(value):
            return normalize(value, prefix=prefix, width=width)

        is_plural = isinstance(message.id, (list, tuple))
        if message.context:
            yield f"{prefix}msgctxt {_norm(message.context)}\n"
        if not is_plural:
            yield f"{prefix}msgid {_norm(message.id)}\n"
            yield f"{prefix}msgstr {_norm(message.string or '')}\n"
            return

        yield f"{prefix}msgid {_norm(message.id[0])}\n"
        yield f"{prefix}msgid_plural {_norm(message.id[1])}\n"
        for idx in range(catalog.num_plurals):
            try:
                string = message.string[idx]
            except IndexError:
                # Fewer translations than plural forms: emit an empty one.
                string = ''
            yield f"{prefix}msgstr[{idx:d}] {_norm(string)}\n"

    for message in _sort_messages(catalog, sort_by=sort_by):
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                wrapped_header = []
                for header_line in comment_header.splitlines():
                    wrapped_header.extend(
                        wraptext(header_line, width=width, subsequent_indent='# '),
                    )
                comment_header = '\n'.join(wrapped_header)
            yield f"{comment_header}\n"

        for comment in message.user_comments:
            yield from _format_comment(comment)
        for comment in message.auto_comments:
            yield from _format_comment(comment, prefix='.')

        if not no_location:
            # sort locations by filename and lineno.
            # if there's no <int> as lineno, use `-1`.
            # if no sorting possible, leave unsorted.
            # (see issue #606)
            try:
                ordered = sorted(
                    message.locations,
                    key=lambda x: (x[0], x[1] if isinstance(x[1], int) and x[1] else -1),
                )
            except TypeError:  # e.g. "TypeError: unorderable types: NoneType() < int()"
                ordered = message.locations

            # A dict keeps first-seen order while dropping duplicates.
            unique_locs = {}
            for filename, lineno in ordered:
                entry = filename.replace(os.sep, '/')
                if lineno and include_lineno:
                    entry = f"{entry}:{lineno:d}"
                unique_locs.setdefault(entry, None)
            yield from _format_comment(' '.join(unique_locs), prefix=':')

        if message.flags:
            flag_items = ['', *sorted(message.flags)]
            yield f"#{', '.join(flag_items)}\n"

        if message.previous_id and include_previous:
            previous = message.previous_id
            yield from _format_comment(
                f'msgid {normalize(previous[0], width=width)}',
                prefix='|',
            )
            if len(previous) > 1:
                norm_previous_plural = normalize(previous[1], width=width)
                yield from _format_comment(f'msgid_plural {norm_previous_plural}', prefix='|')

        yield from _format_message(message)
        yield '\n'

    if ignore_obsolete:
        return

    obsolete_messages = _sort_messages(catalog.obsolete.values(), sort_by=sort_by)
    for message in obsolete_messages:
        for comment in message.user_comments:
            yield from _format_comment(comment)
        yield from _format_message(message, prefix='#~ ')
        yield '\n'

658 

659 

def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]:
    """
    Return the given messages as a new list, ordered by the given criteria.

    With ``sort_by="message"`` the messages are sorted by their own ordering,
    with ``sort_by="location"`` by their ``locations`` attribute; any other
    value keeps the input order.

    :param messages: An iterable of Messages.
    :param sort_by: Sort by which criteria? Options are `message` and `location`.
    :return: list[Message]
    """
    ordered = list(messages)
    if sort_by == "location":
        ordered.sort(key=lambda item: item.locations)
    elif sort_by == "message":
        ordered.sort()
    return ordered