Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/messages/pofile.py: 38%

1"""

2 babel.messages.pofile

3 ~~~~~~~~~~~~~~~~~~~~~

5 Reading and writing of files in the ``gettext`` PO (portable object)

6 format.

9 :license: BSD, see LICENSE for more details.

10"""

11from __future__ import annotations

13import os

14import re

15from collections.abc import Iterable

16from typing import TYPE_CHECKING

18from babel.core import Locale

19from babel.messages.catalog import Catalog, Message

20from babel.util import _cmp, wraptext

22if TYPE_CHECKING:

23 from typing import IO, AnyStr

25 from _typeshed import SupportsWrite

26 from typing_extensions import Literal

def unescape(string: str) -> str:
    r"""Undo the escaping performed by `escape`.

    The first and the last character of *string* (normally the enclosing
    double quotes) are dropped, then every backslash sequence for a
    backslash, tab, carriage return, newline or double quote is replaced
    by the character it stands for.

    >>> print(unescape('"Say:\\n  \\"hello, world!\\"\\n"'))
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the string to unescape
    :return: the unescaped text without its first and last character
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def expand(match):
        code = match.group(1)
        # An escaped backslash or double quote simply stands for itself.
        return control_chars.get(code, code)

    inner = string[1:-1]
    return re.compile(r'\\([\\trn"])').sub(expand, inner)

def denormalize(string: str) -> str:
    r"""Turn the output of `normalize` back into the plain string.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"'''))
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    :param string: the string to denormalize
    :return: the concatenation of the unescaped lines
    """
    # Single-line input is just one quoted string.
    if '\n' not in string:
        return unescape(string)

    quoted_lines = string.splitlines()
    # A multi-line value conventionally opens with an empty ``""`` line
    # that carries no content of its own.
    if string.startswith('""'):
        quoted_lines = quoted_lines[1:]
    return ''.join(unescape(quoted) for quoted in quoted_lines)

class PoFileError(Exception):
    """Error describing an invalid line met while parsing a PO file.

    The exception text is ``"<message> on <lineno>"``; the catalog being
    filled, the offending line and its line number are kept as attributes.
    """

    def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> None:
        description = f'{message} on {lineno}'
        super().__init__(description)
        # Context for callers that want to inspect or report the failure.
        self.lineno = lineno
        self.line = line
        self.catalog = catalog

93class _NormalizedString:

95 def __init__(self, *args: str) -> None:

96 self._strs: list[str] = []

97 for arg in args:

98 self.append(arg)

100 def append(self, s: str) -> None:

101 self._strs.append(s.strip())

102

103 def denormalize(self) -> str:

104 return ''.join(map(unescape, self._strs))

105

106 def __bool__(self) -> bool:

107 return bool(self._strs)

108

109 def __repr__(self) -> str:

110 return os.linesep.join(self._strs)

111

112 def __cmp__(self, other: object) -> int:

113 if not other:

114 return 1

115

116 return _cmp(str(self), str(other))

117

118 def __gt__(self, other: object) -> bool:

119 return self.__cmp__(other) > 0

120

121 def __lt__(self, other: object) -> bool:

122 return self.__cmp__(other) < 0

123

124 def __ge__(self, other: object) -> bool:

125 return self.__cmp__(other) >= 0

126

127 def __le__(self, other: object) -> bool:

128 return self.__cmp__(other) <= 0

129

130 def __eq__(self, other: object) -> bool:

131 return self.__cmp__(other) == 0

132

133 def __ne__(self, other: object) -> bool:

134 return self.__cmp__(other) != 0

135

136

class PoFileParser:
    """Support class to read messages from a ``gettext`` PO (portable object) file
    and add them to a `Catalog`

    See `read_po` for simple cases.

    Problems found in the input are routed through `_invalid_pofile`, which
    either raises `PoFileError` (``abort_invalid=True``) or prints a warning
    and lets parsing continue.
    """

    # Order matters: ``msgid`` is tried before ``msgid_plural``; the check on
    # the character following the keyword keeps the two apart.
    _keywords = [
        'msgid',
        'msgstr',
        'msgctxt',
        'msgid_plural',
    ]

    def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
        """
        :param catalog: the catalog that parsed messages are added to
        :param ignore_obsolete: do not store obsolete (``#~``) messages
        :param abort_invalid: raise `PoFileError` on invalid input instead of
                              printing a warning
        """
        self.catalog = catalog
        self.ignore_obsolete = ignore_obsolete
        self.counter = 0  # number of messages handed to `_add_message`
        self.offset = 0  # 0-based line index of the current ``msgid`` line
        self.abort_invalid = abort_invalid
        self._reset_message_state()

    def _reset_message_state(self) -> None:
        """Forget everything collected for the message currently being read."""
        self.messages = []
        self.translations = []
        self.locations = []
        self.flags = []
        self.user_comments = []
        self.auto_comments = []
        self.context = None
        self.obsolete = False
        self.in_msgid = False
        self.in_msgstr = False
        self.in_msgctxt = False

    def _add_message(self) -> None:
        """
        Add a message to the catalog based on the current parser state and
        clear the state ready to process the next message.
        """
        self.translations.sort()
        if len(self.messages) > 1:
            msgid = tuple(m.denormalize() for m in self.messages)
        else:
            msgid = self.messages[0].denormalize()
        if isinstance(msgid, (list, tuple)):
            string = ['' for _ in range(self.catalog.num_plurals)]
            for idx, translation in self.translations:
                if idx >= self.catalog.num_plurals:
                    self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
                    continue
                string[idx] = translation.denormalize()
            string = tuple(string)
        elif self.translations:
            string = self.translations[0][1].denormalize()
        else:
            # A msgid that was never followed by a msgstr used to crash with
            # an IndexError; report it like any other invalid input and fall
            # back to an empty translation.
            self._invalid_pofile("", self.offset, "msgid is not followed by a msgstr")
            string = ''
        msgctxt = self.context.denormalize() if self.context else None
        message = Message(msgid, string, list(self.locations), set(self.flags),
                          self.auto_comments, self.user_comments, lineno=self.offset + 1,
                          context=msgctxt)
        if self.obsolete:
            if not self.ignore_obsolete:
                self.catalog.obsolete[msgid] = message
        else:
            self.catalog[msgid] = message
        self.counter += 1
        self._reset_message_state()

    def _finish_current_message(self) -> None:
        """Flush the pending message, if a ``msgid`` has been seen for it."""
        if self.messages:
            self._add_message()

    def _process_message_line(self, lineno, line, obsolete=False) -> None:
        """Dispatch a non-comment line: quoted continuation or keyword line."""
        if line.startswith('"'):
            self._process_string_continuation_line(line, lineno)
        else:
            self._process_keyword_line(lineno, line, obsolete)

    def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
        """Handle a line starting with ``msgid``, ``msgid_plural``, ``msgstr``
        or ``msgctxt`` and update the parser state accordingly.

        :param lineno: 0-based index of the line in the input
        :param line: the stripped line (without a ``#~`` marker)
        :param obsolete: whether the line came from an obsolete (``#~``) entry
        """
        for keyword in self._keywords:
            try:
                if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
                    arg = line[len(keyword):]
                    break
            except IndexError:
                # The line consists of the bare keyword.  No other keyword can
                # match it, so report once and stop instead of falling through
                # to the "didn't match" report below.
                self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
                return
        else:
            self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
            return

        if keyword in ['msgid', 'msgctxt']:
            self._finish_current_message()

        self.obsolete = obsolete

        # The line that has the msgid is stored as the offset of the msg
        # should this be the msgctxt if it has one?
        if keyword == 'msgid':
            self.offset = lineno

        if keyword in ['msgid', 'msgid_plural']:
            self.in_msgctxt = False
            self.in_msgid = True
            self.messages.append(_NormalizedString(arg))

        elif keyword == 'msgstr':
            self.in_msgid = False
            if arg.startswith('['):
                idx_text, closing, msg = arg[1:].partition(']')
                try:
                    idx = int(idx_text) if closing else -1
                except ValueError:
                    idx = -1
                if idx < 0:
                    # Missing "]", non-numeric or negative plural index.  Leave
                    # every string state so that continuation lines of this
                    # broken entry are reported rather than glued onto another
                    # field.
                    self.in_msgstr = False
                    self.in_msgctxt = False
                    self._invalid_pofile(line, lineno, "msgstr has an invalid plural index")
                    return
                self.in_msgstr = True
                self.translations.append([idx, _NormalizedString(msg)])
            else:
                self.in_msgstr = True
                self.translations.append([0, _NormalizedString(arg)])

        elif keyword == 'msgctxt':
            self.in_msgctxt = True
            self.context = _NormalizedString(arg)

    def _process_string_continuation_line(self, line, lineno) -> None:
        """Append a quoted continuation line to the field currently being read."""
        if self.in_msgid:
            s = self.messages[-1]
        elif self.in_msgstr:
            s = self.translations[-1][1]
        elif self.in_msgctxt:
            s = self.context
        else:
            self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
            return
        s.append(line)

    def _process_comment(self, line) -> None:
        """Record a ``#`` comment line; a comment always ends the pending message.

        ``#:`` lines hold locations, ``#,`` lines hold flags, ``#.`` lines are
        auto comments and everything else is a user comment.
        """
        self._finish_current_message()

        if line[1:].startswith(':'):
            for location in line[2:].lstrip().split():
                pos = location.rfind(':')
                if pos >= 0:
                    try:
                        lineno = int(location[pos + 1:])
                    except ValueError:
                        # Not "file:number"; the entry is skipped.
                        continue
                    self.locations.append((location[:pos], lineno))
                else:
                    self.locations.append((location, None))
        elif line[1:].startswith(','):
            for flag in line[2:].lstrip().split(','):
                self.flags.append(flag.strip())
        elif line[1:].startswith('.'):
            # These are called auto-comments
            comment = line[2:].strip()
            if comment:  # Just check that we're not adding empty comments
                self.auto_comments.append(comment)
        else:
            # These are called user comments
            self.user_comments.append(line[1:].strip())

    def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
        """
        Reads from the file-like object `fileobj` and adds any po file
        units found in it to the `Catalog` supplied to the constructor.

        Lines may be ``str`` or ``bytes``; bytes are decoded with the
        catalog's charset.
        """
        for lineno, line in enumerate(fileobj):
            line = line.strip()
            if not isinstance(line, str):
                line = line.decode(self.catalog.charset)
            if not line:
                continue
            if line.startswith('#'):
                if line[1:].startswith('~'):
                    self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                else:
                    self._process_comment(line)
            else:
                self._process_message_line(lineno, line)

        self._finish_current_message()

        # No actual messages found, but there was some info in comments, from which
        # we'll construct an empty header message
        if not self.counter and (self.flags or self.user_comments or self.auto_comments):
            self.messages.append(_NormalizedString('""'))
            self.translations.append([0, _NormalizedString('""')])
            self._add_message()

    def _invalid_pofile(self, line, lineno, msg) -> None:
        """Report invalid input: raise `PoFileError` when ``abort_invalid`` is
        set, otherwise print two ``WARNING:`` lines and return.

        :param line: the offending line (may be empty)
        :param lineno: 0-based line index; printed 1-based in the warning
        :param msg: description of the problem
        """
        assert isinstance(line, str)
        if self.abort_invalid:
            raise PoFileError(msg, self.catalog, line, lineno)
        print("WARNING:", msg)
        print(f"WARNING: Problem on line {lineno + 1}: {line!r}")

329

330

def read_po(
    fileobj: IO[AnyStr] | Iterable[AnyStr],
    locale: str | Locale | None = None,
    domain: str | None = None,
    ignore_obsolete: bool = False,
    charset: str | None = None,
    abort_invalid: bool = False,
) -> Catalog:
    """Parse a ``gettext`` PO (portable object) file, given as a file-like
    object or any iterable of lines, into a new `Catalog`.

    >>> from datetime import datetime
    >>> from io import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print((message.id, message.string))
    ...         print(' ', (message.locations, sorted(list(message.flags))))
    ...         print(' ', (message.user_comments, message.auto_comments))
    ('foo %(name)s', 'quux %(name)s')
      ([('main.py', 1)], ['fuzzy', 'python-format'])
      ([], [])
    (('bar', 'baz'), ('bar', 'baaz'))
      ([('main.py', 3)], [])
      (['A user comment'], ['An auto comment'])

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object (or iterable of lines) to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :param abort_invalid: abort read if po file is invalid
    :return: the catalog filled with the messages that were read
    """
    result = Catalog(locale=locale, domain=domain, charset=charset)
    # The parser writes into ``result`` as a side effect of ``parse``.
    PoFileParser(result, ignore_obsolete, abort_invalid=abort_invalid).parse(fileobj)
    return result

389

390

# Alternatives of the chunk separator used by `normalize` when wrapping long
# lines.  They are OR-ed together inside a single capturing group, so that
# ``WORD_SEP.split`` keeps the separators in its result.
_WORD_SEP_ALTERNATIVES = (
    r'\s+',  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',  # em-dash
)
WORD_SEP = re.compile('(' + '|'.join(_WORD_SEP_ALTERNATIVES) + ')')

396

397

# Single-pass replacement table for `escape`.  None of the replacement texts
# contains a character that is itself a key, so one pass gives the same result
# as replacing the characters one after another (backslash first).
_ESCAPE_TABLE = str.maketrans({
    '\\': '\\\\',
    '\t': '\\t',
    '\r': '\\r',
    '\n': '\\n',
    '\"': '\\"',
})


def escape(string: str) -> str:
    r"""Quote *string* for use as a double-quoted string in a ``PO`` file.

    Backslash, tab, carriage return, newline and double quote are replaced
    by their backslash sequences and the result is wrapped in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, including the surrounding double quotes
    """
    return '"%s"' % string.translate(_ESCAPE_TABLE)

414

415

def normalize(string: str, prefix: str = '', width: int = 76) -> str:
    r"""Render a string the way it is written in .po files: escaped,
    double-quoted and, when it spans several lines, split into one quoted
    string per line after a leading empty ``""``.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized representation
    """
    if not (width and width > 0):
        # Wrapping disabled: only the existing line breaks split the text.
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for source_line in string.splitlines(True):
            if len(escape(source_line)) + indent <= width:
                lines.append(source_line)
                continue
            # Too long: cut into chunks and refill rows greedily.  The chunks
            # are kept reversed so that ``pop()`` yields them in reading order.
            pending = WORD_SEP.split(source_line)[::-1]
            while pending:
                row = []
                used = 2  # the two enclosing double quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not row:
                            # handle long chunks by putting them on a
                            # separate line
                            row.append(pending.pop())
                        break
                    row.append(pending.pop())
                    used += cost
                lines.append(''.join(row))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        lines.pop()
        lines[-1] += '\n'
    return '""\n' + '\n'.join(prefix + escape(entry) for entry in lines)

475

476

def write_po(
    fileobj: SupportsWrite[bytes],
    catalog: Catalog,
    width: int = 76,
    no_location: bool = False,
    omit_header: bool = False,
    sort_output: bool = False,
    sort_by_file: bool = False,
    ignore_obsolete: bool = False,
    include_previous: bool = False,
    include_lineno: bool = True,
) -> None:
    r"""Serialize a message catalog as a ``gettext`` PO (portable object)
    template file and write it, encoded, to the given file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from io import BytesIO
    >>> buf = BytesIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print(buf.getvalue().decode("utf8"))
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    :param include_lineno: include line number in the location comment
    """
    # ``sort_output`` takes precedence when both sorting flags are set.
    if sort_output:
        sort_by = "message"
    elif sort_by_file:
        sort_by = "location"
    else:
        sort_by = None

    po_chunks = generate_po(
        catalog,
        ignore_obsolete=ignore_obsolete,
        include_lineno=include_lineno,
        include_previous=include_previous,
        no_location=no_location,
        omit_header=omit_header,
        sort_by=sort_by,
        width=width,
    )
    for chunk in po_chunks:
        # Text is encoded with the catalog charset; characters it cannot
        # represent are written as backslash escapes.
        if isinstance(chunk, str):
            chunk = chunk.encode(catalog.charset, 'backslashreplace')
        fileobj.write(chunk)

552

553

def generate_po(
    catalog: Catalog,
    *,
    ignore_obsolete: bool = False,
    include_lineno: bool = True,
    include_previous: bool = False,
    no_location: bool = False,
    omit_header: bool = False,
    sort_by: Literal["message", "location"] | None = None,
    width: int = 76,
) -> Iterable[str]:
    r"""Yield text strings representing a ``gettext`` PO (portable object) file.

    See `write_po()` for a more detailed description.

    Each message yields its comments, its ``msgid``/``msgstr`` lines and a
    blank line; obsolete messages follow, prefixed with ``#~ ``.

    :param catalog: the `Catalog` whose messages are rendered
    :param ignore_obsolete: do not emit the catalog's obsolete messages
    :param include_lineno: append ``:<lineno>`` to location comments
    :param include_previous: emit ``#|`` comments holding the previous msgid
    :param no_location: do not emit ``#:`` location comments
    :param omit_header: skip the header message (the one with an empty id)
    :param sort_by: ordering passed on to `_sort_messages`
    :param width: maximum line width for message strings; a falsy or
                  non-positive value disables their wrapping, comments are
                  then still wrapped at 76 columns
    """
    # xgettext always wraps comments even if --no-wrap is passed;
    # provide the same behaviour
    comment_width = width if width and width > 0 else 76

    def _format_comment(comment, prefix=''):
        # One "#<prefix> text" line per wrapped line of the comment.
        for line in wraptext(comment, comment_width):
            yield f"#{prefix} {line.strip()}\n"

    def _format_message(message, prefix=''):
        # A list/tuple id marks a plural message: msgid + msgid_plural and
        # one msgstr[n] per plural form of the catalog.
        if isinstance(message.id, (list, tuple)):
            if message.context:
                yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid {normalize(message.id[0], prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid_plural {normalize(message.id[1], prefix=prefix, width=width)}\n"

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Fewer translations than plural forms: pad with "".
                    string = ''
                yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n"
        else:
            if message.context:
                yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid {normalize(message.id, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgstr {normalize(message.string or '', prefix=prefix, width=width)}\n"

    for message in _sort_messages(catalog, sort_by=sort_by):
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = '\n'.join(lines)
            yield f"{comment_header}\n"

        for comment in message.user_comments:
            yield from _format_comment(comment)
        for comment in message.auto_comments:
            yield from _format_comment(comment, prefix='.')

        if not no_location:
            locs = []

            # sort locations by filename and lineno.
            # if there's no <int> as lineno, use `-1`.
            # if no sorting possible, leave unsorted.
            # (see issue #606)
            try:
                locations = sorted(message.locations,
                                   key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1))
            except TypeError:  # e.g. "TypeError: unorderable types: NoneType() < int()"
                locations = message.locations

            for filename, lineno in locations:
                location = filename.replace(os.sep, '/')
                if lineno and include_lineno:
                    location = f"{location}:{lineno:d}"
                # Without line numbers several entries can collapse to the
                # same text; each distinct location is emitted once.
                if location not in locs:
                    locs.append(location)
            yield from _format_comment(' '.join(locs), prefix=':')
        if message.flags:
            # The leading '' makes the joined text start with ", ", which
            # together with "#" gives the "#, flag, flag" comment form.
            yield f"#{', '.join(['', *sorted(message.flags)])}\n"

        if message.previous_id and include_previous:
            yield from _format_comment(
                f'msgid {normalize(message.previous_id[0], width=width)}',
                prefix='|',
            )
            if len(message.previous_id) > 1:
                norm_previous_id = normalize(message.previous_id[1], width=width)
                yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|')

        yield from _format_message(message)
        yield '\n'

    if not ignore_obsolete:
        for message in _sort_messages(
            catalog.obsolete.values(),
            sort_by=sort_by,
        ):
            # Obsolete entries keep only their user comments.
            for comment in message.user_comments:
                yield from _format_comment(comment)
            yield from _format_message(message, prefix='#~ ')
            yield '\n'

658

659

660def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]:

661 """

662 Sort the given message iterable by the given criteria.

663

664 Always returns a list.

665

666 :param messages: An iterable of Messages.

667 :param sort_by: Sort by which criteria? Options are `message` and `location`.

668 :return: list[Message]

669 """

670 messages = list(messages)

671 if sort_by == "message":

672 messages.sort()

673 elif sort_by == "location":

674 messages.sort(key=lambda m: m.locations)

675 return messages