Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/messages/pofile.py: 34%

1"""

2 babel.messages.pofile

3 ~~~~~~~~~~~~~~~~~~~~~

5 Reading and writing of files in the ``gettext`` PO (portable object)

6 format.

9 :license: BSD, see LICENSE for more details.

10"""

11from __future__ import annotations

13import os

14import re

15from collections.abc import Iterable

16from typing import TYPE_CHECKING, Literal

18from babel.core import Locale

19from babel.messages.catalog import Catalog, Message

20from babel.util import TextWrapper

22if TYPE_CHECKING:

23 from typing import IO, AnyStr

25 from _typeshed import SupportsWrite

_unescape_re = re.compile(r'\\([\\trn"])')


def unescape(string: str) -> str:
    r"""Undo `escape`: drop the surrounding quotes and decode escape sequences.

    The first and last character of `string` are removed unconditionally;
    the sequences ``\n``, ``\t``, ``\r``, ``\\`` and ``\"`` in the remainder
    are replaced by the character they stand for.

    >>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
    Say:
     "hello, world!"
    <BLANKLINE>

    :param string: the quoted, escaped string to decode
    :return: the decoded text without the enclosing quote characters
    """
    body = string[1:-1]
    if "\\" not in string:
        # Nothing can be escaped without a backslash; skip the regex entirely.
        return body

    # Escapes that map to a control character; a backslash or a double quote
    # (the only other things the pattern captures) stand for themselves.
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def decode(match):
        code = match.group(1)
        return control_chars.get(code, code)

    return _unescape_re.sub(decode, body)

def denormalize(string: str) -> str:
    r"""Turn a (possibly multi-line) PO string literal back into plain text.

    This is the inverse of `normalize`: each physical line is passed through
    `unescape` and the results are concatenated. A leading ``""`` line, as
    emitted for multi-line strings, is dropped.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"hello, world!\"\n"'''))
    Say:
     "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
     "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    :param string: the normalized string to decode
    :return: the decoded text
    """
    if '\n' not in string:
        # Single-line literal: one quoted segment, nothing to stitch together.
        return unescape(string)

    segments = string.splitlines()
    # Skip the empty ``""`` opener that precedes the real content lines.
    first = 1 if string.startswith('""') else 0
    return ''.join(unescape(segment) for segment in segments[first:])

87def _extract_locations(line: str) -> list[str]:

88 """Extract locations from location comments.

90 Locations are extracted while properly handling First Strong

91 Isolate (U+2068) and Pop Directional Isolate (U+2069), used by

92 gettext to enclose filenames with spaces and tabs in their names.

93 """

94 if "\u2068" not in line and "\u2069" not in line:

95 return line.lstrip().split()

97 locations = []

98 location = ""

99 in_filename = False

100 for c in line:

101 if c == "\u2068":

102 if in_filename:

103 raise ValueError("location comment contains more First Strong Isolate "

104 "characters, than Pop Directional Isolate characters")

105 in_filename = True

106 continue

107 elif c == "\u2069":

108 if not in_filename:

109 raise ValueError("location comment contains more Pop Directional Isolate "

110 "characters, than First Strong Isolate characters")

111 in_filename = False

112 continue

113 elif c == " ":

114 if in_filename:

115 location += c

116 elif location:

117 locations.append(location)

118 location = ""

119 else:

120 location += c

121 else:

122 if location:

123 if in_filename:

124 raise ValueError("location comment contains more First Strong Isolate "

125 "characters, than Pop Directional Isolate characters")

126 locations.append(location)

127

128 return locations

129

130

class PoFileError(Exception):
    """Raised by `PoFileParser` when it rejects the contents of a PO file.

    The exception text is ``"<message> on <lineno>"``; the catalog being
    filled, the offending line and its line number are kept as attributes.
    """

    def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> None:
        self.lineno = lineno
        self.line = line
        self.catalog = catalog
        super().__init__('{} on {}'.format(message, lineno))

139

140

141class _NormalizedString(list):

142 def __init__(self, *args: str) -> None:

143 super().__init__(map(str.strip, args))

144

145 def denormalize(self) -> str:

146 if not self:

147 return ""

148 return ''.join(map(unescape, self))

149

150

class PoFileParser:
    """Support class to read messages from a ``gettext`` PO (portable object) file
    and add them to a `Catalog`

    The parser is line oriented: `parse` classifies every line as a comment,
    a keyword line (``msgid``, ``msgstr`` ...) or a quoted continuation line
    and accumulates the pieces of the current entry in instance attributes
    until the next entry starts, at which point the entry is added to the
    catalog.

    See `read_po` for simple cases.
    """

    def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
        """
        :param catalog: the catalog that parsed messages are added to
        :param ignore_obsolete: if true, obsolete (``#~``) entries are dropped
                                instead of being stored in ``catalog.obsolete``
        :param abort_invalid: if true, problems raise `PoFileError`; otherwise
                              a warning is printed and parsing continues
        """
        self.catalog = catalog
        self.ignore_obsolete = ignore_obsolete
        self.counter = 0  # number of messages added so far
        self.offset = 0  # 0-based line index of the current entry's ``msgid``
        self.abort_invalid = abort_invalid
        self._reset_message_state()

    def _reset_message_state(self) -> None:
        """Forget everything collected for the current entry."""
        self.messages = []  # msgid (and msgid_plural) as _NormalizedString
        self.translations = []  # [index, _NormalizedString] pairs
        self.locations = []
        self.flags = []
        self.user_comments = []
        self.auto_comments = []
        self.context = None
        self.obsolete = False
        # Which string a quoted continuation line currently belongs to.
        self.in_msgid = False
        self.in_msgstr = False
        self.in_msgctxt = False

    def _add_message(self) -> None:
        """
        Add a message to the catalog based on the current parser state and
        clear the state ready to process the next message.
        """
        if len(self.messages) > 1:
            # Plural entry: ids and strings become tuples.
            msgid = tuple(m.denormalize() for m in self.messages)
            string = ['' for _ in range(self.catalog.num_plurals)]
            for idx, translation in sorted(self.translations):
                if idx >= self.catalog.num_plurals:
                    self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
                    continue
                string[idx] = translation.denormalize()
            string = tuple(string)
        else:
            msgid = self.messages[0].denormalize()
            string = self.translations[0][1].denormalize()
        msgctxt = self.context.denormalize() if self.context else None
        message = Message(msgid, string, self.locations, self.flags,
                          self.auto_comments, self.user_comments, lineno=self.offset + 1,
                          context=msgctxt)
        if self.obsolete:
            if not self.ignore_obsolete:
                self.catalog.obsolete[self.catalog._key_for(msgid, msgctxt)] = message
        else:
            self.catalog[msgid] = message
        self.counter += 1
        self._reset_message_state()

    def _finish_current_message(self) -> None:
        """Flush the entry being collected, if there is one, into the catalog."""
        if self.messages:
            if not self.translations:
                self._invalid_pofile("", self.offset, f"missing msgstr for msgid '{self.messages[0].denormalize()}'")
                # Fall back to an empty translation so the entry is still added.
                self.translations.append([0, _NormalizedString()])
            self._add_message()

    def _process_message_line(self, lineno, line, obsolete=False) -> None:
        """Dispatch a non-comment line to the continuation or keyword handler."""
        if not line:
            return
        if line[0] == '"':
            self._process_string_continuation_line(line, lineno)
        else:
            self._process_keyword_line(lineno, line, obsolete)

    def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
        """Handle a line that starts with ``msgctxt``, ``msgid``,
        ``msgid_plural``, ``msgstr`` or ``msgstr[N]``.

        Anything else, including a ``msgstr[...]`` whose index is not a
        non-negative integer, is reported through `_invalid_pofile` and
        otherwise ignored.
        """
        keyword, _, arg = line.partition(' ')

        if keyword in ['msgid', 'msgctxt']:
            # Either keyword starts a new entry, so close the previous one.
            self._finish_current_message()

        self.obsolete = obsolete

        # The line that has the msgid is stored as the offset of the msg
        # should this be the msgctxt if it has one?
        if keyword == 'msgid':
            self.offset = lineno

        if keyword in ['msgid', 'msgid_plural']:
            self.in_msgctxt = False
            self.in_msgid = True
            self.messages.append(_NormalizedString(arg))
            return

        if keyword == 'msgctxt':
            self.in_msgctxt = True
            self.context = _NormalizedString(arg)
            return

        if keyword == 'msgstr' or keyword.startswith('msgstr['):
            _, has_bracket, idxarg = keyword.partition('[')
            if has_bracket:
                # ``idxarg`` is everything after "[", normally "N]".
                try:
                    idx = int(idxarg[:-1])
                except ValueError:
                    idx = -1
                if idx < 0:
                    # A malformed or negative index must be reported like any
                    # other bad keyword instead of escaping as a bare
                    # ValueError (or silently indexing from the end later on).
                    self._invalid_pofile(line, lineno, "Unknown or misformatted keyword")
                    return
            else:
                idx = 0
            self.in_msgid = False
            self.in_msgstr = True
            s = _NormalizedString(arg) if arg != '""' else _NormalizedString()
            self.translations.append([idx, s])
            return

        self._invalid_pofile(line, lineno, "Unknown or misformatted keyword")

    def _process_string_continuation_line(self, line, lineno) -> None:
        """Append a quoted continuation line to the string currently being read."""
        if self.in_msgid:
            s = self.messages[-1]
        elif self.in_msgstr:
            s = self.translations[-1][1]
        elif self.in_msgctxt:
            s = self.context
        else:
            self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
            return
        s.append(line.strip())  # For performance reasons, `NormalizedString` doesn't strip internally

    def _process_comment(self, line) -> None:
        """Record a ``#:``, ``#,``, ``#.`` or plain ``#`` comment line.

        A comment always belongs to the entry that follows it, so any entry
        still being collected is finished first.

        :raises ValueError: propagated from `_extract_locations` for a
                            malformed ``#:`` line
        """

        self._finish_current_message()

        prefix = line[:2]
        if prefix == '#:':
            for location in _extract_locations(line[2:]):
                a, colon, b = location.rpartition(':')
                if colon:
                    try:
                        self.locations.append((a, int(b)))
                    except ValueError:
                        # Text after the last colon is not a number: skip it.
                        continue
                else:  # No line number specified
                    self.locations.append((location, None))
            return

        if prefix == '#,':
            self.flags.extend(flag.strip() for flag in line[2:].lstrip().split(','))
            return

        if prefix == '#.':
            # These are called auto-comments
            comment = line[2:].strip()
            if comment:  # Just check that we're not adding empty comments
                self.auto_comments.append(comment)
            return

        # These are called user comments
        self.user_comments.append(line[1:].strip())

    def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
        """
        Reads from the file-like object (or iterable of string-likes) `fileobj`
        and adds any po file units found in it to the `Catalog`
        supplied to the constructor.

        All of the items in the iterable must be the same type; either `str`
        or `bytes` (decoded with the catalog charset), but not a mixture.
        """
        needs_decode = None

        for lineno, line in enumerate(fileobj):
            line = line.strip()
            if needs_decode is None:
                # If we don't yet know whether we need to decode,
                # let's find out now.
                needs_decode = not isinstance(line, str)
            if not line:
                continue
            if needs_decode:
                line = line.decode(self.catalog.charset)
            if line[0] == '#':
                if line[:2] == '#~':
                    # Obsolete entry: parse what follows the marker as a
                    # regular message line, flagged as obsolete.
                    self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                else:
                    try:
                        self._process_comment(line)
                    except ValueError as exc:
                        self._invalid_pofile(line, lineno, str(exc))
            else:
                self._process_message_line(lineno, line)

        self._finish_current_message()

        # No actual messages found, but there was some info in comments, from which
        # we'll construct an empty header message
        if not self.counter and (self.flags or self.user_comments or self.auto_comments):
            self.messages.append(_NormalizedString())
            self.translations.append([0, _NormalizedString()])
            self._add_message()

    def _invalid_pofile(self, line, lineno, msg) -> None:
        """Report a problem: raise `PoFileError` if ``abort_invalid`` is set,
        otherwise print a two-line warning (with a 1-based line number).
        """
        assert isinstance(line, str)
        if self.abort_invalid:
            raise PoFileError(msg, self.catalog, line, lineno)
        print("WARNING:", msg)
        print(f"WARNING: Problem on line {lineno + 1}: {line!r}")

348

349

def read_po(
    fileobj: IO[AnyStr] | Iterable[AnyStr],
    locale: Locale | str | None = None,
    domain: str | None = None,
    ignore_obsolete: bool = False,
    charset: str | None = None,
    abort_invalid: bool = False,
) -> Catalog:
    """Parse a ``gettext`` PO (portable object) file into a new `Catalog`.

    The input may be a file-like object or any iterable of lines.

    >>> from datetime import datetime
    >>> from io import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print((message.id, message.string))
    ...         print(' ', (message.locations, sorted(list(message.flags))))
    ...         print(' ', (message.user_comments, message.auto_comments))
    ('foo %(name)s', 'quux %(name)s')
      ([('main.py', 1)], ['fuzzy', 'python-format'])
      ([], [])
    (('bar', 'baz'), ('bar', 'baaz'))
      ([('main.py', 3)], [])
      (['A user comment'], ['An auto comment'])

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object (or iterable of lines) to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :param abort_invalid: abort read if po file is invalid
    :return: the catalog populated from `fileobj`
    """
    result = Catalog(locale=locale, domain=domain, charset=charset)
    # The parser fills ``result`` in place; it has no return value of its own.
    PoFileParser(
        result,
        ignore_obsolete,
        abort_invalid=abort_invalid,
    ).parse(fileobj)
    return result

408

409

# Splits text into wrappable chunks. The whole pattern is one capturing
# group, so ``WORD_SEP.split`` keeps the separators in its result.
WORD_SEP = re.compile(
    '(' + '|'.join((
        r'\s+',  # any whitespace
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',  # em-dash
    )) + ')',
)

415

416

def escape(string: str) -> str:
    r"""Quote `string` for use as a double-quoted string in a ``PO`` file.

    Backslash, tab, carriage return, newline and the double quote are
    replaced by their backslash escapes and the result is wrapped in
    double quotes.

    >>> escape('''Say:
    ...  "hello, world!"
    ... ''')
    '"Say:\\n \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, including the surrounding quotes
    """
    # Backslash has to come first: the later replacements introduce
    # backslashes of their own that must not be doubled again.
    replacements = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = string
    for raw, encoded in replacements:
        escaped = escaped.replace(raw, encoded)
    return f'"{escaped}"'

433

434

def normalize(string: str, prefix: str = '', width: int = 76) -> str:
    r"""Render `string` as one or more quoted lines suitable for a .po file.

    A string that ends up on a single line is returned as one escaped
    literal. Otherwise the result starts with an empty ``""`` line followed
    by one escaped literal per line, each preceded by `prefix`.

    >>> print(normalize('''Say:
    ...  "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    " \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...  "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    " \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized representation
    """
    if not (width and width > 0):
        # Wrapping disabled: only the string's own line breaks split it.
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for raw_line in string.splitlines(True):
            if len(escape(raw_line)) + indent <= width:
                lines.append(raw_line)
                continue

            # Too long: refill from word-level chunks. The list is reversed
            # so the next chunk can be taken with a cheap ``pop()``.
            pending = WORD_SEP.split(raw_line)[::-1]
            while pending:
                row = []
                used = 2  # the two enclosing quote characters
                while pending:
                    # ``escape`` adds two quotes that must not be counted.
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not row:
                            # handle long chunks by putting them on a
                            # separate line
                            row.append(pending.pop())
                        break
                    row.append(pending.pop())
                    used += cost
                lines.append(''.join(row))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        lines.pop()
        lines[-1] += '\n'
    return '""\n' + '\n'.join(prefix + escape(entry) for entry in lines)

494

495

496def _enclose_filename_if_necessary(filename: str) -> str:

497 """Enclose filenames which include white spaces or tabs.

498

499 Do the same as gettext and enclose filenames which contain white

500 spaces or tabs with First Strong Isolate (U+2068) and Pop

501 Directional Isolate (U+2069).

502 """

503 if " " not in filename and "\t" not in filename:

504 return filename

505

506 if not filename.startswith("\u2068"):

507 filename = "\u2068" + filename

508 if not filename.endswith("\u2069"):

509 filename += "\u2069"

510 return filename

511

512

def write_po(
    fileobj: SupportsWrite[bytes],
    catalog: Catalog,
    width: int = 76,
    no_location: bool = False,
    omit_header: bool = False,
    sort_output: bool = False,
    sort_by_file: bool = False,
    ignore_obsolete: bool = False,
    include_previous: bool = False,
    include_lineno: bool = True,
) -> None:
    r"""Serialize `catalog` as a ``gettext`` PO (portable object) file and
    write it, encoded with the catalog charset, to `fileobj`.

    >>> catalog = Catalog()
    >>> catalog.add('foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add(('bar', 'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from io import BytesIO
    >>> buf = BytesIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print(buf.getvalue().decode("utf8"))
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    :param include_lineno: include line number in the location comment
    """
    # ``sort_output`` wins when both sorting flags are set.
    if sort_output:
        sort_by = "message"
    elif sort_by_file:
        sort_by = "location"
    else:
        sort_by = None

    options = {
        "ignore_obsolete": ignore_obsolete,
        "include_lineno": include_lineno,
        "include_previous": include_previous,
        "no_location": no_location,
        "omit_header": omit_header,
        "sort_by": sort_by,
        "width": width,
    }
    for chunk in generate_po(catalog, **options):
        if isinstance(chunk, str):
            # Characters the charset cannot represent become backslash escapes.
            chunk = chunk.encode(catalog.charset, 'backslashreplace')
        fileobj.write(chunk)

588

589

def generate_po(
    catalog: Catalog,
    *,
    ignore_obsolete: bool = False,
    include_lineno: bool = True,
    include_previous: bool = False,
    no_location: bool = False,
    omit_header: bool = False,
    sort_by: Literal["message", "location"] | None = None,
    width: int = 76,
) -> Iterable[str]:
    r"""Yield the text chunks that make up a ``gettext`` PO (portable object) file.

    Regular messages come first (ordered according to `sort_by`), each
    followed by a blank line; unless `ignore_obsolete` is set, the catalog's
    obsolete messages follow, prefixed with ``#~``.

    See `write_po()` for a more detailed description.
    """
    # xgettext always wraps comments even if --no-wrap is passed;
    # provide the same behaviour
    wrapping = width and width > 0
    comment_wrapper = TextWrapper(width=width if wrapping else 76, break_long_words=False)
    header_wrapper = TextWrapper(width=width, subsequent_indent="# ", break_long_words=False)

    def _format_comment(comment, prefix=''):
        # One "#<prefix> text" line per wrapped line of the comment.
        for wrapped in comment_wrapper.wrap(comment):
            yield f"#{prefix} {wrapped.strip()}\n"

    def _format_message(message, prefix=''):
        def field(keyword, value):
            return f"{prefix}{keyword} {normalize(value, prefix=prefix, width=width)}\n"

        is_plural = isinstance(message.id, (list, tuple))
        if message.context:
            yield field("msgctxt", message.context)

        if not is_plural:
            yield field("msgid", message.id)
            yield field("msgstr", message.string or '')
            return

        yield field("msgid", message.id[0])
        yield field("msgid_plural", message.id[1])
        for idx in range(catalog.num_plurals):
            try:
                string = message.string[idx]
            except IndexError:
                # Fewer translations than plural forms: emit an empty one.
                string = ''
            yield field(f"msgstr[{idx:d}]", string)

    def _location_key(entry):
        # if there's no <int> as lineno (or it is 0), use `-1`.
        lineno = entry[1]
        return entry[0], lineno if isinstance(lineno, int) and lineno else -1

    for message in _sort_messages(catalog, sort_by=sort_by):
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            header = catalog.header_comment
            if wrapping:
                header = '\n'.join(
                    wrapped
                    for raw in header.splitlines()
                    for wrapped in header_wrapper.wrap(raw)
                )
            yield f"{header}\n"

        for comment in message.user_comments:
            yield from _format_comment(comment)
        for comment in message.auto_comments:
            yield from _format_comment(comment, prefix='.')

        if not no_location:
            # sort locations by filename and lineno.
            # if no sorting possible, leave unsorted.
            # (see issue #606)
            try:
                ordered = sorted(message.locations, key=_location_key)
            except TypeError:  # e.g. "TypeError: unorderable types: NoneType() < int()"
                ordered = message.locations

            # A dict keeps first-seen order while dropping duplicates.
            unique = {}
            for filename, lineno in ordered:
                entry = _enclose_filename_if_necessary(filename.replace(os.sep, '/'))
                if lineno and include_lineno:
                    entry = f"{entry}:{lineno:d}"
                unique.setdefault(entry, None)
            yield from _format_comment(' '.join(unique), prefix=':')

        if message.flags:
            # The leading empty item produces the ", " right after "#".
            flag_text = ', '.join(['', *sorted(message.flags)])
            yield f"#{flag_text}\n"

        if message.previous_id and include_previous:
            previous_singular = normalize(message.previous_id[0], width=width)
            yield from _format_comment(f'msgid {previous_singular}', prefix='|')
            if len(message.previous_id) > 1:
                previous_plural = normalize(message.previous_id[1], width=width)
                yield from _format_comment(f'msgid_plural {previous_plural}', prefix='|')

        yield from _format_message(message)
        yield '\n'

    if ignore_obsolete:
        return

    for message in _sort_messages(catalog.obsolete.values(), sort_by=sort_by):
        for comment in message.user_comments:
            yield from _format_comment(comment)
        yield from _format_message(message, prefix='#~ ')
        yield '\n'

697

698

699def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]:

700 """

701 Sort the given message iterable by the given criteria.

702

703 Always returns a list.

704

705 :param messages: An iterable of Messages.

706 :param sort_by: Sort by which criteria? Options are `message` and `location`.

707 :return: list[Message]

708 """

709 messages = list(messages)

710 if sort_by == "message":

711 messages.sort()

712 elif sort_by == "location":

713 messages.sort(key=lambda m: m.locations)

714 return messages