Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/

4# Redistribution and use in source and binary forms, with or without

5# modification, are permitted provided that the following conditions are

6# met:

8# * Redistributions of source code must retain the above copyright notice,

9# this list of conditions and the following disclaimer.

10# * Redistributions in binary form must reproduce the above copyright notice,

11# this list of conditions and the following disclaimer in the documentation

12# and/or other materials provided with the distribution.

13# * The name of the author may not be used to endorse or promote products

14# derived from this software without specific prior written permission.

15#

16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

26# POSSIBILITY OF SUCH DAMAGE.

27import binascii

28import codecs

29import hashlib

30import re

31import sys

32from binascii import unhexlify

33from collections.abc import Sequence

34from math import log10

35from struct import iter_unpack

36from typing import Any, Callable, ClassVar, Optional, Union, cast

38if sys.version_info[:2] >= (3, 10):

39 from typing import TypeGuard

40else:

41 from typing_extensions import TypeGuard # PEP 647

43from .._codecs import _pdfdoc_encoding_rev

44from .._protocols import PdfObjectProtocol, PdfWriterProtocol

45from .._utils import (

46 StreamType,

47 classproperty,

48 deprecation_no_replacement,

49 deprecation_with_replacement,

50 logger_warning,

51 read_non_whitespace,

52 read_until_regex,

53)

54from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError

56__author__ = "Mathieu Fenniak"

57__author_email__ = "biziqe@mathieu.fenniak.net"

class PdfObject(PdfObjectProtocol):
    """Base class of the PDF object types; provides hashing and clone bookkeeping."""

    # Hash constructor applied by hash_value() to the bytes of hash_value_data().
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        class_name = self.__class__.__name__
        raise NotImplementedError(
            f"{class_name} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        """Return the encoded string form of the object (input of ``hash_value``)."""
        rendered = f"{self}"
        return rendered.encode()

    def hash_value(self) -> bytes:
        """Return ``<class name>:<hex digest of hash_value_data()>`` as bytes."""
        digest = self.hash_func(self.hash_value_data()).hexdigest()
        return f"{self.__class__.__name__}:{digest}".encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Copy the object into pdf_dest (a PdfWriterProtocol, i.e. the PdfWriter
        interface) without ensuring links. Used by clone_document_from_root
        with incremental = True.

        This base implementation simply delegates to ``clone``.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject

        """
        duplicate = self.clone(pdf_dest)
        return duplicate

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and
                referenced, the existing copy is returned; when ``True``, a new
                copy is created. (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that are
                ignored during cloning (children included). To limit a field to a
                number of levels, precede it with an integer, for example
                ``[1,"/B","/TOTO"]`` ignores ``"/B"`` at the first level only but
                ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        class_name = self.__class__.__name__
        raise NotImplementedError(
            f"{class_name} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Register ``clone`` in ``pdf_dest._objects`` and give it an indirect reference.

        Nothing is registered when ``clone`` already references ``pdf_dest``
        (and no duplicate is forced) or when ``self`` has no
        ``indirect_reference`` attribute at all.

        Args:
            clone: The copy of ``self`` that should be registered.
            pdf_dest: Target the copy is registered in.
            force_duplicate: When ``True``, register ``clone`` even if a copy of
                ``self`` is already known to ``pdf_dest``.

        Returns:
            ``clone``, or the copy of ``self`` that was registered earlier.

        """
        if not force_duplicate:
            try:
                if clone.indirect_reference.pdf == pdf_dest:
                    return clone
            except Exception:
                # Best effort: a clone without a usable reference is handled
                # below like any other unregistered object.
                pass

        try:
            source_ref = self.indirect_reference
        except AttributeError:
            return clone

        # In incremental mode an object coming from the writer's own reader
        # keeps its number; everything else goes to the next free slot.
        keep_number = (
            pdf_dest.incremental
            and source_ref is not None
            and source_ref.pdf == pdf_dest._reader
            and source_ref.idnum <= len(pdf_dest._objects)
        )
        number = source_ref.idnum if keep_number else len(pdf_dest._objects) + 1

        if source_ref is not None:
            source_key = id(source_ref.pdf)
            if source_key not in pdf_dest._id_translated:
                pdf_dest._id_translated[source_key] = {}
                # Stored under "PreventGC" -- presumably to keep the source
                # alive while its id() serves as the table key.
                pdf_dest._id_translated[source_key]["PreventGC"] = source_ref.pdf  # type: ignore[index]
            translation = pdf_dest._id_translated[source_key]
            if not force_duplicate and source_ref.idnum in translation:
                known = pdf_dest.get_object(translation[source_ref.idnum])
                assert known is not None
                return known
            translation[source_ref.idnum] = number

        try:
            pdf_dest._objects[number - 1] = clone
        except IndexError:
            # The slot does not exist yet: append and use the resulting number.
            pdf_dest._objects.append(clone)
            number = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(number, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references; a direct object resolves to itself."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Serialize the object to ``stream``; not implemented in the base class."""
        raise NotImplementedError

200

201

class NullObject(PdfObject):
    """The PDF ``null`` object; all instances compare equal to each other."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(NullObject(), pdf_dest, force_duplicate)
        return cast("NullObject", duplicate)

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering the type only, as a null object carries no value.

        """
        return hash((self.__class__,))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the keyword ``null`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Consume four bytes from ``stream`` and return a NullObject.

        Raises:
            PdfReadError: If the four bytes are not ``null``.

        """
        if stream.read(4) != b"null":
            raise PdfReadError("Could not read Null object")
        return NullObject()

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        # Any two null objects are equal.
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        # Defined together with __eq__ so that instances stay hashable.
        return self.hash_bin()

248

249

class BooleanObject(PdfObject):
    """PDF boolean; ``value`` is stored as given and interpreted by truthiness."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = BooleanObject(self.value)
        return cast(
            "BooleanObject",
            self._reference_clone(duplicate, pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.value))

    def __eq__(self, o: object, /) -> bool:
        # Comparable with other BooleanObjects and with plain bools only.
        if isinstance(o, BooleanObject):
            return self.value == o.value
        return self.value == o if isinstance(o, bool) else False

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false`` to ``stream`` depending on ``value``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read ``true`` or ``false`` from ``stream``.

        Raises:
            PdfReadError: If the next four bytes are neither ``true`` nor ``fals``.

        """
        token = stream.read(4)
        if token == b"true":
            return BooleanObject(True)
        if token != b"fals":
            raise PdfReadError("Could not read Boolean object")
        # Consume the fifth byte of "false"; its value is not checked.
        stream.read(1)
        return BooleanObject(False)

310

311

class IndirectObject(PdfObject):
    """
    Reference (``idnum generation R``) to an object stored in ``pdf``.

    Attribute access, item access, iteration and numeric/string conversion
    are forwarded to the referenced object.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value (numbers and identity of ``pdf``).

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers that points into pdf_dest."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: Passed on to the clone of the referenced object;
                also disables the shortcut for references that already point
                into ``pdf_dest``.
            ignore_fields: Passed on to the clone of the referenced object.

        Returns:
            A reference valid in ``pdf_dest``.

        """
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}
            pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf  # type: ignore[index]

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # The referenced object was translated before: reuse that copy.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # Case observed: the pointed object can not be found. A NullObject
            # carrying this reference is cloned in its place.
            if obj is None:
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        """An indirect object is its own reference."""
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Ask ``pdf`` for the object this reference points to."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # The copy shares ``pdf`` with the original; only the reference is new.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference and reject a result that is itself a reference.

        Raises:
            PdfStreamError: If the resolved object is an IndirectObject.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError as exc:
            # Chain the original error so that the failing lookup stays visible.
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            ) from exc

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Equal only to references with the same numbers into the very same pdf.
        return (
            isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the reference as ``<idnum> <generation> R``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        Args:
            stream: Positioned at the first byte of the object number.
            pdf: Owner stored in the returned reference.

        Raises:
            PdfStreamError: If the stream ends before both numbers were read.
            PdfReadError: If the numbers are not followed by ``R``.

        """
        # Object number: everything up to the first whitespace byte.
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        # Generation number: skip leading whitespace, then read up to the
        # next whitespace byte.
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)

473

474

# Digit budget read by FloatObject.myrepr(): a value is written with
# max(1, FLOAT_WRITE_PRECISION - int(log10(abs(value)))) decimals before
# trailing zeros are stripped. Module-level so that users can adjust it.
FLOAT_WRITE_PRECISION = 8  # shall be min 5 digits max, allow user adj

476

477

class FloatObject(float, PdfObject):
    """PDF real number; invalid input is replaced by ``0.0`` with a warning."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Create the object from anything ``float()`` accepts.

        Args:
            value: Value to convert.
            context: Not used by this implementation.

        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method object would
        # not hash the numeric value, unlike NumberObject.hash_bin().
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Format the value in fixed-point notation.

        The number of decimals is ``max(1, FLOAT_WRITE_PRECISION -
        int(log10(abs(self))))``; trailing zeros and a trailing dot are
        removed. Zero is written as ``0.0``.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``myrepr()`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))

535

536

class NumberObject(int, PdfObject):
    """PDF integer; input raising ``ValueError`` is replaced by ``0`` with a warning."""

    # Matches the first byte that cannot belong to a number.
    # NOTE(review): inside the class, ``+-.`` is a range from "+" to ".", so
    # "," is accepted as well -- confirm whether that is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        try:
            number = int(value)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            number = 0
        return int.__new__(cls, number)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        duplicate = NumberObject(self)
        return cast(
            "NumberObject",
            self._reference_clone(duplicate, pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        numeric = self.as_numeric()
        return hash((self.__class__, numeric))

    def as_numeric(self) -> int:
        """Return the value as a plain ``int``, obtained from its ``repr``."""
        digits = repr(self).encode("utf8")
        return int(digits)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the ``repr`` of the number to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """
        Read a numeric token from ``stream``.

        Returns:
            A FloatObject when the token contains a ".", a NumberObject otherwise.

        """
        token = read_until_regex(stream, NumberObject.NumberPattern)
        return FloatObject(token) if b"." in token else NumberObject(token)

587

588

class ByteStringObject(bytes, PdfObject):
    """
    A string object whose text encoding could not be determined.

    This is common, because the PDF spec offers no other way to store such
    data -- the encryption data kept in files (like /O), for example, is
    clearly not text, yet it is stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        return cast(
            "ByteStringObject",
            self._reference_clone(duplicate, pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, bytes(self)))

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes as a hexadecimal string enclosed in ``<`` and ``>``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        hex_digits = binascii.hexlify(self)
        stream.write(b"<")
        stream.write(hex_digits)
        stream.write(b">")

    def __str__(self) -> str:
        # Return the first successful decoding: UTF-16 first, then each of
        # NameObject.CHARSETS in order.
        for encoding in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise PdfReadError("Cannot decode ByteStringObject.")

646

647

class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # True when the text can be encoded with PDFDocEncoding.
    autodetect_pdfdocencoding: bool
    # True when the text is (to be) stored as UTF-16.
    autodetect_utf16: bool
    # BOM associated with the UTF-16 form; b"" when there is none.
    utf16_bom: bytes
    # Bytes the object was created from; None when created from a str.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        """
        Create the object from ``bytes`` or from already decoded text.

        ``bytes`` starting with a UTF-16 BOM are decoded as UTF-16 (undecodable
        trailing data is dropped with a warning); other ``bytes`` are decoded
        one byte per character. The autodetect flags record how the text can
        be turned back into bytes.
        """
        org = None
        if isinstance(value, bytes):
            org = value
            value = value.decode("charmap")
        o = str.__new__(cls, value)
        o._original_bytes = org
        o.autodetect_utf16 = False
        o.autodetect_pdfdocencoding = False
        o.utf16_bom = b""
        # BOM sniffing only applies to raw bytes: a str is already decoded
        # text, even if it happens to start with U+00FE U+00FF. (clone()
        # passes such decoded text back into this constructor.)
        # NOTE(review): such a str is serialised by get_encoded_bytes() with a
        # leading FE FF / FF FE byte pair -- confirm that is acceptable.
        if org is not None and o.startswith(("\xfe\xff", "\xff\xfe")):
            try:
                o = str.__new__(cls, org.decode("utf-16"))
            except UnicodeDecodeError as exc:
                logger_warning(
                    f"{exc!s}\ninitial string:{exc.object!r}",
                    __name__,
                )
                # Keep the part that could be decoded.
                o = str.__new__(cls, exc.object[: exc.start].decode("utf-16"))
            # ``o`` is a new object here, so every attribute has to be set
            # again; otherwise reading one of them raises AttributeError.
            o._original_bytes = org
            o.autodetect_utf16 = True
            o.autodetect_pdfdocencoding = False
            o.utf16_bom = org[:2]
        else:
            try:
                encode_pdfdocencoding(o)
                o.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                o.autodetect_utf16 = True
                o.utf16_bom = codecs.BOM_UTF16_BE
        return o

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest, keeping original bytes and autodetect flags."""
        obj = TextStringObject(self)
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """Re-encode the text according to the autodetect flags."""
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes written for this string.

        The original bytes are used when known; otherwise PDFDocEncoding is
        tried (unless UTF-16 was detected) with UTF-16 as fallback.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Jump straight to the UTF-16 fallback below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the string as a literal string enclosed in parentheses.

        Every byte that is neither ASCII alphanumeric nor a space is written
        as a three-digit octal escape.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")

787

788

class NameObject(str, PdfObject):  # noqa: SLOT000
    """PDF name object (``/Name``), stored with its leading ``/``."""

    # Bytes that end a name when reading: whitespace or a delimiter.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters that renumber() replaces by their ``#XX`` escape: "#", the
    # delimiters, and the code points 0 to 32.
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Compute a hash used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped form returned by ``renumber()`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Return the name as bytes with ``#XX`` escapes applied.

        The first character is copied as-is and reported through
        ``deprecation_no_replacement`` when it is not ``/``. Of the remaining
        characters, those above ``~`` are written as the escaped bytes of
        their UTF-8 encoding, those in ``renumber_table`` as their escape, and
        all others unchanged.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen). The _sanitize method replaces
        spaces and any non-alphanumeric/non-underscore/non-hyphen with
        underscores.

        Returns:
            NameObject with sanitized name.
        """
        name = str(self).removeprefix("/")
        name = re.sub(r"\ ", "_", name)
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated misspelled alias of ``prefix``."""
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Replace every ``#XX`` escape (two hexadecimal digits) by its byte.

        A ``#`` that is not followed by two hexadecimal digits is left
        untouched, together with the bytes after it.

        Args:
            sin: Raw name as read from the file.

        Returns:
            The name with all valid escapes decoded.

        """
        i = sin.find(b"#")
        while i >= 0:
            code = sin[i + 1 : i + 3]
            # A truncated escape at the end of the name is left as it is
            # (unhexlify(b"") would succeed and drop the "#").
            if len(code) == 2:
                try:
                    sin = sin[:i] + unhexlify(code) + sin[i + 3 :]
                except ValueError:
                    # if the 2 characters after # can not be converted to hex
                    # we change nothing and carry on
                    pass
            # Always continue at the next real "#". Position i now holds
            # either a decoded byte or an invalid "#"; neither must be
            # looked at again.
            i = sin.find(b"#", i + 1)
        return sin

    # Encodings tried, in order, when decoding a name.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name from ``stream``.

        Args:
            stream: Positioned at the leading ``/`` of the name.
            pdf: Reader; its ``strict`` attribute selects between warning and
                error when the name cannot be decoded.

        Raises:
            PdfReadError: If the first byte is not ``/``, or if the name cannot
                be decoded and ``pdf.strict`` is true.

        """
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e

910

911

def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode text by looking every character up in ``_pdfdoc_encoding_rev``.

    Raises:
        UnicodeEncodeError: If a character has no entry in the table.

    """
    encoded = bytearray()
    try:
        for character in unicode_string:
            encoded.append(_pdfdoc_encoding_rev[character])
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
    return bytes(encoded)

923

924

def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Tell whether ``x`` stands for "no value".

    Returns:
        True if x is None, or if x is a PdfObject whose ``get_object()``
        result (the target, for an indirect reference) is None or a
        NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    # Resolve only once: get_object() may involve a lookup for references.
    resolved = x.get_object()
    return resolved is None or isinstance(resolved, NullObject)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_base.py: 53%

439 statements