Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/

4# Redistribution and use in source and binary forms, with or without

5# modification, are permitted provided that the following conditions are

6# met:

8# * Redistributions of source code must retain the above copyright notice,

9# this list of conditions and the following disclaimer.

10# * Redistributions in binary form must reproduce the above copyright notice,

11# this list of conditions and the following disclaimer in the documentation

12# and/or other materials provided with the distribution.

13# * The name of the author may not be used to endorse or promote products

14# derived from this software without specific prior written permission.

15#

16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

26# POSSIBILITY OF SUCH DAMAGE.

27import binascii

28import codecs

29import hashlib

30import re

31import sys

32from binascii import unhexlify

33from math import log10

34from struct import iter_unpack

35from typing import Any, Callable, ClassVar, Dict, Optional, Sequence, Union, cast

37if sys.version_info[:2] >= (3, 10):

38 from typing import TypeGuard

39else:

40 from typing_extensions import TypeGuard # PEP 647

42from .._codecs import _pdfdoc_encoding_rev

43from .._protocols import PdfObjectProtocol, PdfWriterProtocol

44from .._utils import (

45 StreamType,

46 classproperty,

47 deprecate_no_replacement,

48 deprecate_with_replacement,

49 logger_warning,

50 read_non_whitespace,

51 read_until_regex,

52)

53from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError

55__author__ = "Mathieu Fenniak"

56__author_email__ = "biziqe@mathieu.fenniak.net"

class PdfObject(PdfObjectProtocol):
    """
    Base class shared by the PDF object types of this module.

    It supplies the hashing helpers, placeholder ``hash_bin`` / ``clone`` /
    ``write_to_stream`` implementations that raise ``NotImplementedError``,
    and ``_reference_clone`` which records a copy inside a destination writer.
    """

    # Hash constructor applied by hash_value() to the bytes of hash_value_data().
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        """Return the encoded string form of the object (input of ``hash_value``)."""
        return f"{self}".encode()

    def hash_value(self) -> bytes:
        """Return ``<class name>:<hex digest of hash_value_data()>`` as bytes."""
        class_name = self.__class__.__name__
        digest = self.hash_func(self.hash_value_data()).hexdigest()
        return f"{class_name}:{digest}".encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        The base implementation simply delegates to ``clone``.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and referenced,
                the copy will be returned; when ``True``, a new copy will be created.
                (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
                during cloning (applies to children duplication as well). If fields are to be
                considered for a limited number of levels, you have to add it as integer, for
                example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
                level only but ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Register ``clone`` in ``pdf_dest._objects`` when this object has an
        ``indirect_reference`` attribute; if the object was already
        translated into ``pdf_dest``, return the existing copy instead.

        Args:
            clone: The freshly created copy of this object.
            pdf_dest: Writer receiving the copy.
            force_duplicate: When ``True``, register a new copy even if one
                was already recorded.

        Returns:
            The clone, or the object previously registered for the same
            source reference.

        """
        # A clone that already belongs to pdf_dest needs no registration.  Any
        # error raised while probing (e.g. a missing or None reference) is
        # deliberately ignored and the regular path below is taken.
        try:
            if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
                return clone
        except Exception:
            pass

        try:
            source_ref = self.indirect_reference
        except AttributeError:
            # No indirect_reference attribute on the source: nothing to record.
            return clone

        # In incremental mode an object coming from the writer's own reader
        # keeps its number (when it fits); otherwise take the next free one.
        if (
            pdf_dest.incremental
            and source_ref is not None
            and source_ref.pdf == pdf_dest._reader
            and source_ref.idnum <= len(pdf_dest._objects)
        ):
            target_idnum = source_ref.idnum
        else:
            target_idnum = len(pdf_dest._objects) + 1

        if source_ref is not None:
            source_key = id(source_ref.pdf)
            if source_key not in pdf_dest._id_translated:
                # The source document is stored under "PreventGC" next to its
                # translation table (presumably so the id() key stays valid).
                pdf_dest._id_translated[source_key] = {
                    "PreventGC": source_ref.pdf  # type: ignore
                }
            translation = pdf_dest._id_translated[source_key]
            if not force_duplicate and source_ref.idnum in translation:
                known = pdf_dest.get_object(translation[source_ref.idnum])
                assert known is not None
                return known
            translation[source_ref.idnum] = target_idnum

        try:
            pdf_dest._objects[target_idnum - 1] = clone
        except IndexError:
            # Slot does not exist yet: append and use the resulting position.
            pdf_dest._objects.append(clone)
            target_idnum = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(target_idnum, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references; a direct object resolves to itself."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Serialize the object to ``stream``; not implemented in the base class."""
        raise NotImplementedError

199

200

class NullObject(PdfObject):
    """The PDF ``null`` object; all instances compare equal to each other."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        registered = self._reference_clone(NullObject(), pdf_dest, force_duplicate)
        return cast("NullObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value (only the class, as there is no value).

        """
        return hash((self.__class__,))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the literal ``null``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Consume four bytes from ``stream`` and require them to be ``null``.

        Raises:
            PdfReadError: If the bytes read are not ``b"null"``.

        """
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()

247

248

class BooleanObject(PdfObject):
    """PDF boolean wrapping an arbitrary value that is used for its truthiness."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = BooleanObject(self.value)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("BooleanObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.value))

    def __eq__(self, o: object, /) -> bool:
        # Comparable with another BooleanObject or with a plain bool only.
        if isinstance(o, BooleanObject):
            return self.value == o.value
        if isinstance(o, bool):
            return self.value == o
        return False

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read ``true`` or ``false`` from ``stream``.

        Four bytes are read first; when they are ``fals`` one more byte is
        consumed (it is not inspected).

        Raises:
            PdfReadError: If the first four bytes match neither keyword.

        """
        keyword = stream.read(4)
        if keyword == b"true":
            return BooleanObject(True)
        if keyword != b"fals":
            raise PdfReadError("Could not read Boolean object")
        stream.read(1)
        return BooleanObject(False)

309

310

class IndirectObject(PdfObject):
    """
    Reference (``idnum generation R``) to an object owned by ``pdf``.

    Attribute, item, containment, iteration and numeric/string conversions
    that are not defined here are forwarded to the referenced object.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers, attached to ``pdf_dest``."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: When ``True``, do not reuse this reference even
                if it already belongs to ``pdf_dest``.
            ignore_fields: Forwarded to the ``clone`` of the pointed object.

        Returns:
            A reference valid inside ``pdf_dest``.

        """
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # The pointed object was translated before: reuse that copy.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # Case observed: the pointed object cannot be found; a NullObject
            # carrying this reference is cloned in its place.
            if obj is None:
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Ask the owning ``pdf`` for the object this reference points to."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # Only the reference is copied; ``pdf`` itself is shared, not copied.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference and refuse a result that is itself a reference.

        Raises:
            PdfStreamError: If the pointed object is an ``IndirectObject``.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Same numbers and the very same ``pdf`` instance (identity, not equality).
        return (
            isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``<idnum> <generation> R``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        Args:
            stream: Source positioned on the first byte of the object number.
            pdf: Owner stored in the returned reference.

        Raises:
            PdfStreamError: If the stream ends while reading either number.
            PdfReadError: If the token following the numbers is not ``R``.

        """
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                # Whitespace before the first generation byte is skipped.
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)

471

472

# Digit budget used by FloatObject.myrepr(); module level so users can adjust it.
# NOTE(review): the original note asks for a minimum of 5 digits.
FLOAT_WRITE_PRECISION = 8


class FloatObject(float, PdfObject):
    """PDF real number; invalid input falls back to ``0.0`` with a warning."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Build the float from ``value``.

        Args:
            value: Anything accepted by ``float()``.
            context: Unused by this implementation.

        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method itself would
        # depend on the identity of ``self`` rather than on the float value.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Return the fixed-point text written to PDF files.

        The number of decimals shrinks as the magnitude grows (at least one
        is requested), then trailing zeros and a trailing dot are stripped.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``myrepr()`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))

533

534

class NumberObject(int, PdfObject):
    """PDF integer; invalid input falls back to ``0`` with a warning."""

    # Matches the first byte that ends a numeric token.
    # NOTE(review): inside the brackets ``+-.`` is a range ('+' to '.'), so
    # ',' is accepted as well -- confirm whether that is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        try:
            parsed = int(value)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)
        return int.__new__(cls, parsed)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        duplicate = NumberObject(self)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("NumberObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def as_numeric(self) -> int:
        """Return the value as a plain ``int`` (parsed back from ``repr``)."""
        digits = repr(self).encode("utf8")
        return int(digits)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the decimal text of the number; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """Read a numeric token; one containing ``.`` yields a ``FloatObject``."""
        token = read_until_regex(stream, NumberObject.NumberPattern)
        return FloatObject(token) if b"." in token else NumberObject(token)

585

586

class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("ByteStringObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, bytes(self)))

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes as a hexadecimal string ``<...>``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<")
        stream.write(binascii.hexlify(self))
        stream.write(b">")

    def __str__(self) -> str:
        # Try UTF-16 first, then every charset configured on NameObject;
        # the first one that decodes without error wins.
        for candidate in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(candidate)
            except UnicodeDecodeError:
                pass
        raise PdfReadError("Cannot decode ByteStringObject.")

644

645

class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # True when the text can be encoded with PDFDocEncoding.
    autodetect_pdfdocencoding: bool
    # True when the text has to be stored as UTF-16.
    autodetect_utf16: bool
    # BOM to emit in front of the UTF-16 form (b"" when none was recorded).
    utf16_bom: bytes
    # Bytes the object was created from, when it was created from bytes.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        """
        Build the string and record how it should be encoded again.

        Args:
            value: ``str`` text, or ``bytes`` which are decoded as UTF-16
                when they start with a BOM and kept as ``charmap`` text
                otherwise.

        """
        org = None
        if isinstance(value, bytes):
            org = value
            value = value.decode("charmap")
        o = str.__new__(cls, value)
        o._original_bytes = org
        o.autodetect_utf16 = False
        o.autodetect_pdfdocencoding = False
        o.utf16_bom = b""
        if o.startswith(("\xfe\xff", "\xff\xfe")):
            assert org is not None, "mypy"
            try:
                o = str.__new__(cls, org.decode("utf-16"))
            except UnicodeDecodeError as exc:
                logger_warning(
                    f"{exc!s}\ninitial string:{exc.object!r}",
                    __name__,
                )
                # Keep the part that could be decoded before the error.
                o = str.__new__(cls, exc.object[: exc.start].decode("utf-16"))
            # ``o`` is a brand new instance here, so every flag has to be set
            # again; leaving autodetect_pdfdocencoding unset made clone()
            # fail with AttributeError.
            o._original_bytes = org
            o.autodetect_utf16 = True
            o.autodetect_pdfdocencoding = False
            o.utf16_bom = org[:2]
        else:
            try:
                encode_pdfdocencoding(o)
                o.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                o.autodetect_utf16 = True
                o.utf16_bom = codecs.BOM_UTF16_BE
        return o

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest, carrying over the encoding metadata."""
        obj = TextStringObject(self)
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """Re-encode the text according to the recorded autodetect flags."""
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes to write: the original bytes when known, otherwise
        PDFDocEncoding when possible, otherwise UTF-16.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Jump straight to the UTF-16 branch below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the string as a PDF literal string ``(...)``.

        Every byte that is neither ASCII alphanumeric nor a space is written
        as a three-digit octal escape.
        """
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for code in bytearr:
            char = bytes((code,))
            if char.isalnum() or char == b" ":
                stream.write(char)
            else:
                # %-formatting on purpose: the rf"\{c:0>3o}" form triggers
                # https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % code)
        stream.write(b")")

785

786

class NameObject(str, PdfObject):  # noqa: SLOT000
    """PDF name (``/Something``) with ``#xx`` escaping on read and write."""

    # A name ends at whitespace or at one of the PDF delimiter characters.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters written as ``#xx``: '#', the delimiters and code points 0-32.
    renumber_table: ClassVar[Dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped name; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Return the name encoded for writing.

        The first character is copied as is (a deprecation notice is issued
        when it is not ``/``); after it, characters above ``~`` are written
        as the ``#xx`` escapes of their UTF-8 bytes, characters listed in
        ``renumber_table`` use their escape, and the rest is copied.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecate_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "6.0.0",
            )
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated alias of ``prefix``."""
        deprecate_with_replacement("surfix", "prefix", "6.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Replace every valid ``#xx`` escape in ``sin`` by the byte it encodes.

        An escape whose two following characters are not hexadecimal digits
        is left untouched and scanning resumes at the next ``#``.  (The
        previous implementation advanced by one byte without looking for
        ``#`` again, so it could decode digits that followed an arbitrary
        byte.)

        Args:
            sin: Raw name bytes as read from the file.

        Returns:
            The bytes with the valid escapes decoded.

        """
        i = sin.find(b"#", 0)
        while i >= 0:
            try:
                sin = sin[:i] + unhexlify(sin[i + 1 : i + 3]) + sin[i + 3 :]
            except ValueError:
                # if the 2 characters after # can not be converted to hex
                # we change nothing and carry on
                pass
            # Continue after position i: a decoded byte that happens to be
            # '#' must not be interpreted as the start of another escape.
            i = sin.find(b"#", i + 1)
        return sin

    # Encodings tried, in order, when decoding a name read from a file.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name from ``stream``.

        Args:
            stream: Source positioned on the leading ``/``.
            pdf: Reader whose ``strict`` flag decides between a warning and
                an error when the name cannot be decoded.

        Raises:
            PdfReadError: If the first byte is not ``/``, or if decoding
                fails while ``pdf.strict`` is true.

        """
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e

893

894

def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` with the PDFDocEncoding reverse table.

    Raises:
        UnicodeEncodeError: If a character is missing from the table.

    """
    encoded = bytearray()
    try:
        for char in unicode_string:
            encoded.append(_pdfdoc_encoding_rev[char])
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
    return bytes(encoded)

906

907

def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Tell whether ``x`` stands for "no value".

    Returns:
        True if x is None, or a PdfObject whose ``get_object()`` result is
        None or a NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    if x.get_object() is None:
        return True
    return isinstance(x.get_object(), NullObject)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_base.py: 35%

432 statements