Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/

4# Redistribution and use in source and binary forms, with or without

5# modification, are permitted provided that the following conditions are

6# met:

8# * Redistributions of source code must retain the above copyright notice,

9# this list of conditions and the following disclaimer.

10# * Redistributions in binary form must reproduce the above copyright notice,

11# this list of conditions and the following disclaimer in the documentation

12# and/or other materials provided with the distribution.

13# * The name of the author may not be used to endorse or promote products

14# derived from this software without specific prior written permission.

15#

16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

26# POSSIBILITY OF SUCH DAMAGE.

27import binascii

28import codecs

29import hashlib

30import re

31import sys

32from binascii import unhexlify

33from collections.abc import Sequence

34from math import log10

35from struct import iter_unpack

36from typing import Any, Callable, ClassVar, Optional, Union, cast

38if sys.version_info[:2] >= (3, 10):

39 from typing import TypeGuard

40else:

41 from typing_extensions import TypeGuard # PEP 647

43from .._codecs import _pdfdoc_encoding_rev

44from .._protocols import PdfObjectProtocol, PdfWriterProtocol

45from .._utils import (

46 StreamType,

47 classproperty,

48 deprecation_no_replacement,

49 deprecation_with_replacement,

50 logger_warning,

51 read_non_whitespace,

52 read_until_regex,

53)

54from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError

56__author__ = "Mathieu Fenniak"

57__author_email__ = "biziqe@mathieu.fenniak.net"

class PdfObject(PdfObjectProtocol):
    """Base class shared by every PDF object type defined in this module."""

    # Hash constructor used by hash_value() to fingerprint the object data.
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Subclasses are expected to override this; the base implementation
        always raises.

        Returns:
            Hash considering type and value.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        class_name = self.__class__.__name__
        raise NotImplementedError(
            f"{class_name} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        """Return the encoded string form of the object, the input of hash_value()."""
        text = f"{self}"
        return text.encode()

    def hash_value(self) -> bytes:
        """Return ``b"<ClassName>:<hex digest of hash_value_data()>"``."""
        digest = self.hash_func(self.hash_value_data()).hexdigest()
        return f"{self.__class__.__name__}:{digest}".encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        The base implementation simply delegates to ``clone(pdf_dest)``.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and referenced,
                the copy will be returned; when ``True``, a new copy will be created.
                (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
                during cloning (applies to children duplication as well). If fields are to be
                considered for a limited number of levels, you have to add it as integer, for
                example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
                level only but ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject

        Raises:
            NotImplementedError: Always, in this base class.

        """
        class_name = self.__class__.__name__
        raise NotImplementedError(f"{class_name} does not implement .clone so far")

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Reference the object within the _objects of pdf_dest only if
        indirect_reference attribute exists (which means the objects was
        already identified in xref/xobjstm) if object has been already
        referenced do nothing.

        Args:
            clone: The freshly created copy of ``self``.
            pdf_dest: Writer the copy is registered in.
            force_duplicate: When ``True``, register a new copy even if this
                object was already translated into ``pdf_dest``.

        Returns:
            The clone, or the object previously registered for the same
            source reference.

        """
        if not force_duplicate:
            # Best effort: the clone may have no usable indirect_reference at
            # all, in which case any failure simply means "not registered yet".
            try:
                already_in_dest = bool(clone.indirect_reference.pdf == pdf_dest)
            except Exception:
                already_in_dest = False
            if already_in_dest:
                return clone

        # if hasattr(clone, "indirect_reference"):
        try:
            source_ref = self.indirect_reference
        except AttributeError:
            return clone

        # In incremental mode an object coming from the writer's own reader
        # keeps its number; otherwise it gets the next free slot.
        keep_number = (
            pdf_dest.incremental
            and source_ref is not None
            and source_ref.pdf == pdf_dest._reader
            and source_ref.idnum <= len(pdf_dest._objects)
        )
        if keep_number:
            number = source_ref.idnum
        else:
            number = len(pdf_dest._objects) + 1

        if source_ref is not None:
            origin = id(source_ref.pdf)
            if origin not in pdf_dest._id_translated:
                pdf_dest._id_translated[origin] = {}
                # The table is keyed by id(); keep the source alive so that
                # the id cannot be reused by another object.
                pdf_dest._id_translated[origin]["PreventGC"] = source_ref.pdf  # type: ignore
            translation = pdf_dest._id_translated[origin]
            if not force_duplicate and source_ref.idnum in translation:
                existing = pdf_dest.get_object(translation[source_ref.idnum])
                assert existing is not None
                return existing
            translation[source_ref.idnum] = number

        try:
            pdf_dest._objects[number - 1] = clone
        except IndexError:
            pdf_dest._objects.append(clone)
            number = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(number, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Serialize the object to ``stream``; not implemented in the base class."""
        raise NotImplementedError

200

201

class NullObject(PdfObject):
    """The PDF ``null`` object."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        duplicate = NullObject()
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("NullObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # A null carries no value, so only the class takes part in the hash.
        return hash((self.__class__,))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``null`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Read the four bytes ``null`` from ``stream``.

        Raises:
            PdfReadError: If the next four bytes are not ``b"null"``.

        """
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        # Every NullObject equals every other NullObject.
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()

248

249

class BooleanObject(PdfObject):
    """A PDF boolean (``true`` / ``false``) wrapping an arbitrary Python value."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = BooleanObject(self.value)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("BooleanObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.value))

    def __eq__(self, o: object, /) -> bool:
        # Comparable with other BooleanObjects and with plain bools only.
        if isinstance(o, BooleanObject):
            other_value = o.value
        elif isinstance(o, bool):
            other_value = o
        else:
            return False
        return self.value == other_value

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read ``true`` or ``false`` from ``stream``.

        Raises:
            PdfReadError: If the next four bytes are neither ``b"true"`` nor
                ``b"fals"``.

        """
        head = stream.read(4)
        if head == b"fals":
            # Consume the fifth byte of "false"; it is not checked.
            stream.read(1)
            return BooleanObject(False)
        if head == b"true":
            return BooleanObject(True)
        raise PdfReadError("Could not read Boolean object")

310

311

class IndirectObject(PdfObject):
    """
    Reference (``<idnum> <generation> R``) to an object held by ``pdf``.

    Item access, iteration, attribute access and numeric/string conversion
    are forwarded to the referenced object.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        # Uses the identity of ``pdf``, matching the ``is`` test in __eq__.
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers that points into ``pdf_dest``."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: When ``True``, do not return ``self`` even if it
                already belongs to ``pdf_dest``.
            ignore_fields: Passed through to the ``clone`` of the referenced
                object.

        Returns:
            A reference that is valid inside ``pdf_dest``.

        """
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # The target was already copied: reuse the registered object.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # case observed : a pointed object can not be found
            if obj is None:
                # Substitute a null object that remembers this reference.
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        # asserts added to prevent errors in mypy
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        """An indirect object is its own reference."""
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Ask ``pdf`` to resolve this reference."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # Copy the reference only; ``pdf`` itself is shared, not copied.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference and reject a target that is itself a reference.

        Raises:
            PdfStreamError: If the resolved object is an IndirectObject.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Python calls this only after normal lookup failed. If one of our
        # own instance attributes is missing (instance created without
        # __init__, e.g. by copy or pickle), resolving the target would need
        # ``self.pdf`` and re-enter this method forever, so fail right away.
        if name in ("idnum", "generation", "pdf"):
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Equal only to another reference with the same numbers and the very
        # same ``pdf`` object (identity, not equality).
        return (
            other is not None
            and isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``<idnum> <generation> R`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        Args:
            stream: Byte stream positioned at the first digit of the id.
            pdf: Object stored as the owner of the new reference.

        Returns:
            The parsed reference.

        Raises:
            PdfStreamError: If the stream ends before both numbers were read.
            PdfReadError: If the token after the two numbers is not ``R``.

        """
        # Object number: everything up to the first whitespace byte.
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        # Generation number: skip leading whitespace, then read up to the
        # next whitespace byte.
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)

472

473

# Digit budget used by FloatObject.myrepr(); kept at module level so users
# can adjust it. The original note asks for a minimum of 5 digits.
FLOAT_WRITE_PRECISION = 8


class FloatObject(float, PdfObject):
    """A PDF real number."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Build the object from anything ``float()`` accepts.

        Args:
            value: Value to convert.
            context: Unused by this method.

        Returns:
            The new object; ``0.0`` (with a logged warning) when ``value``
            cannot be converted.

        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method itself would
        # depend on the identity of this instance instead of its value.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Return the fixed-point text used when writing the number.

        The number of decimals is ``FLOAT_WRITE_PRECISION`` minus the
        truncated base-10 exponent of the value, but at least 1; trailing
        zeros and a trailing dot are stripped afterwards.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``myrepr()`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))

534

535

class NumberObject(int, PdfObject):
    """A PDF integer number."""

    # Matches the first byte that cannot belong to a number.
    # NOTE(review): inside the character class ``+-.`` is a range from "+"
    # to ".", so "," is accepted as well -- confirm whether that is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        """Build the object from ``value``; fall back to 0 (with a warning) on ValueError."""
        try:
            parsed = int(value)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            parsed = 0
        return int.__new__(cls, parsed)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        duplicate = NumberObject(self)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("NumberObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def as_numeric(self) -> int:
        """Return the value as a plain ``int``, parsed back from ``repr(self)``."""
        digits = repr(self).encode("utf8")
        return int(digits)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``repr(self)`` to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """Read a numeric token; one containing ``.`` becomes a FloatObject."""
        token = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." not in token:
            return NumberObject(token)
        return FloatObject(token)

586

587

class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("ByteStringObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        raw = bytes(self)
        return hash((self.__class__, raw))

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes as a hex string ``<...>``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        hex_digits = binascii.hexlify(self)
        stream.write(b"<")
        stream.write(hex_digits)
        stream.write(b">")

    def __str__(self) -> str:
        # Try UTF-16 first, then every charset in NameObject.CHARSETS, and
        # return the first successful decoding.
        for candidate in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(candidate)
            except UnicodeDecodeError:
                continue
        raise PdfReadError("Cannot decode ByteStringObject.")

645

646

class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # True when the text could be encoded with PDFDocEncoding at creation.
    autodetect_pdfdocencoding: bool
    # True when the text was (or has to be) handled as UTF-16.
    autodetect_utf16: bool
    # BOM to emit when encoding as UTF-16; b"" when none is recorded.
    utf16_bom: bytes
    # Bytes the object was created from, when it was created from bytes.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        """
        Build the object from ``str`` or ``bytes``.

        Bytes are first decoded with ``charmap``. If the result starts with a
        UTF-16 byte order mark the original bytes are decoded as UTF-16
        instead; otherwise the text is probed against PDFDocEncoding to set
        the autodetect flags.

        Args:
            value: Text or raw bytes.

        Returns:
            The new object with all autodetect attributes initialised.

        """
        org = None
        if isinstance(value, bytes):
            org = value
            value = value.decode("charmap")
        o = str.__new__(cls, value)
        o._original_bytes = org
        o.autodetect_utf16 = False
        o.autodetect_pdfdocencoding = False
        o.utf16_bom = b""
        if o.startswith(("\xfe\xff", "\xff\xfe")):
            assert org is not None, "mypy"
            try:
                o = str.__new__(cls, org.decode("utf-16"))
            except UnicodeDecodeError as exc:
                logger_warning(
                    f"{exc!s}\ninitial string:{exc.object!r}",
                    __name__,
                )
                # Keep whatever could be decoded before the faulty position.
                o = str.__new__(cls, exc.object[: exc.start].decode("utf-16"))
            # ``o`` is a brand-new instance here, so every attribute has to
            # be set again -- including autodetect_pdfdocencoding, which has
            # no class-level default and is read by clone().
            o._original_bytes = org
            o.autodetect_utf16 = True
            o.autodetect_pdfdocencoding = False
            o.utf16_bom = org[:2]
        else:
            try:
                encode_pdfdocencoding(o)
                o.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                o.autodetect_utf16 = True
                o.utf16_bom = codecs.BOM_UTF16_BE
        return o

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest."""
        obj = TextStringObject(self)
        # Carry over the encoding information of the source object.
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """
        Re-encode the text according to the autodetect flags.

        Returns:
            UTF-16 bytes (with the recorded BOM, big-endian without BOM when
            none is recorded) or PDFDocEncoding bytes.

        Raises:
            Exception: If neither autodetect flag is set.

        """
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes to write for this string.

        The stored original bytes win when present; otherwise PDFDocEncoding
        is tried (unless the object is flagged as UTF-16) with UTF-16 as the
        fallback.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Jump straight to the UTF-16 fallback below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the string as a literal ``( ... )`` to ``stream``.

        Every byte that is neither ASCII alphanumeric nor a space is written
        as a three-digit octal escape. ``encryption_key`` is deprecated.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")

786

787

class NameObject(str, PdfObject):  # noqa: SLOT000
    """A PDF name such as ``/Type``; the leading ``/`` is part of the string."""

    # A run of whitespace or a single delimiter character ends a name.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters written as ``#XX``: the listed delimiters plus every
    # character with a code below 33.
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped name to ``stream``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Return the name as bytes with special characters written as ``#XX``.

        The first character is copied unescaped (a deprecation is reported
        when it is not ``/``). Characters above ``~`` are escaped byte by
        byte from their UTF-8 encoding; the others are looked up in
        ``renumber_table`` and copied as-is when absent from it.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen). The _sanitize method replaces
        spaces and any non-alphanumeric/non-underscore/non-hyphen with
        underscores.

        Returns:
            NameObject with sanitized name.
        """
        name = str(self)[1:]  # Remove leading forward slash
        name = re.sub(r"\ ", "_", name)
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated alias of ``prefix``."""
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Replace every ``#XX`` hex escape in ``sin`` by the byte it encodes.

        A ``#`` that is not followed by two hexadecimal digits is left
        untouched, together with the characters after it.

        Args:
            sin: Raw name bytes.

        Returns:
            The bytes with all valid escapes decoded.

        """
        i = sin.find(b"#", 0)
        while i >= 0:
            hex_pair = sin[i + 1 : i + 3]
            # Fewer than two characters left: incomplete escape, keep it.
            if len(hex_pair) == 2:
                try:
                    sin = sin[:i] + unhexlify(hex_pair) + sin[i + 3 :]
                except ValueError:
                    # if the 2 characters after # can not be converted to hex
                    # we change nothing and carry on
                    pass
            # Always continue from the next real "#". Starting after position
            # i also skips a "#" that was just produced by decoding "#23".
            i = sin.find(b"#", i + 1)
        return sin

    # Encodings tried, in this order, when decoding a name read from a file.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name from ``stream``.

        Args:
            stream: Byte stream positioned at the leading ``/``.
            pdf: Object whose ``strict`` attribute decides how an
                undecodable name is reported.

        Returns:
            The decoded name.

        Raises:
            PdfReadError: If the first byte is not ``/``, or if the name
                cannot be decoded and ``pdf.strict`` is true.

        """
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e

909

910

def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` through the ``_pdfdoc_encoding_rev`` table.

    Args:
        unicode_string: Text to encode.

    Returns:
        One byte per character of the input.

    Raises:
        UnicodeEncodeError: If a character has no entry in the table.

    """
    codes = []
    try:
        for char in unicode_string:
            codes.append(_pdfdoc_encoding_rev[char])
        return bytes(codes)
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )

922

923

def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Tell whether ``x`` stands for "no value".

    Returns:
        True if x is None, or a PdfObject whose ``get_object()`` result is
        None or a NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    resolved = x.get_object()
    return resolved is None or isinstance(resolved, NullObject)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_base.py: 53%

438 statements