Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_base.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

432 statements  

1# Copyright (c) 2006, Mathieu Fenniak 

2# All rights reserved. 

3# 

4# Redistribution and use in source and binary forms, with or without 

5# modification, are permitted provided that the following conditions are 

6# met: 

7# 

8# * Redistributions of source code must retain the above copyright notice, 

9# this list of conditions and the following disclaimer. 

10# * Redistributions in binary form must reproduce the above copyright notice, 

11# this list of conditions and the following disclaimer in the documentation 

12# and/or other materials provided with the distribution. 

13# * The name of the author may not be used to endorse or promote products 

14# derived from this software without specific prior written permission. 

15# 

16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

26# POSSIBILITY OF SUCH DAMAGE. 

27import binascii 

28import codecs 

29import hashlib 

30import re 

31import sys 

32from binascii import unhexlify 

33from math import log10 

34from struct import iter_unpack 

35from typing import Any, Callable, ClassVar, Dict, Optional, Sequence, Union, cast 

36 

37if sys.version_info[:2] >= (3, 10): 

38 from typing import TypeGuard 

39else: 

40 from typing_extensions import TypeGuard # PEP 647 

41 

42from .._codecs import _pdfdoc_encoding_rev 

43from .._protocols import PdfObjectProtocol, PdfWriterProtocol 

44from .._utils import ( 

45 StreamType, 

46 classproperty, 

47 deprecate_no_replacement, 

48 deprecate_with_replacement, 

49 logger_warning, 

50 read_non_whitespace, 

51 read_until_regex, 

52) 

53from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError 

54 

55__author__ = "Mathieu Fenniak" 

56__author_email__ = "biziqe@mathieu.fenniak.net" 

57 

58 

class PdfObject(PdfObjectProtocol):
    """
    Base class of the PDF object types declared in this module.

    ``hash_bin``, ``clone`` and ``write_to_stream`` raise
    ``NotImplementedError`` here and are overridden by the subclasses.
    """

    # function for calculating a hash value
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        message = f"{self.__class__.__name__} does not implement .hash_bin() so far"
        raise NotImplementedError(message)

    def hash_value_data(self) -> bytes:
        """Return the encoded string form of the object, used as hash input."""
        as_text = f"{self}"
        return as_text.encode()

    def hash_value(self) -> bytes:
        """Return ``b"<ClassName>:<hex digest of hash_value_data()>"``."""
        class_name = self.__class__.__name__
        digest = self.hash_func(self.hash_value_data()).hexdigest()
        return f"{class_name}:{digest}".encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        The base implementation simply delegates to :meth:`clone`.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).


        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and referenced,
                the copy will be returned; when ``True``, a new copy will be created.
                (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
                during cloning (applies to children duplication as well). If fields are to be
                considered for a limited number of levels, you have to add it as integer, for
                example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
                level only but ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject

        """
        message = f"{self.__class__.__name__} does not implement .clone so far"
        raise NotImplementedError(message)

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Reference the object within the _objects of pdf_dest only if
        indirect_reference attribute exists (which means the objects was
        already identified in xref/xobjstm) if object has been already
        referenced do nothing.

        Args:
            clone: The freshly created copy to register.
            pdf_dest: The writer receiving the copy.
            force_duplicate: When ``True``, never reuse an existing copy.

        Returns:
            The clone

        """
        # Fast path: the clone already belongs to pdf_dest. Any failure while
        # probing simply means the fast path does not apply.
        try:
            already_in_dest = (
                not force_duplicate and clone.indirect_reference.pdf == pdf_dest
            )
        except Exception:
            already_in_dest = False
        if already_in_dest:
            return clone

        # if hasattr(clone, "indirect_reference"):
        try:
            source_ref = self.indirect_reference
        except AttributeError:
            return clone

        # In incremental mode an object coming from the writer's own reader
        # keeps its object number; otherwise the next free number is used.
        keep_number = (
            pdf_dest.incremental
            and source_ref is not None
            and source_ref.pdf == pdf_dest._reader
            and source_ref.idnum <= len(pdf_dest._objects)
        )
        slot = source_ref.idnum if keep_number else len(pdf_dest._objects) + 1

        if source_ref is not None:
            source_key = id(source_ref.pdf)
            translations = pdf_dest._id_translated
            if source_key not in translations:
                translations[source_key] = {}
                # presumably keeps the source document alive so that its id()
                # stays unique -- TODO confirm
                translations[source_key]["PreventGC"] = source_ref.pdf  # type: ignore
            per_source = translations[source_key]
            if not force_duplicate and source_ref.idnum in per_source:
                known = pdf_dest.get_object(per_source[source_ref.idnum])
                assert known is not None
                return known
            per_source[source_ref.idnum] = slot

        try:
            pdf_dest._objects[slot - 1] = clone
        except IndexError:
            # Slot is past the end of the list: append and use the new length.
            pdf_dest._objects.append(clone)
            slot = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(slot, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Serialize the object to ``stream``; must be provided by subclasses."""
        raise NotImplementedError

199 

200 

class NullObject(PdfObject):
    """The PDF ``null`` object; all instances compare equal to each other."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        fresh = NullObject()
        registered = self._reference_clone(fresh, pdf_dest, force_duplicate)
        return cast("NullObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # There is no value to include: only the class contributes.
        key = (self.__class__,)
        return hash(key)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the literal ``null`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Read four bytes from ``stream`` and expect them to be ``null``.

        Raises:
            PdfReadError: If the four bytes read are not ``b"null"``.

        """
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()

247 

248 

class BooleanObject(PdfObject):
    """A PDF boolean (``true`` / ``false``) wrapping an arbitrary ``value``."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = BooleanObject(self.value)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("BooleanObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, self.value)
        return hash(key)

    def __eq__(self, o: object, /) -> bool:
        # Comparable with other BooleanObjects and with plain bools only.
        if isinstance(o, BooleanObject):
            other_value = o.value
        elif isinstance(o, bool):
            other_value = o
        else:
            return False
        return self.value == other_value

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false`` to ``stream`` depending on ``value``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read a boolean keyword from ``stream``.

        Raises:
            PdfReadError: If the first four bytes are neither ``true`` nor ``fals``.

        """
        head = stream.read(4)
        if head == b"true":
            return BooleanObject(True)
        if head != b"fals":
            raise PdfReadError("Could not read Boolean object")
        # Consume the fifth byte of "false"; its value is not checked.
        stream.read(1)
        return BooleanObject(False)

309 

310 

class IndirectObject(PdfObject):
    """
    Reference to an object of a PDF document.

    A reference is identified by ``idnum``, ``generation`` and the document
    (``pdf``) it belongs to. Attribute lookup, item access, membership tests,
    iteration and ``float``/``int``/``str`` conversion are forwarded to the
    object the reference resolves to.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        # Identity of the document (not its value) is part of the hash,
        # matching the ``is`` comparison used in __eq__.
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers, re-targeted at ``pdf_dest``."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: When ``True``, do not short-circuit for references
                that already belong to ``pdf_dest``.
            ignore_fields: Forwarded to the ``clone`` of the pointed object.

        Returns:
            A reference belonging to ``pdf_dest``.

        """
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # The pointed object was cloned before: look up its copy.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # case observed : a pointed object can not be found
            if obj is None:
                # Substitute a NullObject that remembers this reference.
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        """An IndirectObject is its own indirect reference."""
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Ask the owning document to resolve this reference."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # The document itself is shared, not copied.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference, refusing a result that is itself a reference.

        Raises:
            PdfStreamError: If the resolved object is an IndirectObject.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Equal only to another reference with the same numbers that belongs
        # to the very same document object.
        return (
            other is not None
            and isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the reference as ``<idnum> <generation> R``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        Args:
            stream: Byte stream positioned at the first digit of the object number.
            pdf: Document the resulting reference belongs to.

        Raises:
            PdfStreamError: If the stream ends while reading either number.
            PdfReadError: If the numbers are not followed by ``R``.

        """
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                # Skip any extra whitespace before the generation number.
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)

471 

472 

# Precision budget used by FloatObject.myrepr() to decide how many decimals
# are written for a float; kept at module level so that it can be adjusted.
FLOAT_WRITE_PRECISION = 8  # shall be min 5 digits max, allow user adj

474 

475 

class FloatObject(float, PdfObject):
    """A PDF real number; a ``float`` subclass with PDF-specific formatting."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Build the object from anything ``float()`` accepts.

        Args:
            value: Value to convert; anything that cannot be converted is
                replaced by ``0.0`` after logging a warning.
            context: Unused by this implementation.

        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method would not
        # reflect the numeric value.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Return the textual form written to PDF files.

        Zero is rendered as ``"0.0"``. Other values use fixed-point notation
        with ``FLOAT_WRITE_PRECISION - int(log10(abs(self)))`` decimals (at
        least one), after which trailing zeros and a trailing dot are removed.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``myrepr()`` to ``stream``, UTF-8 encoded."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))

534 

class NumberObject(int, PdfObject):
    """A PDF integer; an ``int`` subclass."""

    # Matches the first byte that cannot belong to a numeric token.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        """Build the object from ``value``; a ``ValueError`` yields ``0`` plus a warning."""
        try:
            parsed = int(value)
            return int.__new__(cls, parsed)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        duplicate = NumberObject(self)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("NumberObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, self.as_numeric())
        return hash(key)

    def as_numeric(self) -> int:
        """Return the value as a plain ``int`` (via its textual representation)."""
        encoded = repr(self).encode("utf8")
        return int(encoded)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the decimal representation of the number to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        encoded = repr(self).encode("utf8")
        stream.write(encoded)

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """
        Read a numeric token from ``stream``.

        Returns:
            A FloatObject when the token contains a dot, a NumberObject otherwise.

        """
        token = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." not in token:
            return NumberObject(token)
        return FloatObject(token)

585 

586 

class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("ByteStringObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, bytes(self))
        return hash(key)

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes as a hexadecimal string enclosed in ``<`` and ``>``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        for chunk in (b"<", binascii.hexlify(self), b">"):
            stream.write(chunk)

    def __str__(self) -> str:
        # Try UTF-16 first, then every charset configured on NameObject;
        # the first one that decodes without error wins.
        for candidate in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(candidate)
            except UnicodeDecodeError:
                continue
        raise PdfReadError("Cannot decode ByteStringObject.")

644 

645 

class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # Set by __new__: which encoding was detected for the value.
    autodetect_pdfdocencoding: bool
    autodetect_utf16: bool
    # BOM to emit when re-encoding as UTF-16 (b"" when none applies).
    utf16_bom: bytes
    # The bytes the object was built from, when it was built from bytes.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        """
        Build the string and record how it has to be encoded again.

        ``bytes`` input is first decoded with ``charmap``. If the result
        starts with a UTF-16 BOM, the original bytes are decoded as UTF-16
        (truncating at the first undecodable position, with a warning).
        Otherwise the value is flagged as PDFDocEncoding when it can be
        encoded that way, and as UTF-16BE when it cannot.
        """
        org = None
        if isinstance(value, bytes):
            org = value
            value = value.decode("charmap")
        o = str.__new__(cls, value)
        o._original_bytes = org
        o.autodetect_utf16 = False
        o.autodetect_pdfdocencoding = False
        o.utf16_bom = b""
        if o.startswith(("\xfe\xff", "\xff\xfe")):
            assert org is not None, "mypy"
            try:
                o = str.__new__(cls, org.decode("utf-16"))
            except UnicodeDecodeError as exc:
                logger_warning(
                    f"{exc!s}\ninitial string:{exc.object!r}",
                    __name__,
                )
                # Keep whatever could be decoded before the error position.
                o = str.__new__(cls, exc.object[: exc.start].decode("utf-16"))
            # ``o`` is a brand-new instance here, so every per-instance flag
            # must be set again; leaving autodetect_pdfdocencoding unset makes
            # clone() fail with AttributeError.
            o._original_bytes = org
            o.autodetect_utf16 = True
            o.autodetect_pdfdocencoding = False
            o.utf16_bom = org[:2]
        else:
            try:
                encode_pdfdocencoding(o)
                o.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                o.autodetect_utf16 = True
                o.utf16_bom = codecs.BOM_UTF16_BE
        return o

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest."""
        obj = TextStringObject(self)
        # Carry over the detection results of the source instead of the ones
        # recomputed from the decoded text.
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """Re-encode the text according to the recorded autodetection flags."""
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes to write for this string.

        The original bytes are returned when known. Otherwise PDFDocEncoding
        is used unless UTF-16 was detected or the text cannot be encoded that
        way, in which case UTF-16 (with the recorded BOM, if any) is used.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Jump straight to the UTF-16 handling below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the string as a literal string enclosed in parentheses.

        Every byte that is neither alphanumeric nor a space is written as a
        three-digit octal escape.
        """
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")

785 

786 

class NameObject(str, PdfObject):  # noqa: SLOT000
    """A PDF name object (``/Name``); a ``str`` subclass."""

    # Whitespace or any of the delimiter characters ends a name token.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters that are written as ``#XX``: the delimiters above plus "#",
    # and every code point from 0 to 32.
    renumber_table: ClassVar[Dict[str, bytes]] = {
        chr(code): f"#{code:02X}".encode()
        for code in (*b"#()<>[]{}/%", *range(33))
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        duplicate = NameObject(self)
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("NameObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, self)
        return hash(key)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped form of the name (see ``renumber``) to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecate_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Return the name as bytes with special characters escaped as ``#XX``.

        The first character is emitted as is (a deprecation notice is issued
        when it is not ``/``). Characters above ``~`` are escaped byte by byte
        from their UTF-8 encoding; the others are escaped when listed in
        ``renumber_table``.
        """
        first = self[0].encode("utf-8")
        if first != b"/":
            deprecate_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "6.0.0",
            )
        pieces = [first]
        for char in self[1:]:
            if char > "~":
                pieces.extend(
                    f"#{byte:02X}".encode() for byte in char.encode("utf-8")
                )
            else:
                pieces.append(self.renumber_table.get(char, char.encode("utf-8")))
        return b"".join(pieces)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated alias; use ``prefix``."""
        deprecate_with_replacement("surfix", "prefix", "6.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """Replace every ``#XX`` hexadecimal escape in ``sin`` by the byte it denotes."""
        pos = sin.find(b"#", 0)
        while pos >= 0:
            try:
                decoded = unhexlify(sin[pos + 1 : pos + 3])
                sin = sin[:pos] + decoded + sin[pos + 3 :]
                pos = sin.find(b"#", pos + 1)
            except ValueError:
                # if the 2 characters after # can not be converted to hex
                # we change nothing and carry on
                pos = pos + 1
        return sin

    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name token from ``stream``.

        Args:
            stream: Byte stream positioned at the leading ``/``.
            pdf: Reader whose ``strict`` flag decides how decoding failures
                are handled.

        Raises:
            PdfReadError: If the first byte is not ``/``, or if the name cannot
                be decoded and ``pdf.strict`` is true.

        """
        raw = stream.read(1)
        if raw != NameObject.prefix:
            raise PdfReadError("Name read error")
        raw += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            raw = NameObject.unnumber(raw)
            for charset in NameObject.CHARSETS:
                try:
                    decoded = raw.decode(charset)
                    return NameObject(decoded)
                except Exception:
                    continue
            raise UnicodeDecodeError("", raw, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as error:
            if pdf.strict:
                raise PdfReadError(
                    f"Illegal character in NameObject ({raw!r}). "
                    "You may need to adjust NameObject.CHARSETS.",
                ) from error
            logger_warning(
                f"Illegal character in NameObject ({raw!r}), "
                "you may need to adjust NameObject.CHARSETS",
                __name__,
            )
            return NameObject(raw.decode("charmap"))

893 

894 

def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` using the reverse PDFDocEncoding table.

    Raises:
        UnicodeEncodeError: If a character has no entry in the table.

    """
    try:
        codes = [_pdfdoc_encoding_rev[char] for char in unicode_string]
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
    return bytes(codes)

906 

907 

def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Tell whether ``x`` is ``None`` or resolves to nothing.

    Args:
        x: Any value; only ``PdfObject`` instances are resolved.

    Returns:
        True if x is None or NullObject, or a PdfObject whose ``get_object()``
        yields ``None`` or a NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    # Resolve once and reuse the result instead of resolving the reference
    # for each of the two checks.
    resolved = x.get_object()
    return resolved is None or isinstance(resolved, NullObject)

917 )