Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_writer.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1450 statements  

1# Copyright (c) 2006, Mathieu Fenniak 

2# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com> 

3# 

4# All rights reserved. 

5# 

6# Redistribution and use in source and binary forms, with or without 

7# modification, are permitted provided that the following conditions are 

8# met: 

9# 

10# * Redistributions of source code must retain the above copyright notice, 

11# this list of conditions and the following disclaimer. 

12# * Redistributions in binary form must reproduce the above copyright notice, 

13# this list of conditions and the following disclaimer in the documentation 

14# and/or other materials provided with the distribution. 

15# * The name of the author may not be used to endorse or promote products 

16# derived from this software without specific prior written permission. 

17# 

18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

28# POSSIBILITY OF SUCH DAMAGE. 

29 

30import decimal 

31import enum 

32import hashlib 

33import re 

34import struct 

35import sys 

36import uuid 

37from collections.abc import Iterable, Mapping 

38from io import BytesIO, FileIO, IOBase 

39from itertools import compress 

40from pathlib import Path 

41from re import Pattern 

42from types import TracebackType 

43from typing import ( 

44 IO, 

45 Any, 

46 Callable, 

47 Optional, 

48 Union, 

49 cast, 

50) 

51 

52if sys.version_info >= (3, 11): 

53 from typing import Self 

54else: 

55 from typing_extensions import Self 

56 

57from ._doc_common import DocumentInformation, PdfDocCommon 

58from ._encryption import EncryptAlgorithm, Encryption 

59from ._page import PageObject, Transformation 

60from ._page_labels import nums_clear_range, nums_insert, nums_next 

61from ._reader import PdfReader 

62from ._utils import ( 

63 StrByteType, 

64 StreamType, 

65 _get_max_pdf_version_header, 

66 deprecate_with_replacement, 

67 deprecation_no_replacement, 

68 logger_warning, 

69) 

70from .constants import AnnotationDictionaryAttributes as AA 

71from .constants import CatalogAttributes as CA 

72from .constants import ( 

73 CatalogDictionary, 

74 GoToActionArguments, 

75 ImageType, 

76 InteractiveFormDictEntries, 

77 OutlineFontFlag, 

78 PageLabelStyle, 

79 PagesAttributes, 

80 TypFitArguments, 

81 UserAccessPermissions, 

82) 

83from .constants import Core as CO 

84from .constants import FieldDictionaryAttributes as FA 

85from .constants import PageAttributes as PG 

86from .constants import TrailerKeys as TK 

87from .errors import LimitReachedError, PdfReadError, PyPdfError 

88from .generic import ( 

89 PAGE_FIT, 

90 ArrayObject, 

91 BooleanObject, 

92 ByteStringObject, 

93 ContentStream, 

94 Destination, 

95 DictionaryObject, 

96 EmbeddedFile, 

97 Fit, 

98 FloatObject, 

99 IndirectObject, 

100 NameObject, 

101 NullObject, 

102 NumberObject, 

103 PdfObject, 

104 RectangleObject, 

105 ReferenceLink, 

106 StreamObject, 

107 TextStringObject, 

108 TreeObject, 

109 ViewerPreferences, 

110 create_string_object, 

111 extract_links, 

112 hex_to_rgb, 

113 is_null_or_none, 

114) 

115from .generic._appearance_stream import TextStreamAppearance 

116from .pagerange import PageRange, PageRangeSpec 

117from .types import ( 

118 AnnotationSubtype, 

119 BorderArrayType, 

120 LayoutType, 

121 OutlineItemType, 

122 OutlineType, 

123 PagemodeType, 

124) 

125from .xmp import XmpInformation 

126 

127ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all() 

128 

129 

class ObjectDeletionFlag(enum.IntFlag):
    """
    Combinable bit flags naming categories of PDF content.

    Each category occupies its own bit so that several can be OR-ed together;
    ``IMAGES`` is a ready-made combination of the three image categories.
    """

    NONE = 0
    TEXT = 1 << 0
    LINKS = 1 << 1
    ATTACHMENTS = 1 << 2
    OBJECTS_3D = 1 << 3
    ALL_ANNOTATIONS = 1 << 4
    XOBJECT_IMAGES = 1 << 5
    INLINE_IMAGES = 1 << 6
    DRAWING_IMAGES = 1 << 7
    # Convenience mask: every image representation listed above.
    IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES

141 

142 

143def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str: 

144 hash = hashlib.md5(usedforsecurity=False) 

145 for block in iter(lambda: stream.read(blocksize), b""): 

146 hash.update(block) 

147 return hash.hexdigest() 

148 

149 

150class PdfWriter(PdfDocCommon): 

151 """ 

152 Write a PDF file out, given pages produced by another class or through 

153 cloning a PDF file during initialization. 

154 

155 Typically data is added from a :class:`PdfReader<pypdf.PdfReader>`. 

156 

157 Args: 

158 clone_from: identical to fileobj (for compatibility) 

159 

160 incremental: If true, loads the document and set the PdfWriter in incremental mode. 

161 

162 When writing incrementally, the original document is written first and new/modified 

163 content is appended. To be used for signed document/forms to keep signature valid. 

164 

165 full: If true, loads all the objects (always full if incremental = True). 

166 This parameter may allow loading large PDFs. 

167 

168 strict: If true, pypdf will raise an exception if a PDF does not follow the specification. 

169 If false, pypdf will try to be forgiving and do something reasonable, but it will log 

170 a warning message. It is a best-effort approach. 

171 

172 """ 

173 

def __init__(
    self,
    fileobj: Union[None, PdfReader, StrByteType, Path] = "",
    clone_from: Union[None, PdfReader, StrByteType, Path] = None,
    incremental: bool = False,
    full: bool = False,
    strict: bool = False,
    *,
    incremental_clone_object_count_limit: Optional[int] = 500_000,
    incremental_clone_object_id_limit: Optional[int] = 1_000_000,
) -> None:
    """
    Initialize the writer and, when a source document is designated, clone it.

    Args:
        fileobj: Document to start from: a PdfReader, a path, or a stream.
            A path that does not exist or is empty, or an empty stream, is
            not cloned. ``temp_fileobj`` keeps this value (in incremental
            mode, the PdfReader built from it).
        clone_from: Alternative way to designate the document to clone. It
            is used when ``fileobj`` is ``None`` or ``""``, or when it is
            passed together with a path/stream ``fileobj``.
        incremental: Load the source through a PdfReader that is kept as
            ``_reader``, so that changes can be appended to the original.
        full: Uses the same loading path as ``incremental``; the
            ``incremental`` attribute is reset to ``False`` once loading is
            done unless ``incremental`` was requested as well.
        strict: Stored as ``self.strict``.
        incremental_clone_object_count_limit: Stored as a security limit;
            a non-int value (such as ``None``) is replaced by ``sys.maxsize``.
        incremental_clone_object_id_limit: Same handling as the count limit.

    Raises:
        PyPdfError: In incremental/full mode, when the source cannot be
            turned into a PdfReader.
    """
    self.strict = strict
    """
    If true, pypdf will raise an exception if a PDF does not follow the specification.
    If false, pypdf will try to be forgiving and do something reasonable, but it will log
    a warning message. It is a best-effort approach.
    """

    self.incremental = incremental or full
    """
    Returns if the PdfWriter object has been started in incremental mode.
    """

    self._objects: list[Optional[PdfObject]] = []
    """
    The indirect objects in the PDF.
    For the incremental case, it will be filled with None
    in clone_reader_document_root.
    """

    self._original_hash: list[int] = []
    """
    List of hashes after import; used to identify changes.
    """

    self._idnum_hash: dict[bytes, tuple[IndirectObject, list[IndirectObject]]] = {}
    """
    Maps hash values of indirect objects to the list of IndirectObjects.
    This is used for compression.
    """

    self._id_translated: dict[int, dict[int, int]] = {}
    """List of already translated IDs.
    dict[id(pdf)][(idnum, generation)]
    """

    self._info_obj: Optional[PdfObject]
    """The PDF file's document information dictionary,
    defined by Info in the PDF file's trailer dictionary."""

    self._ID: Union[ArrayObject, None] = None
    """The PDF file identifier,
    defined by the ID in the PDF file's trailer dictionary."""

    self._unresolved_links: list[tuple[ReferenceLink, ReferenceLink]] = []
    "Tracks links in pages added to the writer for resolving later."
    self._merged_in_pages: dict[Optional[IndirectObject], Optional[IndirectObject]] = {}
    "Tracks pages added to the writer and what page they turned into."

    # Security parameters.
    self._incremental_clone_object_count_limit = (
        incremental_clone_object_count_limit
        if isinstance(incremental_clone_object_count_limit, int)
        else sys.maxsize
    )
    self._incremental_clone_object_id_limit = (
        incremental_clone_object_id_limit if isinstance(incremental_clone_object_id_limit, int) else sys.maxsize
    )

    if self.incremental:
        # clone_from is documented as identical to fileobj: when no fileobj
        # was given, load the incremental source from clone_from instead of
        # trying to open the empty default path.
        if (fileobj is None or fileobj == "") and clone_from is not None:
            fileobj = clone_from
        if isinstance(fileobj, (str, Path)):
            with open(fileobj, "rb") as f:
                fileobj = BytesIO(f.read(-1))
        if isinstance(fileobj, BytesIO):
            fileobj = PdfReader(fileobj)
        if not isinstance(fileobj, PdfReader):
            raise PyPdfError("Invalid type for incremental mode")
        self._reader = fileobj  # prev content is in _reader.stream
        self._header = fileobj.pdf_header.encode()
        self._readonly = True  # TODO: to be analysed
    else:
        self._header = b"%PDF-1.3"
        self._info_obj = self._add_object(
            DictionaryObject(
                {NameObject("/Producer"): create_string_object("pypdf")}
            )
        )

    def _get_clone_from(
        fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
        clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
    ) -> Union[None, PdfReader, str, Path, IO[Any], BytesIO]:
        """Pick the document to clone, or None when nothing should be cloned."""
        if fileobj is None:
            # Without a fileobj, an explicit clone_from must still be honored.
            return clone_from
        if isinstance(fileobj, (str, Path, IO, BytesIO)) and (
            fileobj == "" or clone_from is not None
        ):
            return clone_from
        cloning = True
        # A missing or empty file has nothing to clone.
        if isinstance(fileobj, (str, Path)) and (
            not Path(str(fileobj)).exists()
            or Path(str(fileobj)).stat().st_size == 0
        ):
            cloning = False
        if isinstance(fileobj, (IOBase, BytesIO)):
            # Seek to the end to measure the stream, then restore the position.
            t = fileobj.tell()
            if fileobj.seek(0, 2) == 0:
                cloning = False
            fileobj.seek(t, 0)
        if cloning:
            clone_from = fileobj
        return clone_from

    clone_from = _get_clone_from(fileobj, clone_from)
    # To prevent overwriting
    self.temp_fileobj = fileobj
    self.fileobj = ""
    self._with_as_usage = False
    self._cloned = False
    # The root of our page tree node
    pages = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Pages"),
            NameObject(PagesAttributes.COUNT): NumberObject(0),
            NameObject(PagesAttributes.KIDS): ArrayObject(),
        }
    )
    self.flattened_pages = []
    self._encryption: Optional[Encryption] = None
    self._encrypt_entry: Optional[DictionaryObject] = None

    if clone_from is not None:
        if not isinstance(clone_from, PdfReader):
            clone_from = PdfReader(clone_from)
        self.clone_document_from_reader(clone_from)
        self._cloned = True
    else:
        self._pages = self._add_object(pages)
        self._root_object = DictionaryObject(
            {
                NameObject(PagesAttributes.TYPE): NameObject(CO.CATALOG),
                NameObject(CO.PAGES): self._pages,
            }
        )
        self._add_object(self._root_object)
    if full and not incremental:
        # "full" only borrowed the incremental loading path.
        self.incremental = False
    if isinstance(self._ID, list):
        # Store both identifier entries as byte strings.
        if isinstance(self._ID[0], TextStringObject):
            self._ID[0] = ByteStringObject(self._ID[0].get_original_bytes())
        if isinstance(self._ID[1], TextStringObject):
            self._ID[1] = ByteStringObject(self._ID[1].get_original_bytes())

324 

325 # for commonality 

@property
def is_encrypted(self) -> bool:
    """
    Read-only flag telling whether this document is encrypted.

    The writer always reports ``False``; the property is provided so that
    the writer exposes the same attribute as the reader.
    """
    encrypted = False
    return encrypted

335 

@property
def root_object(self) -> DictionaryObject:
    """
    Give direct access to the root dictionary of the PDF structure.

    Note:
        Recommended only for read access.

    """
    catalog = self._root_object
    return catalog

346 

@property
def _info(self) -> Optional[DictionaryObject]:
    """
    Provide access to "/Info". Standardized with PdfReader.

    Returns:
        /Info Dictionary; None if the entry does not exist

    """
    if self._info_obj is None:
        return None
    return cast(DictionaryObject, self._info_obj.get_object())

@_info.setter
def _info(self, value: Optional[Union[IndirectObject, DictionaryObject]]) -> None:
    """
    Replace the content of "/Info", or remove the entry when given None.

    Args:
        value: New information dictionary (or a reference to one). Its
            entries are copied into the writer's own info dictionary, which
            is created on demand. ``None`` clears the writer's slot for the
            info object and forgets it.

    """
    if value is None:
        try:
            self._objects[self._info_obj.indirect_reference.idnum - 1] = None  # type: ignore
        except (KeyError, IndexError, AttributeError):
            # Best effort: no info object (or no valid slot) means there is
            # nothing to clear. _objects is a list, hence IndexError.
            pass
        self._info_obj = None
        return

    if self._info_obj is None:
        self._info_obj = self._add_object(DictionaryObject())
    obj = cast(DictionaryObject, self._info_obj.get_object())
    new_content = cast(DictionaryObject, value.get_object())
    if new_content is obj:
        # Assigning the current dictionary to itself: clearing it first
        # would wipe the very entries we are about to copy.
        return
    obj.clear()
    obj.update(new_content)

376 

@property
def xmp_metadata(self) -> Optional[XmpInformation]:
    """XMP (Extensible Metadata Platform) data, read from the root object."""
    catalog = self.root_object
    return cast(XmpInformation, catalog.xmp_metadata)

@xmp_metadata.setter
def xmp_metadata(self, value: Union[XmpInformation, bytes, None]) -> None:
    """
    Store, replace or remove the XMP metadata stream.

    ``None`` removes the "/Metadata" entry. Otherwise the data is written
    into the stream referenced by "/Metadata"; when that entry is missing or
    is not an indirect reference, a new stream object is registered first.
    """
    catalog = self.root_object
    if value is None:
        if "/Metadata" in catalog:
            del catalog["/Metadata"]
        return

    current = catalog.get("/Metadata", None)
    if isinstance(current, IndirectObject):
        # Reuse the stream that is already referenced.
        target_stream = cast(StreamObject, current.get_object())
    else:
        if current is not None:
            del catalog["/Metadata"]
        target_stream = StreamObject()
        catalog[NameObject("/Metadata")] = self._add_object(target_stream)

    payload = value.stream.get_data() if isinstance(value, XmpInformation) else value
    target_stream.set_data(payload)

405 

@property
def with_as_usage(self) -> bool:
    """Deprecated read access to the internal ``_with_as_usage`` flag."""
    deprecation_no_replacement("with_as_usage", "5.0")
    current = self._with_as_usage
    return current

@with_as_usage.setter
def with_as_usage(self, value: bool) -> None:
    """Deprecated write access to the internal ``_with_as_usage`` flag."""
    deprecation_no_replacement("with_as_usage", "5.0")
    self._with_as_usage = value

415 

def __enter__(self) -> Self:
    """
    Prepare the writer for use as a context manager.

    The initializer is run again with its default arguments; the cloned
    state is carried over, and the value remembered in ``temp_fileobj``
    becomes ``fileobj``.
    """
    was_cloned: bool = self._cloned
    remembered_target = self.temp_fileobj
    self.__init__()  # type: ignore
    # Restore what the re-initialization above has reset.
    self._cloned = was_cloned
    self.fileobj = remembered_target  # type: ignore
    self._with_as_usage = True
    return self

425 

def __exit__(
    self,
    exc_type: Optional[type[BaseException]],
    exc: Optional[BaseException],
    traceback: Optional[TracebackType],
) -> None:
    """
    Write the document to ``fileobj`` when leaving the ``with`` block.

    Nothing is written when no ``fileobj`` is set or when the writer was
    created by cloning. The exception arguments are not inspected.
    """
    if not self.fileobj or self._cloned:
        return
    self.write(self.fileobj)

435 

@property
def pdf_header(self) -> str:
    """
    Read/Write property of the PDF header that is written.

    This should be something like ``'%PDF-1.5'``. It is recommended to set
    the lowest version that supports all features which are used within the
    PDF file.

    Note: `pdf_header` returns a string but accepts bytes or str for writing
    """
    raw_header = self._header
    return raw_header.decode()

@pdf_header.setter
def pdf_header(self, new_header: Union[str, bytes]) -> None:
    """Store the header as bytes, encoding it first when given as ``str``."""
    self._header = new_header.encode() if isinstance(new_header, str) else new_header

454 

def _add_object(self, obj: PdfObject) -> IndirectObject:
    """
    Register ``obj`` as an indirect object of this writer.

    An object whose reference already points to this writer is not added
    again; its existing reference is returned. For dictionaries, an array or
    dictionary stored under the contents key is registered first and replaced
    by its reference.

    Returns:
        The indirect reference designating ``obj`` in this writer.
    """
    known_ref = getattr(obj, "indirect_reference", None)
    if known_ref is not None and known_ref.pdf == self:
        return known_ref

    # check for /Contents in Pages (/Contents in annotations are strings)
    if isinstance(obj, DictionaryObject):
        raw_contents = obj.get(PG.CONTENTS, None)
        if isinstance(raw_contents, (ArrayObject, DictionaryObject)):
            obj[NameObject(PG.CONTENTS)] = self._add_object(obj[PG.CONTENTS])

    self._objects.append(obj)
    # Object numbers are 1-based: the new number equals the list length.
    new_ref = IndirectObject(len(self._objects), 0, self)
    obj.indirect_reference = new_ref
    return new_ref

469 

def get_object(
    self,
    indirect_reference: Union[int, IndirectObject],
) -> PdfObject:
    """
    Return the object stored under the given object number or reference.

    Args:
        indirect_reference: 1-based object number, or a reference that
            belongs to this writer.

    Raises:
        ValueError: The reference belongs to another document.
        PdfReadError: The designated slot holds no object.
    """
    if isinstance(indirect_reference, int):
        object_number = indirect_reference
    else:
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        object_number = indirect_reference.idnum

    found = self._objects[object_number - 1]
    if found is None:
        raise PdfReadError(f"Object {indirect_reference!r} not found!")
    return found

483 

def _replace_object(
    self,
    indirect_reference: Union[int, IndirectObject],
    obj: PdfObject,
) -> PdfObject:
    """
    Put ``obj`` into the slot of an existing indirect object.

    The generation number of the object currently stored in the slot is
    kept. An object that carries a reference to another document is cloned
    into this writer before it is stored.

    Args:
        indirect_reference: 1-based object number, or a reference that
            belongs to this writer.
        obj: The replacement object.

    Returns:
        The object now stored in the slot (the clone, if one was made).

    Raises:
        ValueError: The reference belongs to another document.
    """
    if isinstance(indirect_reference, IndirectObject):
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        object_number = indirect_reference.idnum
    else:
        object_number = indirect_reference
    slot = object_number - 1

    generation = self._objects[slot].indirect_reference.generation  # type: ignore

    current_ref = getattr(obj, "indirect_reference", None)
    if current_ref is not None and current_ref.pdf != self:
        obj = obj.clone(self)

    self._objects[slot] = obj
    obj.indirect_reference = IndirectObject(object_number, generation, self)

    assert isinstance(obj, PdfObject), "mypy"
    return obj

504 

def _add_page(
    self,
    page: PageObject,
    index: int,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Clone ``page`` into this writer and link the clone into the page tree.

    Args:
        page: The page to add; must be a PageObject whose type entry is a page.
        index: Position passed to ``_get_page_in_node`` to locate the parent
            node and the slot inside it.
        excluded_keys: Keys that must not be copied to the clone. The parent
            key and "/StructParents" are always excluded in addition.

    Returns:
        The cloned page that now belongs to this writer.

    Raises:
        ValueError: ``page`` is not a valid page object.
        PyPdfError: More than 1000 ancestors were visited while updating
            the page counts.
    """
    if not isinstance(page, PageObject) or page.get(PagesAttributes.TYPE, None) != CO.PAGE:
        raise ValueError("Invalid page object")
    assert self.flattened_pages is not None, "for mypy"
    page_org = page
    excluded_keys = list(excluded_keys)
    excluded_keys += [PagesAttributes.PARENT, "/StructParents"]
    # Acrobat does not accept two indirect references pointing on the same
    # page; therefore in order to add multiple copies of the same
    # page, we need to create a new dictionary for the page, however the
    # objects below (including content) are not duplicated:
    try:  # delete an already existing page
        del self._id_translated[id(page_org.indirect_reference.pdf)][  # type: ignore
            page_org.indirect_reference.idnum  # type: ignore
        ]
    except Exception:
        # Best effort: the page may have no reference or no translation entry.
        pass

    page = cast(
        "PageObject", page_org.clone(self, False, excluded_keys).get_object()
    )
    if page_org.pdf is not None:
        # Combine this writer's header with the header of the source document.
        other = page_org.pdf.pdf_header
        self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other)

    node, idx = self._get_page_in_node(index)
    page[NameObject(PagesAttributes.PARENT)] = node.indirect_reference

    # A non-negative idx is an insertion slot in the node; otherwise append.
    if idx >= 0:
        cast(ArrayObject, node[PagesAttributes.KIDS]).insert(idx, page.indirect_reference)
        self.flattened_pages.insert(index, page)
    else:
        cast(ArrayObject, node[PagesAttributes.KIDS]).append(page.indirect_reference)
        self.flattened_pages.append(page)
    # Walk up the ancestors and increment each page count by one; the
    # counter bounds the walk in case the parent links never end.
    recurse = 0
    while not is_null_or_none(node):
        node = cast(DictionaryObject, node.get_object())
        node[NameObject(PagesAttributes.COUNT)] = NumberObject(cast(int, node[PagesAttributes.COUNT]) + 1)
        node = node.get(PagesAttributes.PARENT, None)  # type: ignore[assignment]  # TODO: Fix.
        recurse += 1
        if recurse > 1000:
            raise PyPdfError("Too many recursive calls!")

    if page_org.pdf is not None:
        # the page may contain links to other pages, and those other
        # pages may or may not already be added. we store the
        # information we need, so that we can resolve the references
        # later.
        self._unresolved_links.extend(extract_links(page, page_org))
        self._merged_in_pages[page_org.indirect_reference] = page.indirect_reference

    return page

562 

def set_need_appearances_writer(self, state: bool = True) -> None:
    """
    Set the "NeedAppearances" flag of the interactive form dictionary.

    The flag indicates whether the appearance of form fields should be
    generated by the PDF viewer or whether the embedded appearance should be
    used. The form dictionary is created when the root object has none.
    Any error is logged as a warning instead of being raised.

    Args:
        state: The value to store for the flag.

    Returns:
        None

    """
    # See §12.7.2 and §7.7.2 for more information:
    # https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    try:
        catalog = self._root_object
        form_key = CatalogDictionary.ACRO_FORM
        if form_key not in catalog:
            catalog[NameObject(form_key)] = self._add_object(DictionaryObject())

        flag_name = NameObject(InteractiveFormDictEntries.NeedAppearances)
        acro_form = cast(DictionaryObject, catalog[form_key])
        acro_form[flag_name] = BooleanObject(state)
    except Exception as exc:  # pragma: no cover
        logger_warning(
            f"set_need_appearances_writer({state}) catch : {exc}", __name__
        )

595 

def create_viewer_preferences(self) -> ViewerPreferences:
    """
    Create a new viewer preferences object and attach it to the root object.

    Returns:
        The newly created viewer preferences object.
    """
    preferences = ViewerPreferences()
    preferences_key = NameObject(CatalogDictionary.VIEWER_PREFERENCES)
    self._root_object[preferences_key] = self._add_object(preferences)
    return preferences

602 

def add_page(
    self,
    page: PageObject,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Append a page at the end of this PDF file.

    Recommended for advanced usage including the adequate excluded_keys.

    The page is usually acquired from a :class:`PdfReader<pypdf.PdfReader>`
    instance.

    Args:
        page: The page to add to the document. Should be
            an instance of :class:`PageObject<pypdf._page.PageObject>`
        excluded_keys: Keys that are not copied to the added page.

    Returns:
        The added PageObject.

    """
    assert self.flattened_pages is not None, "mypy"
    end_position = len(self.flattened_pages)
    return self._add_page(page, end_position, excluded_keys)

627 

def insert_page(
    self,
    page: PageObject,
    index: int = 0,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Insert a page in this PDF file. The page is usually acquired from a
    :class:`PdfReader<pypdf.PdfReader>` instance.

    Args:
        page: The page to add to the document.
        index: Position at which the page will be inserted. A negative value
            counts from the end; a value at or beyond the number of pages
            appends the page.
        excluded_keys: Keys that are not copied to the added page.

    Returns:
        The added PageObject.

    Raises:
        ValueError: A negative ``index`` reaches before the first page.

    """
    assert self.flattened_pages is not None, "mypy"
    page_count = len(self.flattened_pages)
    position = index + page_count if index < 0 else index
    if position < 0:
        raise ValueError("Invalid index value")
    if position >= page_count:
        return self.add_page(page, excluded_keys)
    return self._add_page(page, position, excluded_keys)

655 

def _get_page_number_by_indirect(
    self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> Optional[int]:
    """
    Look up the page number of the page designated by a reference.

    Args:
        indirect_reference: Reference to the page, or its object number
            (taken with generation 0).

    Returns:
        The page number, or None when the reference is null/None or does
        not designate a page.

    """
    # To provide same function as in PdfReader
    if is_null_or_none(indirect_reference):
        return None
    assert indirect_reference is not None, "mypy"
    reference = (
        IndirectObject(indirect_reference, 0, self)
        if isinstance(indirect_reference, int)
        else indirect_reference
    )
    target = reference.get_object()
    return target.page_number if isinstance(target, PageObject) else None

679 

def add_blank_page(
    self, width: Optional[float] = None, height: Optional[float] = None
) -> PageObject:
    """
    Append a blank page to this PDF file and return it.

    The page is built by ``PageObject.create_blank_page``, which is also
    where a missing size is resolved (documented as: the size of the last
    page is used).

    Args:
        width: The width of the new page expressed in default user
            space units.
        height: The height of the new page expressed in default
            user space units.

    Returns:
        The newly appended page.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.

    """
    blank = PageObject.create_blank_page(self, width, height)
    appended = self.add_page(blank)
    return appended

704 

def insert_blank_page(
    self,
    width: Optional[Union[float, decimal.Decimal]] = None,
    height: Optional[Union[float, decimal.Decimal]] = None,
    index: int = 0,
) -> PageObject:
    """
    Insert a blank page to this PDF file and return it.

    A dimension that is missing (``None``) or not positive is taken from the
    media box of the page currently at ``index`` (the last page when
    ``index`` equals the number of pages). Existing pages are only consulted
    when a dimension is actually missing, so explicit dimensions also work
    on a writer that has no page yet.

    Args:
        width: The width of the new page in default user space units.
        height: The height of the new page in default user space units.
        index: Position to add the page.

    Returns:
        The newly inserted page, i.e. the page object that is part of the
        document after insertion.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.
        IndexError: Index is outside of [-self.get_num_pages(), self.get_num_pages()]
    """
    num_pages = self.get_num_pages()
    if abs(index) > num_pages:
        raise IndexError(f"Index should be in range [-{num_pages}, {num_pages}]")

    width_missing = width is None or width <= 0
    height_missing = height is None or height <= 0
    if (width_missing or height_missing) and num_pages > 0:
        # Use the chosen index, but do not exceed the available pages
        fixed_index = min(index, num_pages - 1)
        mediabox = self.pages[fixed_index].mediabox
        if width_missing:
            width = mediabox.width
        if height_missing:
            height = mediabox.height

    page = PageObject.create_blank_page(self, width, height)
    # Hand back the page actually stored in the document, as
    # add_blank_page does, rather than the template it was built from.
    return self.insert_page(page, index)

744 

@property
def open_destination(
    self,
) -> Union[None, Destination, TextStringObject, ByteStringObject]:
    """Destination shown when the document is opened, as provided by the base class."""
    inherited = super().open_destination
    return inherited

@open_destination.setter
def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
    """
    Set or remove the "/OpenAction" entry of the root object.

    Args:
        dest: ``None`` removes the entry. A string is stored as a text
            string, a Destination contributes its destination array, and a
            page is wrapped in a page-fit destination pointing to it.
            Any other value leaves the entry untouched.
    """
    if dest is None:
        try:
            del self._root_object["/OpenAction"]
        except KeyError:
            pass
        return

    if isinstance(dest, str):
        action = TextStringObject(dest)
    elif isinstance(dest, Destination):
        action = dest.dest_array
    elif isinstance(dest, PageObject):
        page_ref = dest.indirect_reference
        if page_ref is None:
            page_ref = NullObject()
        action = Destination("Opening", page_ref, PAGE_FIT).dest_array
    else:
        return
    self._root_object[NameObject("/OpenAction")] = action

770 

def add_js(self, javascript: str) -> None:
    """
    Add JavaScript which will launch upon opening this PDF.

    The script is registered under a randomly generated name in the
    "/JavaScript" entry of the root object's "/Names" dictionary; both
    levels are created when they are missing.

    Args:
        javascript: Your JavaScript.

    Example:
        This will launch the print window when the PDF is opened.

        >>> from pypdf import PdfWriter
        >>> output = PdfWriter()
        >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

    """
    # Names / JavaScript preferred to be able to add multiple scripts
    catalog = self._root_object
    if "/Names" not in catalog:
        catalog[NameObject(CA.NAMES)] = DictionaryObject()
    name_dictionary = cast(DictionaryObject, catalog[CA.NAMES])

    if "/JavaScript" not in name_dictionary:
        empty_script_tree = DictionaryObject({NameObject("/Names"): ArrayObject()})
        name_dictionary[NameObject("/JavaScript")] = empty_script_tree
    script_tree = cast(DictionaryObject, name_dictionary["/JavaScript"])
    script_entries = cast(ArrayObject, script_tree["/Names"])

    # We need a name for parameterized JavaScript in the PDF file,
    # but it can be anything.
    script_entries.append(create_string_object(str(uuid.uuid4())))

    action = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Action"),
            NameObject("/S"): NameObject("/JavaScript"),
            NameObject("/JS"): TextStringObject(f"{javascript}"),
        }
    )
    script_entries.append(self._add_object(action))

809 

def add_attachment(self, filename: str, data: Union[str, bytes]) -> "EmbeddedFile":
    """
    Embed a file inside the PDF.

    The work is delegated to ``EmbeddedFile._create_new``.

    Reference:
    https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    Section 7.11.3

    Args:
        filename: The filename to display.
        data: The data in the file.

    Returns:
        EmbeddedFile instance for the newly created embedded file.

    """
    embedded = EmbeddedFile._create_new(self, filename, data)
    return embedded

827 

def append_pages_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Copy all pages of a reader to this writer, in order.

    ``append`` should be preferred.

    Args:
        reader: The PdfReader whose pages are added to this writer.
        after_page_append:
            Optional callback invoked after each page has been added. Its
            single argument is the page as it was added to this writer.
            A value that is not callable is ignored.

    """
    source_pages = reader.pages
    notify = after_page_append if callable(after_page_append) else None
    # The page count is taken once, before any page is added.
    for position in range(len(source_pages)):
        added_page = self.add_page(source_pages[position])
        if notify is not None:
            notify(added_page)

859 

def _merge_content_stream_to_page(
    self,
    page: PageObject,
    new_content_data: bytes,
) -> None:
    """
    Combines existing content stream(s) with new content (as bytes).

    * An array of content streams gets a new stream appended.
    * A single content stream is replaced by a new stream holding the old
      data, a newline and the new data.
    * A page without usable content (no "/Contents" entry, or an entry that
      resolves to neither an array nor a stream, such as null) gets a new
      stream holding only the new data.

    Args:
        page: The page to which the new content data will be added.
        new_content_data: A binary-encoded new content stream, for
            instance the commands to draw an XObject.
    """
    contents_key = NameObject("/Contents")
    # Resolve the existing page content, if any.
    # PDF Explained by John Whitington
    # https://www.oreilly.com/library/view/pdf-explained/9781449321581/ch04.html
    existing_content = page[contents_key].get_object() if contents_key in page else None

    if isinstance(existing_content, ArrayObject):
        # Create a new StreamObject for the new_content_data
        new_stream_obj = StreamObject()
        new_stream_obj.set_data(new_content_data)
        existing_content.append(self._add_object(new_stream_obj))
        page[contents_key] = self._add_object(existing_content)
        return

    if isinstance(existing_content, StreamObject):
        # Merge new content to existing StreamObject
        merged_data = existing_content.get_data() + b"\n" + new_content_data
    else:
        # No usable existing content: treat the page as empty instead of
        # silently dropping the new content.
        merged_data = new_content_data

    new_stream = StreamObject()
    new_stream.set_data(merged_data)
    page[contents_key] = self._add_object(new_stream)

898 

def _add_apstream_object(
    self,
    page: PageObject,
    appearance_stream_obj: StreamObject,
    object_name: str,
    x_offset: float,
    y_offset: float,
) -> None:
    """
    Paint an appearance stream onto a page as a form XObject.

    The stream is registered with this writer, listed in the page's
    ``/XObject`` resources under ``/Fm_<object_name>`` and drawn through
    content stream commands appended to the page.

    Args:
        page: The page to which to add the appearance stream.
        appearance_stream_obj: The appearance stream.
        object_name: The name of the appearance stream.
        x_offset: The horizontal offset for the appearance stream.
        y_offset: The vertical offset for the appearance stream.
    """
    page_resources = cast(DictionaryObject, page[PG.RESOURCES])

    # Only font resources are carried over from the appearance stream for
    # now; other resource kinds could be merged the same way.
    if "/Resources" in appearance_stream_obj:
        stream_resources = cast(DictionaryObject, appearance_stream_obj["/Resources"])
        stream_fonts = cast(DictionaryObject, stream_resources.get("/Font", DictionaryObject()))
        if "/Font" not in page_resources:
            page_resources[NameObject("/Font")] = self._add_object(DictionaryObject())
        page_fonts = cast(DictionaryObject, page_resources["/Font"].get_object())
        for name, font in stream_fonts.items():
            if name in page_fonts:
                continue  # the page's own font of that name wins
            page_fonts[name] = self._add_object(font)

    # Always register the stream to obtain an IndirectObject that is
    # managed by *this* writer.
    form_ref = self._add_object(appearance_stream_obj)
    form_name = NameObject(f"/Fm_{object_name}")._sanitize()

    if "/XObject" not in page_resources:
        page_resources[NameObject("/XObject")] = DictionaryObject()
    page_xobjects = cast(DictionaryObject, page_resources["/XObject"])
    if form_name in page_xobjects:
        logger_warning(
            f"XObject {form_name!r} already added to page resources. This might be an issue.",
            __name__
        )
    else:
        page_xobjects[form_name] = form_ref

    # Translate to the requested offset, paint, and restore graphics state.
    placement = Transformation().translate(x_offset, y_offset)
    draw_commands = f"q\n{placement._to_cm()}\n{form_name} Do\nQ".encode()
    self._merge_content_stream_to_page(page, draw_commands)

951 

# Default for the ``flags`` parameter below: an FfBits value of 0.
FFBITS_NUL = FA.FfBits(0)

def update_page_form_field_values(
    self,
    page: Union[PageObject, list[PageObject], None],
    fields: Mapping[str, Union[str, list[str], tuple[str, str, float]]],
    flags: FA.FfBits = FFBITS_NUL,
    auto_regenerate: Optional[bool] = True,
    flatten: bool = False,
) -> None:
    """
    Update the form field values for a given page from a fields dictionary.

    Copy field texts and values from fields to page.
    If the field links to a parent object, add the information to the parent.

    Args:
        page: `PageObject` - references **PDF writer's page** where the
            annotations and field data will be updated.
            `List[Pageobject]` - provides list of pages to be processed.
            `None` - all pages.
        fields: a Python dictionary of:

            * field names (/T) as keys and text values (/V) as value
            * field names (/T) as keys and list of text values (/V) for multiple choice list
            * field names (/T) as keys and tuple of:
                * text values (/V)
                * font id (e.g. /F1, the font id must exist)
                * font size (0 for autosize)

        flags: A set of flags from :class:`~pypdf.constants.FieldDictionaryAttributes.FfBits`.

        auto_regenerate: Set/unset the need_appearances flag;
            the flag is unchanged if auto_regenerate is None.

        flatten: Whether or not to flatten the annotation. If True, this adds the annotation's
            appearance stream to the page contents. Note that this option does not remove the
            annotation itself.

    Raises:
        PyPdfError: If the document has no /AcroForm dictionary, or the
            /AcroForm dictionary has no /Fields entry.

    """
    if CatalogDictionary.ACRO_FORM not in self._root_object:
        raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
    acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    if InteractiveFormDictEntries.Fields not in acro_form:
        raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
    if isinstance(auto_regenerate, bool):
        self.set_need_appearances_writer(auto_regenerate)

    # Several pages: process each one that has annotations. ``None`` is
    # passed for auto_regenerate because the flag was already handled above.
    if page is None:
        page = list(self.pages)
    if isinstance(page, list):
        for p in page:
            if PG.ANNOTS in p:  # just to prevent warnings
                self.update_page_form_field_values(p, fields, flags, None, flatten=flatten)
        return
    if PG.ANNOTS not in page:
        logger_warning("No fields to update on this page", __name__)
        return

    appearance_stream_obj: Optional[StreamObject] = None

    for annotation in page[PG.ANNOTS]:  # type: ignore
        annotation = cast(DictionaryObject, annotation.get_object())
        if annotation.get("/Subtype", "") != "/Widget":
            continue
        # A widget carrying both /FT and /T is its own field; otherwise
        # the field data lives on its /Parent.
        if "/FT" in annotation and "/T" in annotation:
            parent_annotation = annotation
        else:
            parent_annotation = annotation.get(
                PG.PARENT, DictionaryObject()
            ).get_object()

        for field, value in fields.items():
            rectangle = cast(RectangleObject, annotation[AA.Rect])
            if not (
                self._get_qualified_field_name(parent_annotation) == field
                or parent_annotation.get("/T", None) == field
            ):
                continue

            # Start every matched field without an appearance stream.
            # Otherwise a field type that generates none (e.g. /Sig) would,
            # when flattening, re-use the stream of a previously processed
            # field and paint it at this widget's rectangle.
            appearance_stream_obj = None

            if (
                parent_annotation.get("/FT", None) == "/Ch"
                and "/I" in parent_annotation
            ):
                del parent_annotation["/I"]
            if flags:
                annotation[NameObject(FA.Ff)] = NumberObject(flags)

            # Set the field value
            if not (value is None and flatten):  # Only change values if given by user and not flattening.
                if isinstance(value, list):
                    lst = ArrayObject(TextStringObject(v) for v in value)
                    parent_annotation[NameObject(FA.V)] = lst
                elif isinstance(value, tuple):
                    annotation[NameObject(FA.V)] = TextStringObject(
                        value[0],
                    )
                else:
                    parent_annotation[NameObject(FA.V)] = TextStringObject(value)

            # Get or create the field's appearance stream object
            field_type = parent_annotation.get(FA.FT)
            if field_type == "/Btn":
                # Checkbox button (no /FT found in Radio widgets);
                # We can find the associated appearance stream object
                # within the annotation.
                v = NameObject(value)
                ap = cast(DictionaryObject, annotation[NameObject(AA.AP)])
                normal_ap = cast(DictionaryObject, ap["/N"])
                if v not in normal_ap:
                    v = NameObject("/Off")
                appearance_stream_obj = normal_ap.get(v)
                # Other cases will be updated through the for loop
                annotation[NameObject(AA.AS)] = v
                annotation[NameObject(FA.V)] = v
            elif field_type == "/Tx" or field_type == "/Ch":
                # Textbox; we need to generate the appearance stream object
                if isinstance(value, tuple):
                    appearance_stream_obj = TextStreamAppearance.from_text_annotation(
                        acro_form, parent_annotation, annotation, value[1], value[2]
                    )
                else:
                    appearance_stream_obj = TextStreamAppearance.from_text_annotation(
                        acro_form, parent_annotation, annotation
                    )
                # Add the appearance stream object
                if AA.AP not in annotation:
                    annotation[NameObject(AA.AP)] = DictionaryObject(
                        {NameObject("/N"): self._add_object(appearance_stream_obj)}
                    )
                elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])):
                    ap[NameObject("/N")] = self._add_object(appearance_stream_obj)
                else:  # [/AP][/N] exists
                    # Re-use the object number of the existing normal
                    # appearance so references to it stay valid.
                    n = ap["/N"].indirect_reference.idnum  # type: ignore
                    self._objects[n - 1] = appearance_stream_obj
                    appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self)
            elif (
                annotation.get(FA.FT) == "/Sig"
            ):  # deprecated  # not implemented yet
                logger_warning("Signature forms not implemented yet", __name__)

            if flatten and appearance_stream_obj is not None:
                self._add_apstream_object(page, appearance_stream_obj, field, rectangle[0], rectangle[1])

1094 

def reattach_fields(
    self, page: Optional[PageObject] = None
) -> list[DictionaryObject]:
    """
    Look for orphan form fields among a page's annotations and reattach
    them to the /AcroForm /Fields structure.

    Args:
        page: page to analyze.
            If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields.

    """
    if page is None:
        # Whole document: concatenate the per-page results.
        return [
            field
            for single_page in self.pages
            for field in self.reattach_fields(single_page)
        ]

    # Fetch /AcroForm and its /Fields array, creating either if missing.
    try:
        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        acro_form = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = acro_form
    try:
        known_fields = cast(ArrayObject, acro_form[InteractiveFormDictEntries.Fields])
    except KeyError:
        known_fields = ArrayObject()
        acro_form[NameObject(InteractiveFormDictEntries.Fields)] = known_fields

    reattached: list[DictionaryObject] = []
    if "/Annots" not in page:
        return reattached

    annots = cast(ArrayObject, page["/Annots"])
    for position, entry in enumerate(annots):
        was_indirect = isinstance(entry, IndirectObject)
        widget = cast(DictionaryObject, entry.get_object())
        # Only widget annotations that carry a field type are candidates.
        if widget.get("/Subtype", "") != "/Widget" or "/FT" not in widget:
            continue
        if (
            "indirect_reference" in widget.__dict__
            and widget.indirect_reference in known_fields
        ):
            continue  # already listed in /Fields
        if not was_indirect:
            # Inline annotation: register it so that it can be referenced.
            annots[position] = self._add_object(widget)
        known_fields.append(widget.indirect_reference)
        reattached.append(widget)
    return reattached

1144 

1145 def _collect_incremental_clone_object_ids(self, reader: PdfReader) -> list[int]: 

1146 object_ids: set[int] = set() 

1147 for xref_entry in reader.xref.values(): 

1148 object_ids.update(filter(None, xref_entry)) 

1149 object_ids.update(filter(None, reader.xref_objStm)) 

1150 

1151 object_count = len(object_ids) 

1152 if object_count > self._incremental_clone_object_count_limit: 

1153 raise LimitReachedError( 

1154 f"Incremental clone object count {object_count} exceeds " 

1155 f"maximum allowed count {self._incremental_clone_object_count_limit}." 

1156 ) 

1157 

1158 max_object_id = max(object_ids, default=0) 

1159 if max_object_id > self._incremental_clone_object_id_limit: 

1160 raise LimitReachedError( 

1161 f"Incremental clone object ID {max_object_id} exceeds " 

1162 f"maximum allowed ID {self._incremental_clone_object_id_limit}." 

1163 ) 

1164 

1165 return sorted(object_ids) 

1166 

def clone_reader_document_root(self, reader: PdfReader) -> None:
    """
    Copy the reader document root to the writer and all sub-elements,
    including pages, threads, outlines,... For partial insertion, ``append``
    should be considered.

    Args:
        reader: PdfReader from which the document root should be copied.

    Raises:
        PdfReadError: In strict mode, if more objects were collected than
            the reader's trailer ``/Size`` declares; in any mode, if
            flattening the page tree fails with an IndexError.

    """
    self._info_obj = None
    if self.incremental:
        # Keep the original object numbers: slot ``n - 1`` holds object n.
        object_ids = self._collect_incremental_clone_object_ids(reader)
        self._objects = [None] * (object_ids[-1] if object_ids else 0)
        for object_id in object_ids:
            reader_object = reader.get_object(object_id)
            if reader_object is not None:
                self._objects[object_id - 1] = reader_object.replicate(self)
    else:
        self._objects.clear()
    self._root_object = reader.root_object.clone(self)
    self._pages = self._root_object.raw_get("/Pages")

    if len(self._objects) > cast(int, reader.trailer["/Size"]):
        size_message = (
            f"Object count {len(self._objects)} exceeds defined trailer size {reader.trailer['/Size']}"
        )
        if self.strict:
            raise PdfReadError(size_message)
        logger_warning(size_message, __name__)

    # must be done here before rewriting
    if self.incremental:
        self._original_hash = [
            (obj.hash_bin() if obj is not None else 0) for obj in self._objects
        ]

    try:
        self._flatten()
    except IndexError as exc:
        # Chain explicitly so the original failure stays attached as the cause.
        raise PdfReadError("Got index error while flattening.") from exc

    assert self.flattened_pages is not None
    for p in self.flattened_pages:
        self._replace_object(cast(IndirectObject, p.indirect_reference).idnum, p)
        if not self.incremental:
            p[NameObject("/Parent")] = self._pages
    if not self.incremental:
        # The page tree becomes a flat list of all pages.
        cast(DictionaryObject, self._pages.get_object())[
            NameObject("/Kids")
        ] = ArrayObject([p.indirect_reference for p in self.flattened_pages])

1220 

def clone_document_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Clone a document from a PDF file reader: its '/Root', '/Info' and
    '/ID' sections are copied into this writer.

    Args:
        reader: PDF file reader instance from which the clone
            should be created.
        after_page_append:
            Callback function that is invoked after each page is appended to
            the writer. Signature includes a reference to the appended page
            (delegates to append_pages_from_reader). The single parameter of
            the callback is a reference to the page just appended to the
            document.

    """
    self.clone_reader_document_root(reader)

    source_info = reader._info
    if source_info is not None:
        if self.incremental:
            self._info_obj = cast(
                IndirectObject, source_info.clone(self).indirect_reference
            )
            assert isinstance(self._info, DictionaryObject), "for mypy"
            # Record the cloned /Info hash as the original state.
            info_slot = self._info_obj.indirect_reference.idnum - 1
            self._original_hash[info_slot] = self._info.hash_bin()
        else:
            info_copy = DictionaryObject(cast(DictionaryObject, source_info.get_object()))
            self._info_obj = self._add_object(info_copy)
    # Without reader info, _info_obj stays None as set by
    # clone_reader_document_root().

    try:
        self._ID = cast(ArrayObject, reader._ID).clone(self)
    except AttributeError:
        pass

    if callable(after_page_append):
        pages_node = cast(DictionaryObject, self._pages.get_object())
        for kid in cast(ArrayObject, pages_node["/Kids"]):
            after_page_append(kid.get_object())

1268 

def _compute_document_identifier(self) -> ByteStringObject:
    """
    Derive an identifier from the serialized PDF structure.

    The structure is written to an in-memory buffer and its rolling
    checksum, UTF-8 encoded, becomes the identifier.

    Returns:
        The identifier as a byte string object.
    """
    buffer = BytesIO()
    self._write_pdf_structure(buffer)
    buffer.seek(0)
    checksum = _rolling_checksum(buffer)
    return ByteStringObject(checksum.encode("utf8"))

1274 

def generate_file_identifiers(self) -> None:
    """
    Generate an identifier for the PDF that will be written.

    The only point of this is ensuring uniqueness. Reproducibility is not
    required.
    When a file is first written, both identifiers shall be set to the same value.
    If both identifiers match when a file reference is resolved, it is very
    likely that the correct and unchanged file has been found. If only the first
    identifier matches, a different version of the correct file has been found.
    see §14.4 "File Identifiers".
    """
    had_identifier = bool(self._ID)
    # An existing first identifier is kept; only the second one changes.
    first_id = self._ID[0] if had_identifier else None
    second_id = self._compute_document_identifier()
    if not had_identifier:
        first_id = second_id
    self._ID = ArrayObject((first_id, second_id))

1294 

def encrypt(
    self,
    user_password: str,
    owner_password: Optional[str] = None,
    use_128bit: bool = True,
    permissions_flag: UserAccessPermissions = ALL_DOCUMENT_PERMISSIONS,
    *,
    algorithm: Optional[str] = None,
) -> None:
    """
    Encrypt this PDF file with the PDF Standard encryption handler.

    Args:
        user_password: The password which allows for opening
            and reading the PDF file with the restrictions provided.
        owner_password: The password which allows for
            opening the PDF files without any restrictions. By default,
            the owner password is the same as the user password.
        use_128bit: flag as to whether to use 128bit
            encryption. When false, 40bit encryption will be used.
            By default, this flag is on.
        permissions_flag: permissions as described in
            Table 3.20 of the PDF 1.7 specification. A bit value of 1 means
            the permission is granted.
            Hence an integer value of -1 will set all flags.
            Bit position 3 is for printing, 4 is for modifying content,
            5 and 6 control annotations, 9 for form fields,
            10 for extraction of text and graphics.
        algorithm: encrypt algorithm. Values may be one of "RC4-40", "RC4-128",
            "AES-128", "AES-256-R5", "AES-256". If it is valid,
            `use_128bit` will be ignored.

    Raises:
        ValueError: If ``algorithm`` does not name an attribute of
            ``EncryptAlgorithm``.

    """
    if owner_password is None:
        owner_password = user_password

    if algorithm is not None:
        try:
            alg = getattr(EncryptAlgorithm, algorithm.replace("-", "_"))
        except AttributeError as exc:
            # Chain explicitly so the failed lookup stays attached as the cause.
            raise ValueError(f"Algorithm '{algorithm}' NOT supported") from exc
    else:
        alg = EncryptAlgorithm.RC4_128
        if not use_128bit:
            alg = EncryptAlgorithm.RC4_40

    # The first file identifier is an input of the encryption setup.
    self.generate_file_identifiers()
    assert self._ID
    self._encryption = Encryption.make(alg, permissions_flag, self._ID[0])
    # in case call `encrypt` again
    entry = self._encryption.write_entry(user_password, owner_password)
    if self._encrypt_entry:
        # replace old encrypt_entry, keeping its object number
        assert self._encrypt_entry.indirect_reference is not None
        entry.indirect_reference = self._encrypt_entry.indirect_reference
        self._objects[entry.indirect_reference.idnum - 1] = entry
    else:
        self._add_object(entry)
    self._encrypt_entry = entry

1353 

1354 def _resolve_links(self) -> None: 

1355 """Patch up links that were added to the document earlier, to 

1356 make sure they still point to the same pages. 

1357 """ 

1358 for (new_link, old_link) in self._unresolved_links: 

1359 old_page = old_link.find_referenced_page() 

1360 if not old_page: 

1361 continue 

1362 new_page = self._merged_in_pages.get(old_page) 

1363 if new_page is None: 

1364 continue 

1365 new_link.patch_reference(self, new_page) 

1366 

def write_stream(self, stream: StreamType) -> None:
    """
    Serialize the document to an already opened stream.

    In incremental mode the reader's original bytes are copied first and
    an increment is appended only if objects were added or modified.
    Otherwise the full structure, xref table and trailer are written.

    Args:
        stream: The stream to write to. A warning is logged if it exposes
            a ``mode`` without ``"b"``.
    """
    if hasattr(stream, "mode") and "b" not in stream.mode:
        logger_warning(
            f"File <{stream.name}> to write to is not in binary mode. "
            "It may not be written to correctly.",
            __name__,
        )
    self._resolve_links()

    if not self.incremental:
        positions, free_ids = self._write_pdf_structure(stream)
        xref_offset = self._write_xref_table(stream, positions, free_ids)
        self._write_trailer(stream, xref_offset)
        return

    # Incremental: original bytes first, then the changes (if any).
    self._reader.stream.seek(0)
    stream.write(self._reader.stream.read(-1))
    if len(self.list_objects_in_increment()) > 0:
        self._write_increment(stream)  # writes objs, xref stream and startxref

1387 

def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
    """
    Write the collection of pages added to this object out as a PDF file.

    Args:
        stream: An object to write the file to. The object can support
            the write method and the tell method, similar to a file object, or
            be a file path, just like the fileobj, just named it stream to keep
            existing workflow.

    Returns:
        A tuple (bool, IO): whether the file was opened (and closed) by
        this method, and the stream that was written to.

    Raises:
        ValueError: If ``stream`` is an empty string.

    """
    if stream == "":
        raise ValueError(f"Output({stream=}) is empty.")

    my_file = isinstance(stream, (str, Path))
    if my_file:
        stream = FileIO(stream, "wb")

    try:
        self.write_stream(stream)
    finally:
        # A file opened here is closed even when writing fails, so the
        # handle is not leaked. Caller-owned streams are left alone.
        if my_file:
            stream.close()

    if not my_file:
        stream.flush()

    return my_file, stream

1419 

def list_objects_in_increment(self) -> list[IndirectObject]:
    """
    For analysis or debugging.
    Provides the list of new or modified objects that will be written
    in the increment.
    Deleted objects will not be freed but will become orphans.

    Returns:
        List of new or modified IndirectObjects

    """
    known_count = len(self._original_hash)
    changed = []
    for slot, obj in enumerate(self._objects):
        if obj is None:
            continue
        # New: beyond the recorded hashes. Modified: hash differs.
        if slot >= known_count or obj.hash_bin() != self._original_hash[slot]:
            changed.append(cast(IndirectObject, obj).indirect_reference)
    return changed

1443 

def _write_increment(self, stream: StreamType) -> None:
    """
    Append new and modified objects plus a cross-reference stream.

    Objects are those beyond the recorded original hashes or whose hash
    differs from the recorded one. The xref stream lists them as blocks
    of consecutive object numbers and points back to the reader's
    previous ``startxref``.

    Args:
        stream: The stream to append to; it must support ``tell``.
    """
    offsets = {}
    index_blocks = []
    block_start = -1
    block_stop = -2
    known_count = len(self._original_hash)

    for idnum, obj in enumerate(self._objects, start=1):
        if obj is None:
            continue
        slot = idnum - 1
        changed = slot >= known_count or obj.hash_bin() != self._original_hash[slot]
        if not changed:
            continue
        assert isinstance(obj, PdfObject), "mypy"

        # first write new/modified object
        offsets[idnum] = stream.tell()
        stream.write(f"{idnum} 0 obj\n".encode())
        # encryption is not operational:
        #   if self._encryption and obj != self._encrypt_entry:
        #       obj = self._encryption.encrypt_object(obj, idnum, 0)
        obj.write_to_stream(stream)
        stream.write(b"\nendobj\n")

        # prepare xref: close the running block when numbering jumps
        if idnum != block_stop:
            if block_start > 0:
                index_blocks.append([block_start, block_stop - block_start])
            block_start = idnum
        block_stop = idnum + 1

    assert block_start > 0, "for pytest only"
    index_blocks.append([block_start, block_stop - block_start])

    # write incremented xref
    xref_location = stream.tell()
    xr_id = len(self._objects) + 1
    stream.write(f"{xr_id} 0 obj".encode())
    init_data = {
        NameObject("/Type"): NameObject("/XRef"),
        NameObject("/Size"): NumberObject(xr_id + 1),
        NameObject("/Root"): self.root_object.indirect_reference,
        NameObject("/Filter"): NameObject("/FlateDecode"),
        NameObject("/Index"): ArrayObject(
            [NumberObject(number) for block in index_blocks for number in block]
        ),
        NameObject("/W"): ArrayObject(
            [NumberObject(1), NumberObject(4), NumberObject(1)]
        ),
        "__streamdata__": b"",
    }

    # /Info is listed only when it is new or differs from its recorded hash.
    info = self._info
    if info is not None:
        info_slot = info.indirect_reference.idnum - 1  # type: ignore
        if (
            info_slot >= len(self._original_hash)
            or cast(IndirectObject, info).hash_bin()  # kept for future
            != self._original_hash[info_slot]
        ):
            init_data[NameObject(TK.INFO)] = info.indirect_reference
    init_data[NameObject(TK.PREV)] = NumberObject(self._reader._startxref)
    if self._ID:
        init_data[NameObject(TK.ID)] = self._ID

    xr = StreamObject.initialize_from_dictionary(init_data)
    # One packed (1, offset, 0) record per written object, matching /W.
    xr.set_data(
        b"".join(struct.pack(b">BIB", 1, position, 0) for position in offsets.values())
    )
    xr.write_to_stream(stream)
    stream.write(f"\nendobj\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof

1514 

1515 def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]]: 

1516 object_positions = [] 

1517 free_objects = [] 

1518 stream.write(self.pdf_header.encode() + b"\n") 

1519 stream.write(b"%\xE2\xE3\xCF\xD3\n") 

1520 

1521 for idnum, obj in enumerate(self._objects, start=1): 

1522 if obj is not None: 

1523 object_positions.append(stream.tell()) 

1524 stream.write(f"{idnum} 0 obj\n".encode()) 

1525 if self._encryption and obj != self._encrypt_entry: 

1526 obj = self._encryption.encrypt_object(obj, idnum, 0) 

1527 obj.write_to_stream(stream) 

1528 stream.write(b"\nendobj\n") 

1529 else: 

1530 object_positions.append(-1) 

1531 free_objects.append(idnum) 

1532 free_objects.append(0) # add 0 to loop in accordance with specification 

1533 return object_positions, free_objects 

1534 

1535 def _write_xref_table( 

1536 self, stream: StreamType, object_positions: list[int], free_objects: list[int] 

1537 ) -> int: 

1538 xref_location = stream.tell() 

1539 stream.write(b"xref\n") 

1540 stream.write(f"0 {len(self._objects) + 1}\n".encode()) 

1541 stream.write(f"{free_objects[0]:0>10} {65535:0>5} f \n".encode()) 

1542 free_idx = 1 

1543 for offset in object_positions: 

1544 if offset > 0: 

1545 stream.write(f"{offset:0>10} {0:0>5} n \n".encode()) 

1546 else: 

1547 stream.write(f"{free_objects[free_idx]:0>10} {1:0>5} f \n".encode()) 

1548 free_idx += 1 

1549 return xref_location 

1550 

def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
    """
    Write the PDF trailer to the stream.

    To quote the PDF specification:
        [The] trailer [gives] the location of the cross-reference table and
        of certain special objects within the body of the file.

    Args:
        stream: The stream to write to.
        xref_location: Position of the cross-reference table, written
            after ``startxref``.
    """
    stream.write(b"trailer\n")

    required_entries = {
        NameObject(TK.SIZE): NumberObject(len(self._objects) + 1),
        NameObject(TK.ROOT): self.root_object.indirect_reference,
    }
    trailer = DictionaryObject(required_entries)

    # Optional entries are written only when present on the writer.
    if self._info is not None:
        trailer[NameObject(TK.INFO)] = self._info.indirect_reference
    if self._ID is not None:
        trailer[NameObject(TK.ID)] = self._ID
    if self._encrypt_entry:
        trailer[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference

    trailer.write_to_stream(stream)
    stream.write(f"\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof

1574 

@property
def metadata(self) -> Optional[DocumentInformation]:
    """
    Retrieve/set the PDF file's document information dictionary, if it exists.

    Args:
        value: dict with the entries to be set. if None : remove the /Info entry from the pdf.

    Note that some PDF files use (XMP) metadata streams instead of document
    information dictionaries, and these metadata streams will not be
    accessed by this function, but by :meth:`~xmp_metadata`.

    """
    return super().metadata

@metadata.setter
def metadata(
    self,
    value: Optional[Union[DocumentInformation, DictionaryObject, dict[Any, Any]]],
) -> None:
    # ``None`` drops the information dictionary altogether.
    if value is None:
        self._info = None
        return

    # Otherwise the existing entries are replaced by the given ones.
    if self._info is not None:
        self._info.clear()
    self.add_metadata(value)

1602 

def add_metadata(self, infos: dict[str, Any]) -> None:
    """
    Add custom metadata to the output.

    Every value is converted with ``str`` and stored as a string object;
    the information dictionary is created when there is none yet.

    Args:
        infos: a Python dictionary where each key is a field
            and each value is your new metadata.

    """
    if isinstance(infos, PdfObject):
        infos = cast(DictionaryObject, infos.get_object())

    converted = {}
    for name, raw in list(infos.items()):
        resolved = raw.get_object() if isinstance(raw, PdfObject) else raw
        converted[NameObject(name)] = create_string_object(str(resolved))

    if self._info is None:
        self._info = DictionaryObject()
    self._info.update(converted)

1622 

# Sentinel default telling "argument not supplied" apart from any real
# value passed for the deprecated parameters below.
_UNSET = object()

def compress_identical_objects(
    self,
    remove_identicals: Any = _UNSET,
    remove_orphans: Any = _UNSET,
    *,
    remove_duplicates: bool = True,
    remove_unreferenced: bool = True,
) -> None:
    """
    Parse the PDF file and merge objects that have the same hash.
    This will make objects common to multiple pages.
    Recommended to be used just before writing output.

    Args:
        remove_identicals: Deprecated.
        remove_orphans: Deprecated.
        remove_duplicates: Remove duplicate objects.
        remove_unreferenced: Remove unreferenced objects.

    """
    # The sentinel is compared by identity, so an argument with a custom
    # ``__ne__`` cannot confuse the "was it supplied?" test.
    if remove_identicals is not self._UNSET:
        deprecate_with_replacement("remove_identicals", "remove_duplicates", "7.0.0")
        assert isinstance(remove_identicals, bool)
        remove_duplicates = remove_identicals
    if remove_orphans is not self._UNSET:
        deprecate_with_replacement("remove_orphans", "remove_unreferenced", "7.0.0")
        assert isinstance(remove_orphans, bool)
        remove_unreferenced = remove_orphans

    def replace_in_obj(
        obj: PdfObject, crossref: dict[IndirectObject, IndirectObject]
    ) -> None:
        # Walk a dictionary/array recursively: mark every referenced
        # object as used and redirect references to removed duplicates.
        if isinstance(obj, DictionaryObject):
            key_val = obj.items()
        elif isinstance(obj, ArrayObject):
            key_val = enumerate(obj)  # type: ignore
        else:
            return
        assert isinstance(obj, (DictionaryObject, ArrayObject))
        for k, v in key_val:
            if isinstance(v, IndirectObject):
                unreferenced[v.idnum - 1] = False
                if v in crossref:
                    kept = crossref[v]
                    obj[k] = kept
                    # The kept object is now referenced from here. Without
                    # marking it, an object reachable only through
                    # redirected references would be dropped below.
                    unreferenced[kept.idnum - 1] = False
            else:
                # The filtering on DictionaryObject and ArrayObject only
                # will be performed within replace_in_obj
                replace_in_obj(v, crossref)

    # _idnum_hash: dict[hash] = (1st_ind_obj, [2nd_ind_obj,...])
    self._idnum_hash = {}
    unreferenced = [True] * len(self._objects)
    # look for similar objects
    for idx, obj in enumerate(self._objects):
        if is_null_or_none(obj):
            continue
        assert obj is not None, "mypy"  # mypy: TypeGuard of `is_null_or_none` does not help here.
        assert isinstance(obj.indirect_reference, IndirectObject)
        h = obj.hash_value()
        if remove_duplicates and h in self._idnum_hash:
            # Duplicate: remember its reference and free its slot.
            self._idnum_hash[h][1].append(obj.indirect_reference)
            self._objects[idx] = None
        else:
            self._idnum_hash[h] = (obj.indirect_reference, [])

    # generate the dict converting others to 1st
    cnv = {v[0]: v[1] for v in self._idnum_hash.values() if len(v[1]) > 0}
    cnv_rev: dict[IndirectObject, IndirectObject] = {}
    for k, v in cnv.items():
        cnv_rev.update(zip(v, (k,) * len(v)))

    # replace reference to merged objects
    for obj in self._objects:
        if isinstance(obj, (DictionaryObject, ArrayObject)):
            replace_in_obj(obj, cnv_rev)

    if remove_unreferenced:
        # Root, /Info and /ID (when indirect) are referenced from the
        # trailer, not from other objects, so they are kept explicitly.
        unreferenced[self.root_object.indirect_reference.idnum - 1] = False  # type: ignore

        if not is_null_or_none(self._info):
            unreferenced[self._info.indirect_reference.idnum - 1] = False  # type: ignore

        try:
            unreferenced[self._ID.indirect_reference.idnum - 1] = False  # type: ignore
        except AttributeError:
            pass

        for i in compress(range(len(self._objects)), unreferenced):
            self._objects[i] = None

1714 

def get_reference(self, obj: PdfObject) -> IndirectObject:
    """
    Build the indirect reference of an object stored in this writer.

    Args:
        obj: An object contained in the writer's object list.

    Returns:
        An IndirectObject with generation 0 whose number is the object's
        1-based position in the object list.
    """
    position = self._objects.index(obj)
    reference = IndirectObject(position + 1, 0, self)
    assert reference.get_object() == obj
    return reference

1720 

def get_outline_root(self) -> TreeObject:
    """
    Return the outline root of the document catalog, creating it if absent.

    An existing outline that is not a TreeObject is converted to one and
    replaces the original object in the writer.

    Returns:
        The outline root as a TreeObject.
    """
    if CO.OUTLINES not in self._root_object:
        # No outline yet: register an empty tree in the catalog.
        created = TreeObject()
        created.update({})
        created_ref = self._add_object(created)
        self._root_object[NameObject(CO.OUTLINES)] = created_ref
        return created

    # Entries in the catalog dictionary
    outline = cast(TreeObject, self._root_object[CO.OUTLINES])
    if not isinstance(outline, TreeObject):
        as_tree = TreeObject(outline)
        self._replace_object(outline.indirect_reference.idnum, as_tree)
        outline = as_tree

    # Check that the outline resolves through its position in the object list.
    position = self._objects.index(outline)
    outline_ref = IndirectObject(position + 1, 0, self)
    assert outline_ref.get_object() == outline
    return outline

1739 

def get_threads_root(self) -> ArrayObject:
    """
    Return the list of threads, creating an empty one when missing.

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Returns:
        An array (possibly empty) of Dictionaries with an ``/F`` key,
        and optionally information about the thread in ``/I`` or ``/Metadata`` keys.

    """
    if CO.THREADS not in self._root_object:
        # No entry in the catalog yet: register a new, empty array.
        new_threads = ArrayObject()
        self._root_object[NameObject(CO.THREADS)] = new_threads
        return new_threads
    # Entries in the catalog dictionary
    return cast(ArrayObject, self._root_object[CO.THREADS])

1758 

@property
def threads(self) -> ArrayObject:
    """
    Read-only access to the list of threads.

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Every element is a dictionary with an ``/F`` key, optionally carrying
    information about the thread in ``/I`` or ``/Metadata`` keys.
    """
    threads_root = self.get_threads_root()
    return threads_root

1770 

def add_outline_item_destination(
    self,
    page_destination: Union[IndirectObject, PageObject, TreeObject],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Insert an outline item into the outline tree.

    Args:
        page_destination: The item to insert. A :class:`PageObject` is first
            wrapped in a :class:`Destination` titled ``page #<n>`` that uses
            ``Fit.fit()``.
        parent: Outline item that receives the new child. Defaults to the
            outline root.
        before: Existing item whose indirect reference is handed to
            ``insert_child`` as the insertion position; ``None`` is passed
            through unchanged.
        is_open: Stored on the item under ``/%is_open%``. When true, the
            item's ``inc_parent_counter_outline`` is used as the counter
            callback; otherwise a callback returning ``0`` is used.

    Returns:
        The indirect reference of the inserted outline item.

    """
    page_destination = cast(PageObject, page_destination.get_object())
    if isinstance(page_destination, PageObject):
        # Forward parent/before/is_open as well; they used to be dropped
        # here, so pages were always added, opened, at the end of the root.
        return self.add_outline_item_destination(
            Destination(
                f"page #{page_destination.page_number}",
                cast(IndirectObject, page_destination.indirect_reference),
                Fit.fit(),
            ),
            parent,
            before,
            is_open,
        )

    if parent is None:
        parent = self.get_outline_root()

    page_destination[NameObject("/%is_open%")] = BooleanObject(is_open)
    parent = cast(TreeObject, parent.get_object())
    page_destination_ref = self._add_object(page_destination)
    if before is not None:
        before = before.indirect_reference
    parent.insert_child(
        page_destination_ref,
        before,
        self,
        page_destination.inc_parent_counter_outline
        if is_open
        else (lambda x, y: 0),  # noqa: ARG005
    )
    if "/Count" not in page_destination:
        page_destination[NameObject("/Count")] = NumberObject(0)

    return page_destination_ref

1808 

def add_outline_item_dict(
    self,
    outline_item: OutlineItemType,
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item described by a dictionary.

    The entries of ``outline_item`` are copied into a new
    :class:`TreeObject`, which is then handed to
    :meth:`add_outline_item_destination` together with the remaining
    arguments.

    Returns:
        The indirect reference returned by
        :meth:`add_outline_item_destination`.

    """
    item = TreeObject()
    item.update(outline_item)
    # A snippet that copied an ``/A`` action dictionary into its own
    # indirect object was disabled here ("code currently unreachable");
    # the entries are forwarded exactly as copied above.
    return self.add_outline_item_destination(item, parent, before, is_open)

1831 

def add_outline_item(
    self,
    title: str,
    page_number: Union[None, PageObject, IndirectObject, int],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Union[tuple[float, float, float], str]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: Fit = PAGE_FIT,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item (commonly referred to as a "Bookmark") to the PDF file.

    Args:
        title: Title to use for this outline item.
        page_number: Page this outline item will point to: a page index,
            a page object or an indirect reference. With ``None`` the item
            is created without a GoTo action.
        parent: A reference to a parent outline item to create nested
            outline items. Defaults to the outline root.
        before: Forwarded to :meth:`add_outline_item_destination`;
            presumably the item before which the new one is inserted
            (TODO confirm against ``insert_child``).
        color: Color of the outline item's font as a red, green, blue tuple
            from 0.0 to 1.0 or as a Hex String (#RRGGBB)
        bold: Outline item font is bold
        italic: Outline item font is italic
        fit: The fit of the destination page.
        is_open: Forwarded to :meth:`add_outline_item_destination`.

    Returns:
        The added outline item as an indirect object.

    """
    page_ref: Union[None, NullObject, IndirectObject, NumberObject]
    if isinstance(italic, Fit):  # it means that we are on the old params
        # Legacy positional order: shift the arguments into their current
        # slots and dispatch again.
        if fit is not None and page_number is None:
            page_number = fit
        return self.add_outline_item(
            title, page_number, parent, None, before, color, bold, italic, is_open=is_open
        )
    if page_number is None:
        action_ref = None
    else:
        if isinstance(page_number, IndirectObject):
            page_ref = page_number
        elif isinstance(page_number, PageObject):
            page_ref = page_number.indirect_reference
        elif isinstance(page_number, int):
            try:
                page_ref = self.pages[page_number].indirect_reference
            except IndexError:
                # Index not present in this writer: keep the raw number.
                page_ref = NumberObject(page_number)
        if page_ref is None:
            logger_warning(
                f"can not find reference of page {page_number}",
                __name__,
            )
            page_ref = NullObject()
        dest = Destination(
            NameObject("/" + title + " outline item"),
            page_ref,
            fit,
        )

        # GoTo action that jumps to the destination built above.
        action_ref = self._add_object(
            DictionaryObject(
                {
                    NameObject(GoToActionArguments.D): dest.dest_array,
                    NameObject(GoToActionArguments.S): NameObject("/GoTo"),
                }
            )
        )
    outline_item = self._add_object(
        _create_outline_item(action_ref, title, color, italic, bold)
    )

    if parent is None:
        parent = self.get_outline_root()
    return self.add_outline_item_destination(outline_item, parent, before, is_open)

1909 

def add_outline(self) -> None:
    """
    Placeholder that is not implemented.

    Raises:
        NotImplementedError: Always.

    """
    message = "This method is not yet implemented. Use :meth:`add_outline_item` instead."
    raise NotImplementedError(message)

1914 

def add_named_destination_array(
    self, title: TextStringObject, destination: Union[IndirectObject, ArrayObject]
) -> None:
    """
    Insert a ``title``/``destination`` pair into the named destinations.

    The array returned by ``get_named_dest_root()`` is walked two entries
    at a time. The pair is inserted in front of the first visited entry
    that compares greater than ``title``, or appended at the end when no
    such entry exists.

    Args:
        title: Name of the destination.
        destination: Destination array or a reference to it.

    """
    names = self.get_named_dest_root()
    for position in range(0, len(names), 2):
        if title < names[position]:
            # Insert the value first so the name ends up in front of it.
            names.insert(position, destination)
            names.insert(position, TextStringObject(title))
            return
    names.extend([TextStringObject(title), destination])
    return

1928 

def add_named_destination_object(
    self,
    page_destination: PdfObject,
) -> IndirectObject:
    """
    Register a destination object as a named destination.

    The object's ``dest_array`` is added to the writer and recorded under
    the object's ``/Title`` entry.

    Args:
        page_destination: Object providing ``dest_array`` and a ``/Title``
            entry.

    Returns:
        The indirect reference of the stored destination array.

    """
    dest_ref = self._add_object(page_destination.dest_array)  # type: ignore
    dest_title = cast("TextStringObject", page_destination["/Title"])  # type: ignore
    self.add_named_destination_array(dest_title, dest_ref)
    return dest_ref

1939 

def add_named_destination(
    self,
    title: str,
    page_number: int,
) -> IndirectObject:
    """
    Create a named destination that points to a page.

    A ``/GoTo`` action dictionary targeting the page with a ``FIT_H`` fit
    at position 826 is added to the writer and recorded under ``title``.

    Args:
        title: Name of the destination; converted to a
            :class:`TextStringObject` when it is not one already.
        page_number: Index into the ``KIDS`` array of the writer's pages
            object.

    Returns:
        The indirect reference of the action dictionary.

    """
    page_ref = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    target = ArrayObject(
        [page_ref, NameObject(TypFitArguments.FIT_H), NumberObject(826)]
    )
    action = DictionaryObject()
    action.update(
        {
            NameObject(GoToActionArguments.D): target,
            NameObject(GoToActionArguments.S): NameObject("/GoTo"),
        }
    )
    action_ref = self._add_object(action)

    name = title if isinstance(title, TextStringObject) else TextStringObject(str(title))
    self.add_named_destination_array(name, action_ref)
    return action_ref

1962 

def remove_links(self) -> None:
    """Remove links and annotations from every page of this output."""
    for current_page in self.pages:
        self.remove_objects_from_page(
            current_page, ObjectDeletionFlag.ALL_ANNOTATIONS
        )

1967 

def remove_annotations(
    self, subtypes: Optional[Union[AnnotationSubtype, Iterable[AnnotationSubtype]]]
) -> None:
    """
    Remove annotations of the given subtypes from every page.

    Args:
        subtypes: subtype or list of subtypes to be removed.
            Examples are: "/Link", "/FileAttachment", "/Sound",
            "/Movie", "/Screen", ...
            If you want to remove all annotations, use subtypes=None.

    """
    strip_annotations = self._remove_annots_from_page
    for current_page in self.pages:
        strip_annotations(current_page, subtypes)

1983 

def _remove_annots_from_page(
    self,
    page: Union[IndirectObject, PageObject, DictionaryObject],
    subtypes: Optional[Iterable[str]],
) -> None:
    """
    Delete annotations of the given subtypes from one page.

    Args:
        page: The page (or a reference to it) to clean.
        subtypes: Annotation subtypes to delete; ``None`` deletes every
            annotation.

    """
    page = cast(DictionaryObject, page.get_object())
    if PG.ANNOTS not in page:
        return

    index = 0
    while index < len(cast(ArrayObject, page[PG.ANNOTS])):
        entry = cast(ArrayObject, page[PG.ANNOTS])[index]
        annotation = cast(DictionaryObject, entry.get_object())
        if subtypes is not None and cast(str, annotation["/Subtype"]) not in subtypes:
            # Not selected: keep it and move on to the next entry.
            index += 1
            continue
        if isinstance(entry, IndirectObject):
            self._objects[entry.idnum - 1] = NullObject()  # to reduce PDF size
        # The following entries shift down, so the index stays where it is.
        del page[PG.ANNOTS][index]  # type:ignore

2001 

def remove_objects_from_page(
    self,
    page: Union[PageObject, DictionaryObject],
    to_delete: Union[ObjectDeletionFlag, Iterable[ObjectDeletionFlag]],
    text_filters: Optional[dict[str, Any]] = None
) -> None:
    """
    Remove objects specified by ``to_delete`` from the given page.

    Args:
        page: Page object to clean up.
        to_delete: Objects to be deleted; can be a ``ObjectDeletionFlag``
            or a list of ObjectDeletionFlag
        text_filters: Properties of text to be deleted, if applicable. Optional.
            This is a Python dictionary with the following properties:

            * font_ids: List of font resource IDs (such as /F1 or /T1_0) to be deleted.

    """
    if isinstance(to_delete, (list, tuple)):
        for to_d in to_delete:
            # Forward the filters too; they used to be dropped here, so
            # the TEXT flag in a list removed all text regardless of font.
            self.remove_objects_from_page(page, to_d, text_filters=text_filters)
        return None
    assert isinstance(to_delete, ObjectDeletionFlag)

    # Annotation-based flags are handled by the annotation remover alone;
    # the first matching one returns immediately.
    if to_delete & ObjectDeletionFlag.LINKS:
        return self._remove_annots_from_page(page, ("/Link",))
    if to_delete & ObjectDeletionFlag.ATTACHMENTS:
        return self._remove_annots_from_page(
            page, ("/FileAttachment", "/Sound", "/Movie", "/Screen")
        )
    if to_delete & ObjectDeletionFlag.OBJECTS_3D:
        return self._remove_annots_from_page(page, ("/3D",))
    if to_delete & ObjectDeletionFlag.ALL_ANNOTATIONS:
        return self._remove_annots_from_page(page, None)

    # Content-stream operators that are dropped for the requested flag.
    jump_operators = []
    if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
        jump_operators = [
            b"w", b"J", b"j", b"M", b"d", b"i",
            b"W", b"W*",
            b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
            b"m", b"l", b"c", b"v", b"y", b"h", b"re",
            b"sh"
        ]
    if to_delete & ObjectDeletionFlag.TEXT:
        jump_operators = [b"Tj", b"TJ", b"'", b'"']

    if not isinstance(page, PageObject):
        page = PageObject(self, page.indirect_reference)  # pragma: no cover
    if "/Contents" in page:
        content = cast(ContentStream, page.get_contents())

        images, forms = self._remove_objects_from_page__clean_forms(
            elt=page, stack=[], jump_operators=jump_operators, to_delete=to_delete, text_filters=text_filters,
        )

        self._remove_objects_from_page__clean(
            content=content, images=images, forms=forms,
            jump_operators=jump_operators, to_delete=to_delete,
            text_filters=text_filters
        )
        page.replace_contents(content)
    # NOTE(review): the signature declares ``None`` but this path returns
    # two empty lists; kept as is for backward compatibility.
    return [], []  # type: ignore[return-value]

2066 

def _remove_objects_from_page__clean(
    self,
    content: ContentStream,
    images: list[str],
    forms: list[str],
    jump_operators: list[bytes],
    to_delete: ObjectDeletionFlag,
    text_filters: Optional[dict[str, Any]] = None,
) -> None:
    """
    Delete the selected operations from ``content`` in place.

    Args:
        content: Content stream whose ``operations`` list is edited.
        images: Names of image XObjects; a ``Do`` on one of them is a
            candidate when ``XOBJECT_IMAGES`` is requested.
        forms: Not used by this method.
        jump_operators: Operators that are always candidates for removal.
        to_delete: Flag describing what has to be removed.
        text_filters: Optional filters; ``font_ids`` limits text removal to
            operations issued while one of these fonts is selected.

    """
    font_id = None
    font_ids_to_delete = []
    if text_filters and to_delete & ObjectDeletionFlag.TEXT:
        font_ids_to_delete = text_filters.get("font_ids", [])

    i = 0
    while i < len(content.operations):
        operands, operator = content.operations[i]
        if operator == b"Tf":
            # Remember the font selected most recently.
            font_id = operands[0]
        if (
            (
                operator == b"INLINE IMAGE"
                and (to_delete & ObjectDeletionFlag.INLINE_IMAGES)
            )
            or (operator in jump_operators)
            or (
                operator == b"Do"
                and (to_delete & ObjectDeletionFlag.XOBJECT_IMAGES)
                and (operands[0] in images)
            )
        ):
            # A candidate is deleted unless text removal is restricted by
            # filters and the current font is not one of the selected ones.
            if (
                not to_delete & ObjectDeletionFlag.TEXT
                or (to_delete & ObjectDeletionFlag.TEXT and not text_filters)
                or (to_delete & ObjectDeletionFlag.TEXT and font_id in font_ids_to_delete)
            ):
                # Deleting shifts the next operation into slot ``i``.
                del content.operations[i]
            else:
                i += 1
        else:
            i += 1
    content.get_data()  # this ensures ._data is rebuilt from the .operations

2109 

def _remove_objects_from_page__clean_forms(
    self,
    elt: DictionaryObject,
    stack: list[DictionaryObject],
    jump_operators: list[bytes],
    to_delete: ObjectDeletionFlag,
    text_filters: Optional[dict[str, Any]] = None,
) -> tuple[list[str], list[str]]:
    """
    Clean the XObjects referenced by ``elt`` and, for streams, ``elt`` itself.

    Args:
        elt: Page or form whose ``/Resources`` ``/XObject`` entries are
            inspected.
        stack: Elements already being processed; used to stop recursion
            on elements that were seen before.
        jump_operators: Forwarded to the content cleaner.
        to_delete: Flag describing what has to be removed.
        text_filters: Forwarded to the content cleaner.

    Returns:
        Two lists of XObject keys: the images selected for removal and
        the forms that were found.

    """
    # elt in recursive call is a new ContentStream object, so we have to check the indirect_reference
    if (elt in stack) or (
        hasattr(elt, "indirect_reference") and any(
            elt.indirect_reference == getattr(x, "indirect_reference", -1)
            for x in stack
        )
    ):
        # to prevent infinite looping
        return [], []  # pragma: no cover
    try:
        d = cast(
            dict[Any, Any],
            cast(DictionaryObject, elt["/Resources"])["/XObject"],
        )
    except KeyError:
        # No resources or no XObjects: nothing to inspect.
        d = {}
    images = []
    forms = []
    for k, v in d.items():
        o = v.get_object()
        try:
            content: Any = None
            if (
                to_delete & ObjectDeletionFlag.XOBJECT_IMAGES
                and o["/Subtype"] == "/Image"
            ):
                content = NullObject()  # to delete the image keeping the entry
                images.append(k)
            if o["/Subtype"] == "/Form":
                forms.append(k)
                if isinstance(o, ContentStream):
                    content = o
                else:
                    # Rebuild the form as a ContentStream, leaving out the
                    # length, filter and decode-parameter entries.
                    content = ContentStream(o, self)
                    content.update(
                        {
                            k1: v1
                            for k1, v1 in o.items()
                            if k1 not in ["/Length", "/Filter", "/DecodeParms"]
                        }
                    )
                    try:
                        content.indirect_reference = o.indirect_reference
                    except AttributeError:  # pragma: no cover
                        pass
                stack.append(elt)

                # clean subforms
                self._remove_objects_from_page__clean_forms(
                    elt=content, stack=stack, jump_operators=jump_operators, to_delete=to_delete,
                    text_filters=text_filters,
                )
            if content is not None:
                if isinstance(v, IndirectObject):
                    # Replace the stored object by the cleaned one.
                    self._objects[v.idnum - 1] = content
                else:
                    # should only occur in a PDF not respecting PDF spec
                    # where streams must be indirected.
                    d[k] = self._add_object(content)  # pragma: no cover
        except (TypeError, KeyError):
            # Entries that cannot be indexed or lack ``/Subtype`` are skipped.
            pass
    for im in images:
        del d[im]  # for clean-up
    if isinstance(elt, StreamObject):  # for /Form
        if not isinstance(elt, ContentStream):  # pragma: no cover
            e = ContentStream(elt, self)
            e.update(elt.items())
            elt = e
        # clean the content
        self._remove_objects_from_page__clean(
            content=elt, images=images, forms=forms, jump_operators=jump_operators,
            to_delete=to_delete, text_filters=text_filters
        )
    return images, forms

2192 

def remove_images(
    self,
    to_delete: ImageType = ImageType.ALL,
) -> None:
    """
    Remove images from every page of this output.

    Args:
        to_delete: The type of images to be deleted
            (default = all images types). A ``bool`` is treated as
            ``ImageType.ALL``.

    """
    if isinstance(to_delete, bool):
        to_delete = ImageType.ALL

    # Translate the image-type bits into the matching deletion flags.
    deletion_flags = ObjectDeletionFlag.NONE
    for member_name in ("XOBJECT_IMAGES", "INLINE_IMAGES", "DRAWING_IMAGES"):
        if to_delete & ImageType[member_name]:
            deletion_flags |= ObjectDeletionFlag[member_name]

    for current_page in self.pages:
        self.remove_objects_from_page(current_page, deletion_flags)

2216 

def remove_text(self, font_names: Optional[list[str]] = None) -> None:
    """
    Remove text from the PDF.

    Args:
        font_names: List of font names to remove, such as "Helvetica-Bold".
            Optional. If not specified, all text will be removed.
    """
    if not font_names:
        font_names = []

    for page in self.pages:
        resource_ids_to_remove = []

        # Content streams reference fonts and other resources with names like "/F1" or "/T1_0"
        # Font names need to be converted to resource names/IDs for easier removal
        if font_names:
            # Recursively loop through page objects to gather font info
            def get_font_info(
                obj: Any,
                font_info: Optional[dict[str, Any]] = None,
                key: Optional[str] = None
            ) -> dict[str, Any]:
                # ``key`` is the dictionary key under which ``obj`` was
                # found; it is recorded as the resource ID of a font.
                if font_info is None:
                    font_info = {}
                if isinstance(obj, IndirectObject):
                    obj = obj.get_object()
                if isinstance(obj, dict):
                    if obj.get("/Type") == "/Font":
                        font_name = obj.get("/BaseFont", "")
                        # Normalize font names like "/RRXFFV+Palatino-Bold" to "Palatino-Bold"
                        normalized_font_name = font_name.lstrip("/").split("+")[-1]
                        if normalized_font_name not in font_info:
                            font_info[normalized_font_name] = {
                                "normalized_font_name": normalized_font_name,
                                "resource_ids": [],
                            }
                        if key not in font_info[normalized_font_name]["resource_ids"]:
                            font_info[normalized_font_name]["resource_ids"].append(key)
                    # Descend into every value, fonts included.
                    for k in obj:
                        font_info = get_font_info(obj[k], font_info, k)
                elif isinstance(obj, (list, ArrayObject)):
                    for child_obj in obj:
                        font_info = get_font_info(child_obj, font_info)
                return font_info

            # Add relevant resource names for removal
            font_info = get_font_info(page.get("/Resources"))
            for font_name in font_names:
                if font_name in font_info:
                    resource_ids_to_remove.extend(font_info[font_name]["resource_ids"])

        # Without font names the filter stays empty, which removes all text.
        text_filters = {}
        if font_names:
            text_filters["font_ids"] = resource_ids_to_remove
        self.remove_objects_from_page(page, ObjectDeletionFlag.TEXT, text_filters=text_filters)

2273 

def add_uri(
    self,
    page_number: int,
    uri: str,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
) -> None:
    """
    Add a URI link covering a rectangular area of the specified page.

    Args:
        page_number: index of the page on which to place the URI action.
        uri: URI of resource to link to.
        rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or
            array of four integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``, or a string.
        border: if provided, an array describing border-drawing
            properties. See the PDF spec for details. The first three
            entries are used as numbers and a fourth entry as dash
            pattern. When omitted, the border array is ``[2, 2, 2]``.

    """
    page_link = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    page_ref = cast(dict[str, Any], self.get_object(page_link))

    border_arr: BorderArrayType
    if border is None:
        border_arr = [NumberObject(2), NumberObject(2), NumberObject(2)]
    else:
        border_arr = [NumberObject(n) for n in border[:3]]
        if len(border) == 4:
            border_arr.append(ArrayObject([NumberObject(n) for n in border[3]]))

    if isinstance(rect, str):
        # NOTE(review): a string rectangle is wrapped in a NumberObject
        # here - confirm this is the intended conversion.
        rect = NumberObject(rect)
    elif not isinstance(rect, RectangleObject):
        rect = RectangleObject(rect)

    uri_action = DictionaryObject()
    uri_action.update(
        {
            NameObject("/S"): NameObject("/URI"),
            NameObject("/URI"): TextStringObject(uri),
        }
    )
    link = DictionaryObject()
    link.update(
        {
            NameObject(AA.Type): NameObject("/Annot"),
            NameObject(AA.Subtype): NameObject("/Link"),
            NameObject(AA.P): page_link,
            NameObject(AA.Rect): rect,
            NameObject("/H"): NameObject("/I"),
            NameObject(AA.Border): ArrayObject(border_arr),
            NameObject("/A"): uri_action,
        }
    )
    link_ref = self._add_object(link)

    # Attach the link to the page, creating the annotation array if needed.
    if PG.ANNOTS in page_ref:
        page_ref[PG.ANNOTS].append(link_ref)
    else:
        page_ref[NameObject(PG.ANNOTS)] = ArrayObject([link_ref])

2340 

# Page layout names that ``_set_page_layout`` accepts without logging a
# warning.
_valid_layouts = (
    "/NoLayout",
    "/SinglePage",
    "/OneColumn",
    "/TwoColumnLeft",
    "/TwoColumnRight",
    "/TwoPageLeft",
    "/TwoPageRight",
)

2350 

def _get_page_layout(self) -> Optional[LayoutType]:
    """Return the catalog's ``/PageLayout`` entry, or ``None`` when it is absent."""
    try:
        layout = self._root_object["/PageLayout"]
    except KeyError:
        return None
    return cast(LayoutType, layout)

2356 

def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
    """
    Set the page layout.

    A value that is not a :class:`NameObject` is converted to one; when it
    is not one of ``_valid_layouts`` a warning is logged, but the value is
    stored anyway.

    Args:
        layout: The page layout to be used.

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    if not isinstance(layout, NameObject):
        if layout not in self._valid_layouts:
            # The separator must be ", "; the former ``'', ''.join(...)``
            # produced a tuple with all names run together.
            logger_warning(
                f"Layout should be one of: {', '.join(self._valid_layouts)}",
                __name__,
            )
        layout = NameObject(layout)
    self._root_object.update({NameObject("/PageLayout"): layout})

2391 

def set_page_layout(self, layout: LayoutType) -> None:
    """
    Set the page layout.

    Public wrapper that delegates to ``_set_page_layout``.

    Args:
        layout: The page layout to be used

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    apply_layout = self._set_page_layout
    apply_layout(layout)

2419 

@property
def page_layout(self) -> Optional[LayoutType]:
    """
    Page layout of the document.

    Reading returns the stored layout, or ``None`` when none is set;
    assigning stores a new layout.

    .. list-table:: Valid ``layout`` values
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    current_layout = self._get_page_layout()
    return current_layout

@page_layout.setter
def page_layout(self, layout: LayoutType) -> None:
    # Assignment goes through the same helper as ``set_page_layout``.
    self._set_page_layout(layout)

2448 

# Page mode names that the ``page_mode`` setter accepts without logging a
# warning.
_valid_modes = (
    "/UseNone",
    "/UseOutlines",
    "/UseThumbs",
    "/FullScreen",
    "/UseOC",
    "/UseAttachments",
)

2457 

def _get_page_mode(self) -> Optional[PagemodeType]:
    """Return the catalog's ``/PageMode`` entry, or ``None`` when it is absent."""
    try:
        mode = self._root_object["/PageMode"]
    except KeyError:
        return None
    return cast(PagemodeType, mode)

2463 

@property
def page_mode(self) -> Optional[PagemodeType]:
    """
    Page mode of the document.

    Reading returns the stored mode, or ``None`` when none is set.
    Assigning a value that is not a :class:`NameObject` converts it to
    one; a value outside ``_valid_modes`` logs a warning but is stored
    anyway.

    .. list-table:: Valid ``mode`` values
       :widths: 50 200

       * - /UseNone
         - Do not show outline or thumbnails panels
       * - /UseOutlines
         - Show outline (aka bookmarks) panel
       * - /UseThumbs
         - Show page thumbnails panel
       * - /FullScreen
         - Fullscreen view
       * - /UseOC
         - Show Optional Content Group (OCG) panel
       * - /UseAttachments
         - Show attachments panel
    """
    current_mode = self._get_page_mode()
    return current_mode

@page_mode.setter
def page_mode(self, mode: PagemodeType) -> None:
    if not isinstance(mode, NameObject):
        if mode not in self._valid_modes:
            logger_warning(
                f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
            )
        mode = NameObject(mode)
    mode_name: NameObject = mode
    self._root_object.update({NameObject("/PageMode"): mode_name})

2498 

def add_annotation(
    self,
    page_number: Union[int, PageObject],
    annotation: dict[str, Any],
) -> DictionaryObject:
    """
    Add a single annotation to the page.
    The added annotation must be a new annotation.
    It cannot be recycled.

    Args:
        page_number: PageObject or page index.
        annotation: Annotation to be added (created with annotation).

    Returns:
        The inserted object.
        This can be used for popup creation, for example.

    Raises:
        TypeError: If ``page_number`` is neither an ``int`` nor a
            :class:`PageObject`.

    """
    page = page_number
    if isinstance(page, int):
        page = self.pages[page]
    elif not isinstance(page, PageObject):
        raise TypeError("page: invalid type")

    to_add = cast(DictionaryObject, _pdf_objectify(annotation))
    # Link the annotation back to the page it belongs to.
    to_add[NameObject("/P")] = page.indirect_reference

    if page.annotations is None:
        page[NameObject("/Annots")] = ArrayObject()
        assert page.annotations is not None

    # Internal link annotations need the correct object type for the
    # destination
    if to_add.get("/Subtype") == "/Link" and "/Dest" in to_add:
        tmp = cast(dict[Any, Any], to_add[NameObject("/Dest")])
        dest = Destination(
            NameObject("/LinkName"),
            tmp["target_page_index"],
            Fit(
                fit_type=tmp["fit"], fit_args=dict(tmp)["fit_args"]
            ),  # I have no clue why this dict-hack is necessary
        )
        to_add[NameObject("/Dest")] = dest.dest_array

    page.annotations.append(self._add_object(to_add))

    # A popup is also registered on its parent annotation.
    if to_add.get("/Subtype") == "/Popup" and NameObject("/Parent") in to_add:
        cast(DictionaryObject, to_add["/Parent"].get_object())[
            NameObject("/Popup")
        ] = to_add.indirect_reference

    return to_add

2552 

def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
    """
    Perform some clean up in the page.

    Currently: named destinations given as :class:`NameObject` are
    converted to :class:`TextStringObject` (required for names/dests list).
    This covers the ``/Dest`` entry of each annotation and, when that is
    not a name, the ``/D`` entry of its ``/A`` action.

    Args:
        page: The page, or a reference to it.

    Returns:
        The cleaned PageObject

    """
    page = cast("PageObject", page.get_object())
    for annotation_ref in page.get("/Annots", []):
        annotation = annotation_ref.get_object()
        destination = annotation.get("/Dest", None)
        action = annotation.get("/A", None)
        if isinstance(destination, NameObject):
            annotation[NameObject("/Dest")] = TextStringObject(destination)
            continue
        if action is None:
            continue
        action = action.get_object()
        action_target = action.get("/D", None)
        if isinstance(action_target, NameObject):
            action[NameObject("/D")] = TextStringObject(action_target)
    return page

2579 

def _create_stream(
    self, fileobj: Union[Path, StrByteType, PdfReader]
) -> tuple[IOBase, Optional[Encryption]]:
    """
    Copy the content of ``fileobj`` into an in-memory stream.

    Args:
        fileobj: A path (``str`` or ``Path``), a :class:`PdfReader`, or an
            object offering ``seek`` and ``read``.

    Returns:
        A ``BytesIO`` holding the complete content, and the reader's
        encryption object when ``fileobj`` is a reader that has one
        (``None`` otherwise).

    Raises:
        NotImplementedError: If ``fileobj`` is none of the supported kinds.

    """
    # A string or Path is taken as the location of a file to read.
    if isinstance(fileobj, (str, Path)):
        with FileIO(fileobj, "rb") as handle:
            return BytesIO(handle.read()), None

    if isinstance(fileobj, PdfReader):
        encryption_obj = fileobj._encryption or None
        saved_position = fileobj.stream.tell()
        fileobj.stream.seek(0)
        copied: IOBase = BytesIO(fileobj.stream.read())

        # reset the stream to its original location
        fileobj.stream.seek(saved_position)
        return copied, encryption_obj

    if hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
        fileobj.seek(0)
        return BytesIO(fileobj.read()), None

    raise NotImplementedError(
        "Merging requires an object that PdfReader can parse. "
        "Typically, that is a Path or a string representing a Path, "
        "a file object, or an object implementing .seek and .read. "
        "Passing a PdfReader directly works as well."
    )

2615 

def append(
    self,
    fileobj: Union[StrByteType, PdfReader, Path],
    outline_item: Union[
        str, None, PageRange, tuple[int, int], tuple[int, int, int], list[int]
    ] = None,
    pages: Union[
        None,
        PageRange,
        tuple[int, int],
        tuple[int, int, int],
        list[int],
        list[PageObject],
    ] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
) -> None:
    """
    Identical to the :meth:`merge()<merge>` method, but assumes you want to
    concatenate all pages onto the end of the file instead of specifying a
    position.

    Args:
        fileobj: A File Object or an object that supports the standard
            read and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
        outline_item: Optionally, you may specify a string to build an
            outline (aka 'bookmark') to identify the beginning of the
            included file. A tuple, list or ``PageRange`` given here is
            used as ``pages`` instead.
        pages: Can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: Provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored

    """
    if excluded_fields is None:
        excluded_fields = ()

    outline_title = outline_item
    if isinstance(outline_item, (tuple, list, PageRange)):
        # A page selection was passed in the outline slot, so the
        # following arguments are shifted by one position.
        if isinstance(pages, bool):
            if not isinstance(import_outline, bool):
                excluded_fields = import_outline
            import_outline = pages
        pages = outline_item
        outline_title = None

    self.merge(
        None,
        fileobj,
        outline_title,
        pages,
        import_outline,
        excluded_fields,
    )

2683 

def merge(
    self,
    position: Optional[int],
    fileobj: Union[Path, StrByteType, PdfReader],
    outline_item: Optional[str] = None,
    pages: Optional[Union[PageRangeSpec, list[PageObject]]] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = (),
) -> None:
    """
    Merge the pages from the given file into the output file at the
    specified page number.

    Besides the pages themselves, the named destinations, the outline,
    the annotations, the interactive form fields and the article threads
    that refer to the merged pages are carried over.

    Args:
        position: Page index handed to ``insert_page`` for the first merged
            page; it is incremented for every following page.
            ``None`` adds the pages with ``add_page`` instead.
        fileobj: A ``PdfDocCommon`` instance (used directly), or anything
            ``_create_stream`` accepts, which is then opened with a
            non-strict ``PdfReader``.
        outline_item: Optionally, a string used as title of a new outline
            item (aka 'bookmark') pointing to the first merged page. When
            no page ends up being merged, the item is created without a
            target page.
        pages: can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document. ``None`` selects all pages.
            A tuple with more than three items is iterated as is.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored

    Raises:
        TypeError: The pages attribute is not configured properly

    """
    if isinstance(fileobj, PdfDocCommon):
        reader = fileobj
    else:
        stream, _encryption_obj = self._create_stream(fileobj)
        # Create a new PdfReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        reader = PdfReader(stream, strict=False)  # type: ignore[arg-type]

    if excluded_fields is None:
        excluded_fields = ()
    # Find the range of pages to merge.
    if pages is None:
        pages = list(range(len(reader.pages)))
    elif isinstance(pages, PageRange):
        pages = list(range(*pages.indices(len(reader.pages))))
    elif isinstance(pages, list):
        pass  # keep unchanged
    elif isinstance(pages, tuple) and len(pages) <= 3:
        pages = list(range(*pages))
    elif not isinstance(pages, tuple):
        raise TypeError(
            '"pages" must be a tuple of (start, stop[, step]) or a list'
        )

    # Maps the object number of each source page to its copy in this writer.
    srcpages = {}
    for page in pages:
        if isinstance(page, PageObject):
            pg = page
        else:
            pg = reader.pages[page]
        assert pg.indirect_reference is not None
        if position is None:
            # numbers in the exclude list identifies that the exclusion is
            # only applicable to 1st level of cloning
            srcpages[pg.indirect_reference.idnum] = self.add_page(
                pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
        else:
            srcpages[pg.indirect_reference.idnum] = self.insert_page(
                pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
            position += 1
        # Remember the source page: annotations and articles are copied
        # from it further below.
        srcpages[pg.indirect_reference.idnum].original_page = pg

    reader._named_destinations = (
        reader.named_destinations
    )  # need for the outline processing below

    arr: Any

    for dest in reader._named_destinations.values():
        self._merge__process_named_dests(dest=dest, reader=reader, srcpages=srcpages)

    outline_item_typ: TreeObject
    if outline_item is not None:
        # An empty page selection leaves srcpages empty; a bare next() would
        # then leak StopIteration to the caller, so fall back to "no page".
        first_merged_page = next(iter(srcpages.values()), None)
        outline_item_typ = cast(
            "TreeObject",
            self.add_outline_item(
                TextStringObject(outline_item),
                None if first_merged_page is None else first_merged_page.indirect_reference,
                fit=PAGE_FIT,
            ).get_object(),
        )
    else:
        outline_item_typ = self.get_outline_root()

    _ro = reader.root_object
    if import_outline and CO.OUTLINES in _ro:
        outline = self._get_filtered_outline(
            _ro.get(CO.OUTLINES, None), srcpages, reader
        )
        self._insert_filtered_outline(
            outline, outline_item_typ, None
        )  # TODO: use before parameter

    if "/Annots" not in excluded_fields:
        for pag in srcpages.values():
            lst = self._insert_filtered_annotations(
                pag.original_page.get("/Annots", []), pag, srcpages, reader
            )
            if len(lst) > 0:
                pag[NameObject("/Annots")] = lst
            self.clean_page(pag)

    if "/AcroForm" in _ro and not is_null_or_none(_ro["/AcroForm"]):
        if "/AcroForm" not in self._root_object:
            # Copy the form dictionary without its /Fields; the fields are
            # collected separately below.
            self._root_object[NameObject("/AcroForm")] = self._add_object(
                cast(
                    DictionaryObject,
                    reader.root_object["/AcroForm"],
                ).clone(self, False, ("/Fields",))
            )
            arr = ArrayObject()
        else:
            writer_form = cast(DictionaryObject, self._root_object["/AcroForm"])
            if "/Fields" in writer_form:
                arr = cast(ArrayObject, writer_form["/Fields"])
            else:
                # An existing form without /Fields used to raise KeyError.
                arr = ArrayObject()
        # No entry exists when nothing was cloned from this reader
        # (e.g. an empty page selection); treat that as "nothing translated".
        trslat = self._id_translated.get(id(reader), {})
        try:
            for f in reader.root_object["/AcroForm"]["/Fields"]:  # type: ignore
                try:
                    ind = IndirectObject(trslat[f.idnum], 0, self)
                    if ind not in arr:
                        arr.append(ind)
                except KeyError:
                    # for trslat[] which mean the field has not be copied
                    # through the page
                    pass
        except KeyError:  # for /Acroform or /Fields are not existing
            arr = self._add_object(ArrayObject())
        cast(DictionaryObject, self._root_object["/AcroForm"])[
            NameObject("/Fields")
        ] = arr

    if "/B" not in excluded_fields:
        self.add_filtered_articles("", srcpages, reader)

2839 

def _merge__process_named_dests(self, dest: Any, reader: PdfDocCommon, srcpages: dict[int, PageObject]) -> None:
    """
    Register one named destination of the source document in this writer.

    Nothing is added when the title is already listed in this writer's
    ``/Names`` -> ``/Dests`` -> ``/Names`` entry, when the destination has
    no page, or when its page is not one of the merged pages.

    Args:
        dest: Named destination taken from the reader.
        reader: Document the destination comes from.
        srcpages: Merged pages, keyed by the object number of the source page.

    """
    arr: Any = dest.dest_array
    if "/Names" in self._root_object:
        title = dest["/Title"]
        names_entry = cast(DictionaryObject, self._root_object["/Names"])
        dests_entry = cast(
            DictionaryObject, names_entry.get("/Dests", DictionaryObject())
        )
        known_names = cast(list[Any], dests_entry.get("/Names", DictionaryObject()))
        if title in known_names:
            # already exists: should not duplicate it
            return

    target = dest["/Page"]
    if target is None or isinstance(target, NullObject):
        return

    if isinstance(target, int):
        # the page reference is a page number normally not a PDF Reference
        # page numbers as int are normally accepted only in external goto
        try:
            source_page = reader.pages[target]
        except IndexError:
            return
        assert source_page.indirect_reference is not None
        try:
            arr[NumberObject(0)] = NumberObject(
                srcpages[source_page.indirect_reference.idnum].page_number
            )
            self.add_named_destination_array(dest["/Title"], arr)
        except KeyError:
            pass
        return

    source_id = target.indirect_reference.idnum
    if source_id in srcpages:
        arr[NumberObject(0)] = srcpages[source_id].indirect_reference
        self.add_named_destination_array(dest["/Title"], arr)

2873 

def _add_articles_thread(
    self,
    thread: DictionaryObject,  # thread entry from the reader's array of threads
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> IndirectObject:
    """
    Copy an article thread, keeping only the articles on merged pages.

    The thread dictionary is cloned without its ``/F`` entry and appended
    to ``self.threads``. The source articles are then followed through
    their ``/N`` links, starting at ``thread["/F"]``; for each one whose
    page has a copy in ``pages`` a new article is created, linked to the
    previous one and registered in the ``/B`` array of the copied page.
    The walk ends when it is back at the first source article, at which
    point the new chain is closed into a ring.

    Args:
        thread: Thread dictionary from the source document.
        pages: Merged pages, keyed by the object number of the source page.
        reader: Document the thread comes from.

    Returns:
        The added thread as an indirect reference

    """
    thread_copy = thread.clone(
        self, force_duplicate=True, ignore_fields=("/F",)
    )  # use of clone to keep link between reader and writer
    self.threads.append(thread_copy.indirect_reference)

    first_article = cast("DictionaryObject", thread["/F"])
    source_article: Optional[DictionaryObject] = first_article
    latest: Optional[DictionaryObject] = None
    while source_article is not None:
        page_ref = self._get_cloned_page(
            cast("PageObject", source_article["/P"]), pages, reader
        )
        if page_ref is not None:
            if latest is None:
                # First kept article: it becomes the head of the new chain.
                latest = cast(
                    "DictionaryObject",
                    self._add_object(DictionaryObject()).get_object(),
                )
                chain_head = latest
                thread_copy[NameObject("/F")] = latest.indirect_reference
            else:
                # Following articles point back (/V) to their predecessor.
                successor = cast(
                    "DictionaryObject",
                    self._add_object(
                        DictionaryObject(
                            {NameObject("/V"): latest.indirect_reference}
                        )
                    ).get_object(),
                )
                latest[NameObject("/N")] = successor.indirect_reference
                latest = successor
            latest[NameObject("/P")] = page_ref
            latest[NameObject("/T")] = thread_copy.indirect_reference
            latest[NameObject("/R")] = source_article["/R"]
            copied_page = cast("PageObject", page_ref.get_object())
            if "/B" not in copied_page:
                copied_page[NameObject("/B")] = ArrayObject()
            cast("ArrayObject", copied_page["/B"]).append(
                latest.indirect_reference
            )
        source_article = cast("DictionaryObject", source_article["/N"])
        if source_article == first_article:
            # Back at the start: close the ring and stop.
            latest[NameObject("/N")] = chain_head.indirect_reference  # type: ignore
            chain_head[NameObject("/V")] = latest.indirect_reference  # type: ignore
            source_article = None
    assert thread_copy.indirect_reference is not None
    return thread_copy.indirect_reference

2938 

def add_filtered_articles(
    self,
    fltr: Union[
        Pattern[Any], str
    ],
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> None:
    """
    Add articles matching the defined criteria.

    The ``/B`` entries of the source page behind every merged page are
    inspected. The thread (``/T``) of each entry is copied with
    ``_add_articles_thread`` when it has not been translated for this
    reader yet and the ``/Title`` of its ``/I`` dictionary matches ``fltr``.

    Args:
        fltr: Compiled pattern, or a string that gets compiled, searched
            in the thread title. Any other value is replaced by a pattern
            compiled from the empty string.
        pages: Merged pages, keyed by the object number of the source page.
        reader: Document the pages come from.

    """
    if isinstance(fltr, str):
        fltr = re.compile(fltr)
    elif not isinstance(fltr, Pattern):
        fltr = re.compile("")

    for merged_page in pages.values():
        source_page = merged_page.original_page
        for entry in source_page.get("/B", ()):
            article = entry.get_object()
            if is_null_or_none(article):
                continue
            thread = article.get("/T")
            if thread is None:
                continue
            thread = thread.get_object()
            if thread.indirect_reference.idnum in self._id_translated[id(reader)]:
                continue
            title = (thread.get("/I", {})).get("/Title", "")
            if fltr.search(title):
                self._add_articles_thread(thread, pages, reader)

2974 

def _get_cloned_page(
    self,
    page: Union[None, IndirectObject, PageObject, NullObject],
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> Optional[IndirectObject]:
    """
    Look up the copy of a source page among the merged pages.

    Args:
        page: Source page, given as a dictionary of type ``/Page`` or as
            an indirect reference.
        pages: Merged pages, keyed by the object number of the source page.
        reader: Not used by the lookup.

    Returns:
        The indirect reference of the copied page, or ``None`` when
        ``page`` is a ``NullObject``, is of any other kind, or has no
        entry in ``pages``.

    """
    if isinstance(page, NullObject):
        return None

    source_ref = None
    if isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page":
        source_ref = page.indirect_reference
    elif isinstance(page, IndirectObject):
        source_ref = page

    # Any failure (no usable reference, page not merged, ...) means "no copy".
    try:
        return pages[source_ref.idnum].indirect_reference  # type: ignore
    except Exception:
        return None

2991 

def _insert_filtered_annotations(
    self,
    annots: Union[IndirectObject, list[DictionaryObject], None],
    page: PageObject,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Copy the annotations of a source page, re-targeting internal links.

    Three kinds of annotation are distinguished:

    * ``/Link`` annotations with a ``/GoTo`` action and no ``/Dest``:
      the destination is read from ``/A`` -> ``/D``;
    * annotations carrying a ``/Dest`` entry: the destination is read
      from it;
    * everything else: cloned as is.

    For the first two kinds a string destination is a named destination;
    the annotation is kept only if that name is known to
    ``get_named_dest_root()``. An array destination is kept only if its
    first item is a merged page, and is rewritten to point to the copy.

    Args:
        annots: Annotation list of the source page, possibly behind an
            indirect reference.
        page: Not used by this method.
        pages: Merged pages, keyed by the object number of the source page.
        reader: Document the annotations come from.

    Returns:
        An ``ArrayObject`` with the annotations that are kept; empty when
        ``annots`` is ``None`` or does not resolve to a list.

    """
    kept = ArrayObject()
    if isinstance(annots, IndirectObject):
        annots = cast("list[Any]", annots.get_object())
    if annots is None:
        return kept
    if not isinstance(annots, list):
        logger_warning(f"Expected list of annotations, got {annots} of type {annots.__class__.__name__}.", __name__)
        return kept

    for entry in annots:
        annotation = cast("DictionaryObject", entry.get_object())
        goto_action_without_dest = not (
            annotation["/Subtype"] != "/Link"  # type: ignore[comparison-overlap]
            or "/A" not in annotation
            or cast("DictionaryObject", annotation["/A"])["/S"] != "/GoTo"  # type: ignore[comparison-overlap]
            or "/Dest" in annotation
        )

        if goto_action_without_dest:
            target = cast("DictionaryObject", annotation["/A"]).get("/D", NullObject())
            if is_null_or_none(target):
                continue
            if isinstance(target, str):
                # it is a named dest
                if str(target) in self.get_named_dest_root():
                    kept.append(annotation.clone(self).indirect_reference)
                continue
            target = cast("ArrayObject", target)
            page_ref = self._get_cloned_page(target[0], pages, reader)
            if page_ref is not None:
                duplicate = annotation.clone(self, ignore_fields=("/D",))
                cast("DictionaryObject", duplicate["/A"])[
                    NameObject("/D")
                ] = ArrayObject([page_ref, *target[1:]])
                kept.append(self._add_object(duplicate))
            continue

        if "/Dest" not in annotation:
            # Nothing to re-target: copy the annotation unchanged.
            kept.append(self._add_object(annotation.clone(self)))
            continue

        target = annotation["/Dest"]
        if isinstance(target, str):
            # it is a named dest
            if str(target) in self.get_named_dest_root():
                kept.append(annotation.clone(self).indirect_reference)
            continue
        target = cast("ArrayObject", target)
        page_ref = self._get_cloned_page(target[0], pages, reader)
        if page_ref is not None:
            duplicate = annotation.clone(self, ignore_fields=("/Dest",))
            duplicate[NameObject("/Dest")] = ArrayObject([page_ref, *target[1:]])
            kept.append(self._add_object(duplicate))
    return kept

3048 

def _get_filtered_outline(
    self,
    node: Any,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Extract outline item entries that are part of the specified page set.

    A node of type ``/Outlines``, or one without ``/Title``, is only a
    container: the result is whatever its ``/First`` child yields. Any
    other node is the start of a sibling chain followed through ``/Next``.
    An item is kept when its page has a copy in ``pages`` or when at least
    one of its descendants is kept; the kept descendants are stored on the
    item as ``_filtered_children``.

    Args:
        node: Outline node from the reader; ``None`` and null objects are
            handled as an empty dictionary.
        pages: Merged pages, keyed by the object number of the source page.
        reader: Document the outline comes from.

    Returns:
        A list of destination objects.

    """
    kept = []
    if node is None:
        node = NullObject()
    node = node.get_object()
    if is_null_or_none(node):
        node = DictionaryObject()

    if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
        first_child = node.get("/First", None)
        if first_child is not None:
            first_child = first_child.get_object()
            kept += self._get_filtered_outline(first_child, pages, reader)
        return kept

    page_ref: Union[None, IndirectObject, NullObject]
    while node is not None:
        node = node.get_object()
        item = cast("Destination", reader._build_outline_item(node))
        page_ref = self._get_cloned_page(cast("PageObject", item["/Page"]), pages, reader)
        if page_ref is None:
            # The page was not merged: mark the item as having no target.
            page_ref = NullObject()
        item[NameObject("/Page")] = page_ref
        if "/First" in node:
            item._filtered_children = self._get_filtered_outline(
                node["/First"], pages, reader
            )
        else:
            item._filtered_children = []
        if (
            not isinstance(item["/Page"], NullObject)
            or len(item._filtered_children) > 0
        ):
            kept.append(item)
        node = node.get("/Next", None)
    return kept

3100 

def _clone_outline(self, dest: Destination) -> TreeObject:
    """
    Create an outline item in this writer from a filtered destination.

    The new item gets the title of ``dest``. When ``dest`` has a page, the
    item receives either a clone of the ``/A`` entry of the source node
    or, if there is none, the destination array. ``/F`` and ``/C`` are
    taken from the source node when one is attached, defaulting to ``0``
    and to three ``0.0`` components.

    Args:
        dest: Destination produced while filtering the source outline.

    Returns:
        The new outline item, already added to this writer.

    """
    item = TreeObject()
    self._add_object(item)
    item[NameObject("/Title")] = TextStringObject(dest["/Title"])

    has_page = not isinstance(dest["/Page"], NullObject)
    if has_page:
        if dest.node is not None and "/A" in dest.node:
            item[NameObject("/A")] = dest.node["/A"].clone(self)
        else:
            item[NameObject("/Dest")] = dest.dest_array
    # TODO: /SE
    if dest.node is not None:
        flags = dest.node.get("/F", 0)
        item[NameObject("/F")] = NumberObject(flags)
        default_color = [FloatObject(0.0), FloatObject(0.0), FloatObject(0.0)]
        item[NameObject("/C")] = ArrayObject(dest.node.get("/C", default_color))
    return item

3119 

def _insert_filtered_outline(
    self,
    outlines: list[Destination],
    parent: Union[TreeObject, IndirectObject],
    before: Union[None, TreeObject, IndirectObject] = None,
) -> None:
    """
    Insert filtered outline items, with their kept children, under a parent.

    Entries of type ``/Outlines`` or without ``/Title`` create no item of
    their own; their children are attached to ``parent`` directly.

    Args:
        outlines: Items as returned by ``_get_filtered_outline``.
        parent: Outline node receiving the new items.
        before: Forwarded to ``insert_child`` for the items of this level;
            nested levels always use ``None``.

    """
    for dest in outlines:
        # TODO: can be improved to keep A and SE entries (ignored for the moment)
        # with np=self.add_outline_item_destination(dest,parent,before)
        is_container = dest.get("/Type", "") == "/Outlines" or "/Title" not in dest
        if is_container:
            attach_point = parent
        else:
            attach_point = self._clone_outline(dest)
            cast(TreeObject, parent.get_object()).insert_child(attach_point, before, self)
        self._insert_filtered_outline(dest._filtered_children, attach_point, None)

3135 

def close(self) -> None:
    """
    Do nothing.

    Implemented for API harmonization.
    """

3139 

def find_outline_item(
    self,
    outline_item: dict[str, Any],
    root: Optional[OutlineType] = None,
) -> Optional[list[int]]:
    """
    Locate an outline item and return its position in the outline tree.

    The nodes are visited depth-first, following ``/First`` for children
    and ``/Next`` for siblings. A node matches when its indirect reference
    or its ``/Title`` equals ``outline_item``.

    Args:
        outline_item: Value compared with the indirect reference and with
            the title of every visited node.
        root: Node to start from; the outline root of this writer when
            ``None``.

    Returns:
        The sibling indexes leading to the item, outermost level first.
        Nodes without ``/Title`` on the way contribute no index.
        ``None`` when the item is not found.

    """
    current = self.get_outline_root() if root is None else cast("TreeObject", root)

    sibling_index = 0
    while current is not None:
        if (
            current.indirect_reference == outline_item
            or current.get("/Title", None) == outline_item
        ):
            return [sibling_index]
        if "/First" in current:
            nested = self.find_outline_item(
                outline_item, cast(OutlineType, current["/First"])
            )
            if nested:
                own_step = [sibling_index] if "/Title" in current else []
                return own_step + nested
        if "/Next" not in current:
            return None
        sibling_index += 1
        current = cast(TreeObject, current["/Next"])
    raise PyPdfError("This line is theoretically unreachable.")  # pragma: no cover

3169 

def reset_translation(
    self, reader: Union[None, PdfReader, IndirectObject] = None
) -> None:
    """
    Reset the translation table between reader and the writer object.

    Late cloning will create new independent objects.

    Args:
        reader: PdfReader or IndirectObject referencing a PdfReader object.
            if set to None or omitted, all tables will be reset.
            A reader without a table is silently ignored.

    Raises:
        Exception: ``reader`` is neither ``None``, a ``PdfReader`` nor an
            ``IndirectObject``.

    """
    if reader is None:
        self._id_translated = {}
    elif isinstance(reader, PdfReader):
        self._id_translated.pop(id(reader), None)
    elif isinstance(reader, IndirectObject):
        # The table is keyed by the document the reference belongs to.
        self._id_translated.pop(id(reader.pdf), None)
    else:
        # The message was missing its f-prefix, so the offending value
        # was never shown.
        raise Exception(f"invalid parameter {reader}")

3197 

def set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    The arguments are validated here; the label itself is written by
    ``_set_page_label``.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.

    Args:
        page_index_from: index of the first page of the range, starting from 0
        page_index_to: index of the last page of the range, starting from 0
        style: The numbering style to be used for the numeric portion of each page label:

                   * ``/D`` Decimal Arabic numerals
                   * ``/R`` Uppercase Roman numerals
                   * ``/r`` Lowercase Roman numerals
                   * ``/A`` Uppercase letters (A to Z for the first 26 pages,
                     AA to ZZ for the next 26, and so on)
                   * ``/a`` Lowercase letters (a to z for the first 26 pages,
                     aa to zz for the next 26, and so on)

        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value.
               ``None`` and ``0`` (the default) are accepted as
               "not given"; any other value must be greater than or
               equal to 1.

    Raises:
        ValueError: Neither ``style`` nor ``prefix`` is given, the page
            range is negative, reversed or beyond the last page, or
            ``start`` is below one.

    """
    if style is None and prefix is None:
        raise ValueError("At least one of style and prefix must be given")
    if page_index_from < 0:
        raise ValueError("page_index_from must be greater or equal than 0")
    if page_index_to < page_index_from:
        raise ValueError(
            "page_index_to must be greater or equal than page_index_from"
        )
    page_count = len(self.pages)
    if page_index_to >= page_count:
        raise ValueError("page_index_to exceeds number of pages")
    start_is_given = start is not None and start != 0
    if start_is_given and start < 1:
        raise ValueError("If given, start must be greater or equal than one")

    self._set_page_label(page_index_from, page_index_to, style, prefix, start)

3248 

def _set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    The arguments are not validated here.

    When the document has no page labels yet, a label tree is created
    whose first entry gives decimal labels to all pages. The new label is
    then inserted at ``page_index_from``, the entries inside the range are
    cleared, and a decimal label is inserted right after the range unless
    the range ends on the last page or a label already starts there.

    Args:
        page_index_from: index of the first page of the range, starting from 0
        page_index_to: index of the last page of the range, starting from 0
        style: The numbering style to be used for the numeric portion of each page label:
                    /D Decimal Arabic numerals
                    /R Uppercase Roman numerals
                    /r Lowercase Roman numerals
                    /A Uppercase letters (A to Z for the first 26 pages,
                       AA to ZZ for the next 26, and so on)
                    /a Lowercase letters (a to z for the first 26 pages,
                       aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value.
               With ``0`` (the default) or ``None`` no ``/St`` entry is
               written, leaving the start value to the PDF default.

    """
    default_page_label = DictionaryObject()
    default_page_label[NameObject("/S")] = NameObject("/D")

    new_page_label = DictionaryObject()
    if style is not None:
        new_page_label[NameObject("/S")] = NameObject(style)
    if prefix is not None:
        new_page_label[NameObject("/P")] = TextStringObject(prefix)
    # ``start`` is Optional and the public validator lets None through;
    # building a NumberObject from None fails, so None means "omit /St".
    if start is not None and start != 0:
        new_page_label[NameObject("/St")] = NumberObject(start)

    if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
        nums = ArrayObject()
        nums_insert(NumberObject(0), default_page_label, nums)
        page_labels = TreeObject()
        page_labels[NameObject("/Nums")] = nums
        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

    page_labels = cast(
        TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
    )
    nums = cast(ArrayObject, page_labels[NameObject("/Nums")])

    nums_insert(NumberObject(page_index_from), new_page_label, nums)
    nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
    next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
    if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
        # Pages after the range fall back to plain decimal labels.
        nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)

    page_labels[NameObject("/Nums")] = nums
    self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

3313 

def _repr_mimebundle_(
    self,
    include: Union[None, Iterable[str]] = None,
    exclude: Union[None, Iterable[str]] = None,
) -> dict[str, Any]:
    """
    Integration into Jupyter Notebooks.

    The document is written to an in-memory buffer, which is returned
    under the ``application/pdf`` mime-type.

    Args:
        include: When given, only the mime-types listed here are returned.
        exclude: When given, the mime-types listed here are left out.

    Returns:
        A dictionary that maps a mime-type to its representation.

    .. seealso::

        https://ipython.readthedocs.io/en/stable/config/integrating.html
    """
    buffer = BytesIO()
    self.write(buffer)
    bundle = {"application/pdf": buffer}

    if include is not None:
        # Keep only what the caller asked for.
        bundle = {
            mime: payload for mime, payload in bundle.items() if mime in include
        }

    if exclude is not None:
        # Drop what the caller rejected.
        bundle = {
            mime: payload for mime, payload in bundle.items() if mime not in exclude
        }

    return bundle

3344 

3345 

def _pdf_objectify(obj: Union[dict[str, Any], str, float, list[Any]]) -> PdfObject:
    """
    Convert a plain Python value into the matching PDF object.

    * a ``PdfObject`` is returned unchanged;
    * a ``dict`` becomes a ``DictionaryObject`` whose keys are names and
      whose values are converted recursively;
    * a ``str`` becomes a ``NameObject`` when it starts with ``/``,
      otherwise a ``TextStringObject``;
    * a ``float`` or ``int`` becomes a ``FloatObject``;
    * a ``list`` becomes an ``ArrayObject`` of converted items.

    Args:
        obj: Value to convert.

    Returns:
        The PDF object built from ``obj``.

    Raises:
        NotImplementedError: ``obj`` is of none of the types above.

    """
    if isinstance(obj, PdfObject):
        return obj
    if isinstance(obj, dict):
        converted = DictionaryObject()
        for name, entry in obj.items():
            converted[NameObject(name)] = _pdf_objectify(entry)
        return converted
    if isinstance(obj, str):
        return NameObject(obj) if obj.startswith("/") else TextStringObject(obj)
    if isinstance(obj, (float, int)):
        return FloatObject(obj)
    if isinstance(obj, list):
        return ArrayObject([_pdf_objectify(element) for element in obj])
    raise NotImplementedError(
        f"{type(obj)=} could not be cast to a PdfObject"
    )

3365 

3366 

def _create_outline_item(
    action_ref: Union[None, IndirectObject],
    title: str,
    color: Union[tuple[float, float, float], str, None],
    italic: bool,
    bold: bool,
) -> TreeObject:
    """
    Build the dictionary of an outline item.

    Args:
        action_ref: Stored as ``/A`` unless it is ``None``.
        title: Text stored as ``/Title``.
        color: Stored as ``/C``; a string is first converted with
            ``hex_to_rgb``. A falsy value adds no ``/C`` entry.
        italic: Adds the italic flag to ``/F``.
        bold: Adds the bold flag to ``/F``.

    Returns:
        The new outline item; ``/F`` is present only if ``italic`` or
        ``bold`` is set.

    """
    item = TreeObject()
    if action_ref is not None:
        item[NameObject("/A")] = action_ref
    item.update({NameObject("/Title"): create_string_object(title)})

    if color:
        components = hex_to_rgb(color) if isinstance(color, str) else color
        item.update(
            {NameObject("/C"): ArrayObject([FloatObject(c) for c in components])}
        )

    if italic or bold:
        italic_bit = OutlineFontFlag.italic if italic else 0
        bold_bit = OutlineFontFlag.bold if bold else 0
        item.update({NameObject("/F"): NumberObject(italic_bit + bold_bit)})
    return item