Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_writer.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1473 statements  

1# Copyright (c) 2006, Mathieu Fenniak 

2# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com> 

3# 

4# All rights reserved. 

5# 

6# Redistribution and use in source and binary forms, with or without 

7# modification, are permitted provided that the following conditions are 

8# met: 

9# 

10# * Redistributions of source code must retain the above copyright notice, 

11# this list of conditions and the following disclaimer. 

12# * Redistributions in binary form must reproduce the above copyright notice, 

13# this list of conditions and the following disclaimer in the documentation 

14# and/or other materials provided with the distribution. 

15# * The name of the author may not be used to endorse or promote products 

16# derived from this software without specific prior written permission. 

17# 

18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

28# POSSIBILITY OF SUCH DAMAGE. 

29 

30import decimal 

31import enum 

32import hashlib 

33import re 

34import struct 

35import uuid 

36from collections.abc import Iterable, Mapping 

37from io import BytesIO, FileIO, IOBase 

38from itertools import compress 

39from pathlib import Path 

40from re import Pattern 

41from types import TracebackType 

42from typing import ( 

43 IO, 

44 Any, 

45 Callable, 

46 Optional, 

47 Union, 

48 cast, 

49) 

50 

51from ._cmap import _default_fonts_space_width, build_char_map_from_dict 

52from ._doc_common import DocumentInformation, PdfDocCommon 

53from ._encryption import EncryptAlgorithm, Encryption 

54from ._page import PageObject, Transformation 

55from ._page_labels import nums_clear_range, nums_insert, nums_next 

56from ._reader import PdfReader 

57from ._utils import ( 

58 StrByteType, 

59 StreamType, 

60 _get_max_pdf_version_header, 

61 deprecation_no_replacement, 

62 logger_warning, 

63) 

64from .constants import AnnotationDictionaryAttributes as AA 

65from .constants import CatalogAttributes as CA 

66from .constants import ( 

67 CatalogDictionary, 

68 GoToActionArguments, 

69 ImageType, 

70 InteractiveFormDictEntries, 

71 OutlineFontFlag, 

72 PageLabelStyle, 

73 PagesAttributes, 

74 TypFitArguments, 

75 UserAccessPermissions, 

76) 

77from .constants import Core as CO 

78from .constants import FieldDictionaryAttributes as FA 

79from .constants import PageAttributes as PG 

80from .constants import TrailerKeys as TK 

81from .errors import PyPdfError 

82from .generic import ( 

83 PAGE_FIT, 

84 ArrayObject, 

85 BooleanObject, 

86 ByteStringObject, 

87 ContentStream, 

88 DecodedStreamObject, 

89 Destination, 

90 DictionaryObject, 

91 EmbeddedFile, 

92 Fit, 

93 FloatObject, 

94 IndirectObject, 

95 NameObject, 

96 NullObject, 

97 NumberObject, 

98 PdfObject, 

99 RectangleObject, 

100 ReferenceLink, 

101 StreamObject, 

102 TextStringObject, 

103 TreeObject, 

104 ViewerPreferences, 

105 create_string_object, 

106 extract_links, 

107 hex_to_rgb, 

108 is_null_or_none, 

109) 

110from .pagerange import PageRange, PageRangeSpec 

111from .types import ( 

112 AnnotationSubtype, 

113 BorderArrayType, 

114 LayoutType, 

115 OutlineItemType, 

116 OutlineType, 

117 PagemodeType, 

118) 

119from .xmp import XmpInformation 

120 

121ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all() 

122DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12 

123 

124 

class ObjectDeletionFlag(enum.IntFlag):
    """
    Bit flags naming categories of PDF content.

    Every member except ``NONE`` and ``IMAGES`` occupies a single bit, so
    members can be combined with ``|``. ``IMAGES`` is the union of the three
    image-related flags.
    """

    NONE = 0
    TEXT = 1 << 0
    LINKS = 1 << 1
    ATTACHMENTS = 1 << 2
    OBJECTS_3D = 1 << 3
    ALL_ANNOTATIONS = 1 << 4
    XOBJECT_IMAGES = 1 << 5
    INLINE_IMAGES = 1 << 6
    DRAWING_IMAGES = 1 << 7
    IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES

136 

137 

138def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str: 

139 hash = hashlib.md5(usedforsecurity=False) 

140 for block in iter(lambda: stream.read(blocksize), b""): 

141 hash.update(block) 

142 return hash.hexdigest() 

143 

144 

145class PdfWriter(PdfDocCommon): 

146 """ 

147 Write a PDF file out, given pages produced by another class or through 

148 cloning a PDF file during initialization. 

149 

150 Typically data is added from a :class:`PdfReader<pypdf.PdfReader>`. 

151 

152 Args: 

153 clone_from: identical to fileobj (for compatibility) 

154 

155 incremental: If true, loads the document and set the PdfWriter in incremental mode. 

156 

157 When writing incrementally, the original document is written first and new/modified 

158 content is appended. To be used for signed document/forms to keep signature valid. 

159 

160 full: If true, loads all the objects (always full if incremental = True). 

161 This parameter may allow loading large PDFs. 

162 

163 """ 

164 

def __init__(
    self,
    fileobj: Union[None, PdfReader, StrByteType, Path] = "",
    clone_from: Union[None, PdfReader, StrByteType, Path] = None,
    incremental: bool = False,
    full: bool = False,
) -> None:
    """
    Initialise the writer state and, when a source is available, clone it.

    Args:
        fileobj: Remembered in ``temp_fileobj``. Also used as the document to
            clone when it is a ``PdfReader``, an existing non-empty path, or a
            non-empty stream (see ``_get_clone_from`` below).
        clone_from: Used as the clone source when ``fileobj`` is a str, Path
            or stream and either ``fileobj`` is ``""`` or ``clone_from`` is
            given.
        incremental: Start in incremental mode; ``fileobj`` must then be a
            path, a ``BytesIO`` or a ``PdfReader``.
        full: Treated like ``incremental`` while loading; ``self.incremental``
            is reset to False at the end unless ``incremental`` is also true.

    Raises:
        PyPdfError: In incremental mode, when ``fileobj`` could not be turned
            into a ``PdfReader``.

    """
    # ``full`` reuses the incremental loading path; see the reset near the end.
    self.incremental = incremental or full
    """
    Returns if the PdfWriter object has been started in incremental mode.
    """

    self._objects: list[Optional[PdfObject]] = []
    """
    The indirect objects in the PDF.
    For the incremental case, it will be filled with None
    in clone_reader_document_root.
    """

    self._original_hash: list[int] = []
    """
    List of hashes after import; used to identify changes.
    """

    self._idnum_hash: dict[bytes, tuple[IndirectObject, list[IndirectObject]]] = {}
    """
    Maps hash values of indirect objects to the list of IndirectObjects.
    This is used for compression.
    """

    # NOTE(review): the annotation (and _add_page) index the inner dict by a
    # plain idnum, not by an (idnum, generation) tuple as the text below says.
    self._id_translated: dict[int, dict[int, int]] = {}
    """List of already translated IDs.
    dict[id(pdf)][(idnum, generation)]
    """

    # Only declared here; it is assigned in the non-incremental branch below.
    self._info_obj: Optional[PdfObject]
    """The PDF file's document information dictionary,
    the Info entry in the PDF file's trailer dictionary."""

    self._ID: Union[ArrayObject, None] = None
    """The PDF file identifier,
    defined by the ID in the PDF file's trailer dictionary."""

    self._unresolved_links: list[tuple[ReferenceLink, ReferenceLink]] = []
    "Tracks links in pages added to the writer for resolving later."
    self._merged_in_pages: dict[Optional[IndirectObject], Optional[IndirectObject]] = {}
    "Tracks pages added to the writer and what page they turned into."

    if self.incremental:
        # Normalise the source step by step: path -> BytesIO -> PdfReader.
        if isinstance(fileobj, (str, Path)):
            with open(fileobj, "rb") as f:
                fileobj = BytesIO(f.read(-1))
        if isinstance(fileobj, BytesIO):
            fileobj = PdfReader(fileobj)
        if not isinstance(fileobj, PdfReader):
            raise PyPdfError("Invalid type for incremental mode")
        self._reader = fileobj  # prev content is in _reader.stream
        self._header = fileobj.pdf_header.encode()
        self._readonly = True  # TODO: to be analysed
    else:
        # Fresh document: default header and an /Info dictionary naming pypdf.
        self._header = b"%PDF-1.3"
        self._info_obj = self._add_object(
            DictionaryObject(
                {NameObject("/Producer"): create_string_object("pypdf")}
            )
        )

    def _get_clone_from(
        fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
        clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
    ) -> Union[None, PdfReader, str, Path, IO[Any], BytesIO]:
        # Decide which of the two arguments (if any) is the document to clone.
        if isinstance(fileobj, (str, Path, IO, BytesIO)) and (
            fileobj == "" or clone_from is not None
        ):
            return clone_from
        cloning = True
        # A missing or empty file cannot be cloned.
        if isinstance(fileobj, (str, Path)) and (
            not Path(str(fileobj)).exists()
            or Path(str(fileobj)).stat().st_size == 0
        ):
            cloning = False
        if isinstance(fileobj, (IOBase, BytesIO)):
            # Seek to the end to learn the size, then restore the position.
            t = fileobj.tell()
            if fileobj.seek(0, 2) == 0:
                cloning = False
            fileobj.seek(t, 0)
        if cloning:
            clone_from = fileobj
        return clone_from

    clone_from = _get_clone_from(fileobj, clone_from)
    # To prevent overwriting
    self.temp_fileobj = fileobj
    self.fileobj = ""
    self._with_as_usage = False
    self._cloned = False
    # The root of our page tree node
    pages = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Pages"),
            NameObject(PagesAttributes.COUNT): NumberObject(0),
            NameObject(PagesAttributes.KIDS): ArrayObject(),
        }
    )
    self.flattened_pages = []
    self._encryption: Optional[Encryption] = None
    self._encrypt_entry: Optional[DictionaryObject] = None

    if clone_from is not None:
        if not isinstance(clone_from, PdfReader):
            clone_from = PdfReader(clone_from)
        self.clone_document_from_reader(clone_from)
        self._cloned = True
    else:
        # Nothing to clone: register the empty page tree and a catalog for it.
        self._pages = self._add_object(pages)
        self._root_object = DictionaryObject(
            {
                NameObject(PagesAttributes.TYPE): NameObject(CO.CATALOG),
                NameObject(CO.PAGES): self._pages,
            }
        )
        self._add_object(self._root_object)
    # ``full`` only affected loading; the writer itself is not incremental.
    if full and not incremental:
        self.incremental = False
    # Store both /ID entries as byte strings rather than text strings.
    if isinstance(self._ID, list):
        if isinstance(self._ID[0], TextStringObject):
            self._ID[0] = ByteStringObject(self._ID[0].get_original_bytes())
        if isinstance(self._ID[1], TextStringObject):
            self._ID[1] = ByteStringObject(self._ID[1].get_original_bytes())

294 

295 # for commonality 

@property
def is_encrypted(self) -> bool:
    """
    Read-only flag telling whether this PDF file is encrypted.

    This implementation always returns False.

    Note that this property, if true, will remain true even after the
    :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
    """
    encrypted = False
    return encrypted

305 

@property
def root_object(self) -> DictionaryObject:
    """
    Give direct access to the PDF structure.

    Returns:
        The object stored in ``self._root_object``.

    Note:
        Recommended only for read access.

    """
    catalog = self._root_object
    return catalog

316 

317 @property 

318 def _info(self) -> Optional[DictionaryObject]: 

319 """ 

320 Provide access to "/Info". Standardized with PdfReader. 

321 

322 Returns: 

323 /Info Dictionary; None if the entry does not exist 

324 

325 """ 

326 return ( 

327 None 

328 if self._info_obj is None 

329 else cast(DictionaryObject, self._info_obj.get_object()) 

330 ) 

331 

332 @_info.setter 

333 def _info(self, value: Optional[Union[IndirectObject, DictionaryObject]]) -> None: 

334 if value is None: 

335 try: 

336 self._objects[self._info_obj.indirect_reference.idnum - 1] = None # type: ignore 

337 except (KeyError, AttributeError): 

338 pass 

339 self._info_obj = None 

340 else: 

341 if self._info_obj is None: 

342 self._info_obj = self._add_object(DictionaryObject()) 

343 obj = cast(DictionaryObject, self._info_obj.get_object()) 

344 obj.clear() 

345 obj.update(cast(DictionaryObject, value.get_object())) 

346 

347 @property 

348 def xmp_metadata(self) -> Optional[XmpInformation]: 

349 """XMP (Extensible Metadata Platform) data.""" 

350 return cast(XmpInformation, self.root_object.xmp_metadata) 

351 

352 @xmp_metadata.setter 

353 def xmp_metadata(self, value: Union[XmpInformation, bytes, None]) -> None: 

354 """XMP (Extensible Metadata Platform) data.""" 

355 if value is None: 

356 if "/Metadata" in self.root_object: 

357 del self.root_object["/Metadata"] 

358 return 

359 

360 metadata = self.root_object.get("/Metadata", None) 

361 if not isinstance(metadata, IndirectObject): 

362 if metadata is not None: 

363 del self.root_object["/Metadata"] 

364 metadata_stream = StreamObject() 

365 stream_reference = self._add_object(metadata_stream) 

366 self.root_object[NameObject("/Metadata")] = stream_reference 

367 else: 

368 metadata_stream = cast(StreamObject, metadata.get_object()) 

369 

370 if isinstance(value, XmpInformation): 

371 bytes_data = value.stream.get_data() 

372 else: 

373 bytes_data = value 

374 metadata_stream.set_data(bytes_data) 

375 

@property
def with_as_usage(self) -> bool:
    """Deprecated accessor for ``self._with_as_usage``; reports the deprecation first."""
    deprecation_no_replacement("with_as_usage", "5.0")
    current = self._with_as_usage
    return current


@with_as_usage.setter
def with_as_usage(self, value: bool) -> None:
    """Deprecated mutator for ``self._with_as_usage``; reports the deprecation first."""
    deprecation_no_replacement("with_as_usage", "5.0")
    self._with_as_usage = value

385 

386 def __enter__(self) -> "PdfWriter": 

387 """Store how writer is initialized by 'with'.""" 

388 c: bool = self._cloned 

389 t = self.temp_fileobj 

390 self.__init__() # type: ignore 

391 self._cloned = c 

392 self._with_as_usage = True 

393 self.fileobj = t # type: ignore 

394 return self 

395 

396 def __exit__( 

397 self, 

398 exc_type: Optional[type[BaseException]], 

399 exc: Optional[BaseException], 

400 traceback: Optional[TracebackType], 

401 ) -> None: 

402 """Write data to the fileobj.""" 

403 if self.fileobj and not self._cloned: 

404 self.write(self.fileobj) 

405 

@property
def pdf_header(self) -> str:
    """
    Read/Write property of the PDF header that is written.

    This should be something like ``'%PDF-1.5'``. It is recommended to set
    the lowest version that supports all features which are used within the
    PDF file.

    Note: `pdf_header` returns a string but accepts bytes or str for writing
    """
    raw_header = self._header
    return raw_header.decode()


@pdf_header.setter
def pdf_header(self, new_header: Union[str, bytes]) -> None:
    """Store the header as bytes, encoding it first when given as ``str``."""
    self._header = new_header.encode() if isinstance(new_header, str) else new_header

424 

425 def _add_object(self, obj: PdfObject) -> IndirectObject: 

426 if ( 

427 getattr(obj, "indirect_reference", None) is not None 

428 and obj.indirect_reference.pdf == self # type: ignore 

429 ): 

430 return obj.indirect_reference # type: ignore 

431 # check for /Contents in Pages (/Contents in annotations are strings) 

432 if isinstance(obj, DictionaryObject) and isinstance( 

433 obj.get(PG.CONTENTS, None), (ArrayObject, DictionaryObject) 

434 ): 

435 obj[NameObject(PG.CONTENTS)] = self._add_object(obj[PG.CONTENTS]) 

436 self._objects.append(obj) 

437 obj.indirect_reference = IndirectObject(len(self._objects), 0, self) 

438 return obj.indirect_reference 

439 

def get_object(
    self,
    indirect_reference: Union[int, IndirectObject],
) -> PdfObject:
    """
    Return the object stored under an object number or indirect reference.

    Args:
        indirect_reference: A 1-based object number, or a reference that
            belongs to this writer.

    Returns:
        The stored object.

    Raises:
        ValueError: If the reference belongs to a different PDF.

    """
    if isinstance(indirect_reference, int):
        idnum = indirect_reference
    else:
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        idnum = indirect_reference.idnum
    obj = self._objects[idnum - 1]
    assert obj is not None, "mypy"
    return obj

452 

def _replace_object(
    self,
    indirect_reference: Union[int, IndirectObject],
    obj: PdfObject,
) -> PdfObject:
    """
    Put ``obj`` into the slot of an existing indirect object.

    The generation number of the object being replaced is kept. An ``obj``
    that is referenced from another PDF is cloned into this writer first.

    Args:
        indirect_reference: Object number or reference of the slot to replace.
        obj: The replacement object.

    Returns:
        The object now stored in the slot (the clone, if one was made).

    Raises:
        ValueError: If the reference belongs to a different PDF.

    """
    if isinstance(indirect_reference, IndirectObject):
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        indirect_reference = indirect_reference.idnum
    slot = indirect_reference - 1
    gen = self._objects[slot].indirect_reference.generation  # type: ignore

    current_ref = getattr(obj, "indirect_reference", None)
    if current_ref is not None and current_ref.pdf != self:  # type: ignore
        obj = obj.clone(self)

    self._objects[slot] = obj
    obj.indirect_reference = IndirectObject(indirect_reference, gen, self)

    assert isinstance(obj, PdfObject), "mypy"
    return obj

473 

def _add_page(
    self,
    page: PageObject,
    index: int,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Clone ``page`` into this writer and link it into the page tree.

    Args:
        page: The page to add; it must be a ``PageObject`` whose type entry is
            the page type.
        index: Position passed to ``_get_page_in_node`` and, for insertion,
            to ``flattened_pages``.
        excluded_keys: Keys passed to ``clone`` as exclusions; the parent
            key and "/StructParents" are always added.

    Returns:
        The cloned page that now belongs to this writer.

    Raises:
        ValueError: If ``page`` is not a valid page object.
        PyPdfError: If walking up the parent chain exceeds 1000 steps.

    """
    if not isinstance(page, PageObject) or page.get(PagesAttributes.TYPE, None) != CO.PAGE:
        raise ValueError("Invalid page object")
    assert self.flattened_pages is not None, "for mypy"
    page_org = page
    excluded_keys = list(excluded_keys)
    excluded_keys += [PagesAttributes.PARENT, "/StructParents"]
    # Acrobat does not accept two indirect references pointing on the same
    # page; therefore in order to add multiple copies of the same
    # page, we need to create a new dictionary for the page, however the
    # objects below (including content) are not duplicated:
    # Best effort: any failure here (e.g. no reference, unknown id) is ignored.
    try:  # delete an already existing page
        del self._id_translated[id(page_org.indirect_reference.pdf)][  # type: ignore
            page_org.indirect_reference.idnum  # type: ignore
        ]
    except Exception:
        pass

    page = cast(
        "PageObject", page_org.clone(self, False, excluded_keys).get_object()
    )
    if page_org.pdf is not None:
        # Update this writer's header from both headers via the helper.
        other = page_org.pdf.pdf_header
        self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other)

    node, idx = self._get_page_in_node(index)
    page[NameObject(PagesAttributes.PARENT)] = node.indirect_reference

    # A negative idx means "append to this node"; otherwise insert at idx.
    if idx >= 0:
        cast(ArrayObject, node[PagesAttributes.KIDS]).insert(idx, page.indirect_reference)
        self.flattened_pages.insert(index, page)
    else:
        cast(ArrayObject, node[PagesAttributes.KIDS]).append(page.indirect_reference)
        self.flattened_pages.append(page)
    # Increment the page count of the node and of each ancestor; the step
    # counter bounds the walk at 1000 iterations.
    recurse = 0
    while not is_null_or_none(node):
        node = cast(DictionaryObject, node.get_object())
        node[NameObject(PagesAttributes.COUNT)] = NumberObject(cast(int, node[PagesAttributes.COUNT]) + 1)
        node = node.get(PagesAttributes.PARENT, None)  # type: ignore[assignment]  # TODO: Fix.
        recurse += 1
        if recurse > 1000:
            raise PyPdfError("Too many recursive calls!")

    if page_org.pdf is not None:
        # the page may contain links to other pages, and those other
        # pages may or may not already be added. we store the
        # information we need, so that we can resolve the references
        # later.
        self._unresolved_links.extend(extract_links(page, page_org))
        self._merged_in_pages[page_org.indirect_reference] = page.indirect_reference

    return page

531 

def set_need_appearances_writer(self, state: bool = True) -> None:
    """
    Set the "NeedAppearances" flag in the document's AcroForm dictionary.

    The "NeedAppearances" flag indicates whether the appearance dictionary
    for form fields should be automatically generated by the PDF viewer or
    if the embedded appearance should be used.

    The AcroForm dictionary is created if the catalog does not have one.
    Any exception raised on the way is logged as a warning, not propagated.

    Args:
        state: The actual value of the NeedAppearances flag.

    Returns:
        None

    """
    # See §12.7.2 and §7.7.2 for more information:
    # https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    try:
        catalog = self._root_object
        # get the AcroForm tree
        if CatalogDictionary.ACRO_FORM not in catalog:
            catalog[NameObject(CatalogDictionary.ACRO_FORM)] = self._add_object(
                DictionaryObject()
            )

        flag_key = NameObject(InteractiveFormDictEntries.NeedAppearances)
        flag_value = BooleanObject(state)
        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
        acro_form[flag_key] = flag_value
    except Exception as exc:  # pragma: no cover
        logger_warning(
            f"set_need_appearances_writer({state}) catch : {exc}", __name__
        )

564 

def create_viewer_preferences(self) -> ViewerPreferences:
    """
    Create an empty viewer-preferences object and attach it to the catalog.

    Returns:
        The new ``ViewerPreferences`` object; the catalog stores its
        indirect reference.

    """
    preferences = ViewerPreferences()
    preferences_ref = self._add_object(preferences)
    self._root_object[NameObject(CatalogDictionary.VIEWER_PREFERENCES)] = preferences_ref
    return preferences

571 

def add_page(
    self,
    page: PageObject,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Append a page at the end of this PDF file.

    Recommended for advanced usage including the adequate excluded_keys.

    The page is usually acquired from a :class:`PdfReader<pypdf.PdfReader>`
    instance.

    Args:
        page: The page to add to the document. Should be
            an instance of :class:`PageObject<pypdf._page.PageObject>`
        excluded_keys: Forwarded unchanged to ``_add_page``.

    Returns:
        The added PageObject.

    """
    assert self.flattened_pages is not None, "mypy"
    end_position = len(self.flattened_pages)
    return self._add_page(page, end_position, excluded_keys)

596 

def insert_page(
    self,
    page: PageObject,
    index: int = 0,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Insert a page in this PDF file. The page is usually acquired from a
    :class:`PdfReader<pypdf.PdfReader>` instance.

    Args:
        page: The page to add to the document.
        index: Position at which the page will be inserted. Negative values
            count from the end; positions at or past the end append.
        excluded_keys: Forwarded unchanged to the page-adding helper.

    Returns:
        The added PageObject.

    Raises:
        ValueError: If a negative ``index`` is still negative after the page
            count has been added to it.

    """
    assert self.flattened_pages is not None, "mypy"
    page_count = len(self.flattened_pages)
    if index < 0:
        index += page_count
        if index < 0:
            raise ValueError("Invalid index value")
    if index >= page_count:
        return self.add_page(page, excluded_keys)
    return self._add_page(page, index, excluded_keys)

624 

def _get_page_number_by_indirect(
    self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> Optional[int]:
    """
    Return the page number of the page that a reference points to.

    Args:
        indirect_reference: An object number, an indirect reference, or a
            null/None value.

    Returns:
        The page number or None (for null input, or when the referenced
        object is not a page).

    """
    # To provide same function as in PdfReader
    if is_null_or_none(indirect_reference):
        return None
    assert indirect_reference is not None, "mypy"
    if isinstance(indirect_reference, int):
        # A bare object number is wrapped as generation 0 of this writer.
        indirect_reference = IndirectObject(indirect_reference, 0, self)
    target = indirect_reference.get_object()
    return target.page_number if isinstance(target, PageObject) else None

648 

def add_blank_page(
    self, width: Optional[float] = None, height: Optional[float] = None
) -> PageObject:
    """
    Append a blank page to this PDF file and return it.

    If no page size is specified, use the size of the last page.

    Args:
        width: The width of the new page expressed in default user
            space units.
        height: The height of the new page expressed in default
            user space units.

    Returns:
        The newly appended page.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.

    """
    blank = PageObject.create_blank_page(self, width, height)
    appended = self.add_page(blank)
    return appended

673 

def insert_blank_page(
    self,
    width: Optional[Union[float, decimal.Decimal]] = None,
    height: Optional[Union[float, decimal.Decimal]] = None,
    index: int = 0,
) -> PageObject:
    """
    Insert a blank page to this PDF file and return it.

    If a dimension is missing and a page already exists at ``index``, the
    new page takes both dimensions from that page. Otherwise the size
    decision is left to ``PageObject.create_blank_page``, which is
    documented to fall back to the last page.

    Args:
        width: The width of the new page expressed in default user
            space units.
        height: The height of the new page expressed in default
            user space units.
        index: Position to add the page.

    Returns:
        The newly inserted page.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.

    """
    num_pages = self.get_num_pages()
    # Only look up self.pages[index] when that index is valid. The earlier
    # condition, `width is None or (height is None and index < n)`, indexed
    # the page list whenever width was None, so an empty document or an
    # out-of-range index raised IndexError, not the documented error.
    if (width is None or height is None) and -num_pages <= index < num_pages:
        oldpage = self.pages[index]
        width = oldpage.mediabox.width
        height = oldpage.mediabox.height
    page = PageObject.create_blank_page(self, width, height)
    self.insert_page(page, index)
    return page

707 

@property
def open_destination(
    self,
) -> Union[None, Destination, TextStringObject, ByteStringObject]:
    """Return the open destination as provided by the parent class."""
    return super().open_destination


@open_destination.setter
def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
    """
    Set or remove the catalog's "/OpenAction" entry.

    ``None`` removes the entry (a missing entry is not an error). A string is
    stored as a text string, a ``Destination`` as its destination array, and
    a ``PageObject`` as a page-fit destination array for that page. Any other
    value leaves the catalog untouched.
    """
    if dest is None:
        try:
            del self._root_object["/OpenAction"]
        except KeyError:
            pass
        return
    if isinstance(dest, str):
        self._root_object[NameObject("/OpenAction")] = TextStringObject(dest)
        return
    if isinstance(dest, Destination):
        self._root_object[NameObject("/OpenAction")] = dest.dest_array
        return
    if isinstance(dest, PageObject):
        # A page without an indirect reference is represented by a null object.
        self._root_object[NameObject("/OpenAction")] = Destination(
            "Opening",
            dest.indirect_reference
            if dest.indirect_reference is not None
            else NullObject(),
            PAGE_FIT,
        ).dest_array

733 

def add_js(self, javascript: str) -> None:
    """
    Add JavaScript which will launch upon opening this PDF.

    The script is stored as a JavaScript action and listed, under a freshly
    generated UUID name, in the catalog's "/Names" → "/JavaScript" entry.

    Args:
        javascript: Your JavaScript.

    >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
    # Example: This will launch the print window when the PDF is opened.

    """
    # Names / JavaScript preferred to be able to add multiple scripts
    if "/Names" not in self._root_object:
        self._root_object[NameObject(CA.NAMES)] = DictionaryObject()
    name_dict = cast(DictionaryObject, self._root_object[CA.NAMES])
    if "/JavaScript" not in name_dict:
        name_dict[NameObject("/JavaScript")] = DictionaryObject(
            {NameObject("/Names"): ArrayObject()}
        )
    javascript_entry = cast(DictionaryObject, name_dict["/JavaScript"])
    scripts = cast(ArrayObject, javascript_entry["/Names"])

    # We need a name for parameterized JavaScript in the PDF file,
    # but it can be anything.
    script_name = create_string_object(str(uuid.uuid4()))
    scripts.append(script_name)

    action = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Action"),
            NameObject("/S"): NameObject("/JavaScript"),
            NameObject("/JS"): TextStringObject(f"{javascript}"),
        }
    )
    scripts.append(self._add_object(action))

768 

def add_attachment(self, filename: str, data: Union[str, bytes]) -> "EmbeddedFile":
    """
    Embed a file inside the PDF.

    Reference:
    https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    Section 7.11.3

    Args:
        filename: The filename to display.
        data: The data in the file.

    Returns:
        EmbeddedFile instance for the newly created embedded file.

    """
    # All of the work is delegated to EmbeddedFile.
    embedded = EmbeddedFile._create_new(self, filename, data)
    return embedded

786 

def append_pages_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Copy pages from reader to writer. Includes an optional callback
    parameter which is invoked after pages are appended to the writer.

    ``append`` should be preferred.

    Args:
        reader: The PdfReader whose pages are added, in order, to this
            writer.
        after_page_append:
            Callback function that is invoked after each page is appended to
            the writer. Its single parameter is the page just appended to
            this document, i.e. the writer's page, not the reader's. A
            non-callable value is ignored.

    """
    # Copy pages from reader to writer
    for source_page in reader.pages:
        appended_page = self.add_page(source_page)
        # Trigger callback, pass writer page as parameter
        if callable(after_page_append):
            after_page_append(appended_page)

818 

def _merge_content_stream_to_page(
    self,
    page: PageObject,
    new_content_data: bytes,
) -> None:
    """
    Combines existing content stream(s) with new content (as bytes).

    Three cases are handled: no "/Contents" entry (a new stream is created),
    an array of streams (a new stream is appended to the array), and a single
    stream (replaced by a new stream holding old data, a newline, and the new
    data).

    Args:
        page: The page to which the new content data will be added.
        new_content_data: A binary-encoded new content stream, for
            instance the commands to draw an XObject.
    """
    contents_key = NameObject("/Contents")

    if contents_key not in page:
        # If no existing content, then we have an empty page.
        # Create a new StreamObject in a new /Contents entry.
        fresh_stream = StreamObject()
        fresh_stream.set_data(new_content_data)
        page[NameObject("/Contents")] = self._add_object(fresh_stream)
        return

    # First resolve the existing page content. This always is an IndirectObject:
    # PDF Explained by John Whitington
    # https://www.oreilly.com/library/view/pdf-explained/9781449321581/ch04.html
    existing_content = page[contents_key].get_object()

    if isinstance(existing_content, ArrayObject):
        # Create a new StreamObject for the new_content_data
        extra_stream = StreamObject()
        extra_stream.set_data(new_content_data)
        existing_content.append(self._add_object(extra_stream))
        page[NameObject("/Contents")] = self._add_object(existing_content)
    elif isinstance(existing_content, StreamObject):
        # Merge new content to existing StreamObject
        combined = existing_content.get_data() + b"\n" + new_content_data
        replacement = StreamObject()
        replacement.set_data(combined)
        page[NameObject("/Contents")] = self._add_object(replacement)

857 

def _add_apstream_object(
    self,
    page: PageObject,
    appearance_stream_obj: StreamObject,
    object_name: str,
    x_offset: float,
    y_offset: float,
    font_res: Optional[DictionaryObject] = None
) -> None:
    """
    Adds an appearance stream to the page content in the form of
    an XObject.

    The stream is registered in the page's "/XObject" resources as
    ``/Fm_<object_name>`` (sanitized) and drawn by content appended to the
    page. If that name is already present, the existing entry is kept and a
    warning is logged.

    Args:
        page: The page to which to add the appearance stream.
        appearance_stream_obj: The appearance stream.
        object_name: The name of the appearance stream.
        x_offset: The horizontal offset for the appearance stream.
        y_offset: The vertical offset for the appearance stream.
        font_res: The appearance stream's font resource (if given).
    """
    # Prepare XObject resource dictionary on the page
    resources = cast(DictionaryObject, page[PG.RESOURCES])

    if font_res is not None:
        base_font = font_res["/BaseFont"]  # [/"Name"] often also exists, but is deprecated
        if "/Font" not in resources:
            resources[NameObject("/Font")] = DictionaryObject()
        page_fonts = cast(DictionaryObject, resources[NameObject("/Font")])
        if base_font not in page_fonts:
            page_fonts[NameObject(base_font)] = font_res

    # Always add the resolved stream object to the writer to get a new IndirectObject.
    # This ensures we have a valid IndirectObject managed by *this* writer.
    stream_ref = self._add_object(appearance_stream_obj)
    xobject_name = NameObject(f"/Fm_{object_name}")._sanitize()

    if "/XObject" not in resources:
        resources[NameObject("/XObject")] = DictionaryObject()
    page_xobjects = cast(DictionaryObject, resources["/XObject"])
    if xobject_name in page_xobjects:
        logger_warning(
            f"XObject {xobject_name!r} already added to page resources. This might be an issue.",
            __name__
        )
    else:
        page_xobjects[xobject_name] = stream_ref

    # Draw the XObject translated by the offsets, inside a q/Q pair.
    placement = Transformation().translate(x_offset, y_offset)
    draw_commands = f"q\n{placement._to_cm()}\n{xobject_name} Do\nQ".encode()
    self._merge_content_stream_to_page(page, draw_commands)

905 

def _update_field_annotation(
    self,
    page: PageObject,
    field: DictionaryObject,
    annotation: DictionaryObject,
    font_name: str = "",
    font_size: float = -1,
    flatten: bool = False,
) -> None:
    """
    Rebuild the normal appearance stream (``/AP /N``) of a text or choice widget.

    The default appearance string (``/DA``) and the font resources (``/DR``)
    are looked up on the annotation (inherited) with the AcroForm dictionary
    as fallback, the field value is rendered through
    ``generate_appearance_stream`` and the result is stored as the
    annotation's ``/AP /N`` form XObject.

    Args:
        page: Page holding the annotation; only used when ``flatten`` is set.
        field: Field dictionary providing ``/V``, ``/FT`` and ``/Ff``.
        annotation: Widget annotation whose appearance is regenerated.
        font_name: Font resource name replacing the one found in ``/DA``;
            when empty, the name from ``/DA`` is used.
        font_size: Font size to use; a negative value means "take the size
            from ``/DA``", and a resulting size of 0 triggers auto-sizing.
        flatten: If True, the generated appearance is also drawn into the
            page content via ``_add_apstream_object``.
    """
    # Calculate rectangle dimensions
    _rct = cast(RectangleObject, annotation[AA.Rect])
    # Same size as the widget rectangle, but anchored at the origin (used as /BBox).
    rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))

    # Extract font information
    da = annotation.get_inherited(
        AA.DA,
        cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
            AA.DA, None
        ),
    )
    if da is None:
        da = TextStringObject("/Helv 0 Tf 0 g")
    else:
        da = da.get_object()
    # Tokenize /DA on whitespace; the two tokens preceding "Tf" are the font
    # name and the font size.
    # NOTE(review): list.index raises ValueError when /DA has no "Tf" token.
    font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
    font_properties = [x for x in font_properties if x != ""]
    if font_name:
        font_properties[font_properties.index("Tf") - 2] = font_name
    else:
        font_name = font_properties[font_properties.index("Tf") - 2]
    font_height = (
        font_size
        if font_size >= 0
        else float(font_properties[font_properties.index("Tf") - 1])
    )
    # A size of 0 means auto-size: fixed default for multiline fields,
    # otherwise derived from the rectangle height.
    if font_height == 0:
        if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
            font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
        else:
            font_height = rct.height - 2
    font_properties[font_properties.index("Tf") - 1] = str(font_height)
    da = " ".join(font_properties)
    y_offset = rct.height - 1 - font_height

    # Retrieve font information from local DR ...
    dr: Any = cast(
        DictionaryObject,
        cast(
            DictionaryObject,
            annotation.get_inherited(
                "/DR",
                cast(
                    DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
                ).get("/DR", DictionaryObject()),
            ),
        ).get_object(),
    )
    dr = dr.get("/Font", DictionaryObject()).get_object()
    # _default_fonts_space_width keys is the list of Standard fonts
    if font_name not in dr and font_name not in _default_fonts_space_width:
        # ...or AcroForm dictionary
        dr = cast(
            dict[Any, Any],
            cast(
                DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
            ).get("/DR", {}),
        )
        dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
    font_res = dr.get(font_name, None)
    if not is_null_or_none(font_res):
        font_res = cast(DictionaryObject, font_res.get_object())
        _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
            200, font_res
        )
        try:  # remove width stored in -1 key
            del font_map[-1]
        except KeyError:
            pass
        # Reverse mapping: character -> bytes to emit in the content stream.
        font_full_rev: dict[str, bytes]
        if isinstance(font_encoding, str):
            # The encoding is a codec name: encode each mapped key with it.
            font_full_rev = {
                v: k.encode(font_encoding) for k, v in font_map.items()
            }
        else:
            # The encoding is a code -> character table: invert it, then let
            # the entries of font_map override/extend the result.
            font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
            font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
            for key, value in font_map.items():
                font_full_rev[value] = font_encoding_rev.get(key, key)
    else:
        logger_warning(f"Font dictionary for {font_name} not found.", __name__)
        font_full_rev = {}

    # Retrieve field text and selected values
    field_flags = field.get(FA.Ff, 0)
    # Choice field without the Combo flag (list box): render every option,
    # one per line, and pass the current value(s) as the selection.
    if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
        txt = "\n".join(annotation.get_inherited(FA.Opt, []))
        sel = field.get("/V", [])
        if not isinstance(sel, list):
            sel = [sel]
    else:  # /Tx
        txt = field.get("/V", "")
        sel = []
    # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
    txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
    # Generate appearance stream
    ap_stream = generate_appearance_stream(
        txt, sel, da, font_full_rev, rct, font_height, y_offset
    )

    # Create appearance dictionary
    dct = DecodedStreamObject.initialize_from_dictionary(
        {
            NameObject("/Type"): NameObject("/XObject"),
            NameObject("/Subtype"): NameObject("/Form"),
            NameObject("/BBox"): rct,
            "__streamdata__": ByteStringObject(ap_stream),
            "/Length": 0,
        }
    )
    # Carry over the remaining entries of an existing /AP /N, except the
    # ones that describe the stream that is being replaced.
    if AA.AP in annotation:
        for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
            if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                dct[k] = v

    # Update Resources with font information if necessary
    # NOTE(review): this tests `is not None`, whereas the lookup above used
    # is_null_or_none(); a value rejected there but not None still lands here.
    if font_res is not None:
        dct[NameObject("/Resources")] = DictionaryObject(
            {
                NameObject("/Font"): DictionaryObject(
                    {
                        NameObject(font_name): getattr(
                            font_res, "indirect_reference", font_res
                        )
                    }
                )
            }
        )
    if AA.AP not in annotation:
        annotation[NameObject(AA.AP)] = DictionaryObject(
            {NameObject("/N"): self._add_object(dct)}
        )
    elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
        cast(DictionaryObject, annotation[NameObject(AA.AP)])[
            NameObject("/N")
        ] = self._add_object(dct)
    else:  # [/AP][/N] exists
        # Reuse the object number of the existing appearance stream so that
        # references to it now resolve to the new stream.
        n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
        self._objects[n - 1] = dct
        dct.indirect_reference = IndirectObject(n, 0, self)

    if flatten:
        field_name = self._get_qualified_field_name(annotation)
        # Draw at the widget's lower-left corner taken from the original /Rect.
        self._add_apstream_object(page, dct, field_name, _rct[0], _rct[1], font_res)

1059 

# Empty field-flag set; used as the default for the `flags` parameter of
# update_page_form_field_values, where a falsy value leaves /Ff untouched.
FFBITS_NUL = FA.FfBits(0)

1061 

def update_page_form_field_values(
    self,
    page: Union[PageObject, list[PageObject], None],
    fields: Mapping[str, Union[str, list[str], tuple[str, str, float]]],
    flags: FA.FfBits = FFBITS_NUL,
    auto_regenerate: Optional[bool] = True,
    flatten: bool = False,
) -> None:
    """
    Update the form field values for a given page from a fields dictionary.

    Copy field texts and values from fields to page.
    If the field links to a parent object, add the information to the parent.

    Args:
        page: `PageObject` - references **PDF writer's page** where the
            annotations and field data will be updated.
            `list[PageObject]` - provides list of pages to be processed.
            `None` - all pages.
        fields: a Python dictionary of:

            * field names (/T) as keys and text values (/V) as value
            * field names (/T) as keys and list of text values (/V) for multiple choice list
            * field names (/T) as keys and tuple of:
                * text values (/V)
                * font id (e.g. /F1, the font id must exist)
                * font size (0 for autosize)

        flags: A set of flags from :class:`~pypdf.constants.FieldDictionaryAttributes.FfBits`.

        auto_regenerate: Set/unset the need_appearances flag;
            the flag is unchanged if auto_regenerate is None.

        flatten: Whether or not to flatten the annotation. If True, this adds the annotation's
            appearance stream to the page contents. Note that this option does not remove the
            annotation itself.

    Raises:
        PyPdfError: If the document catalog has no /AcroForm dictionary, or
            the /AcroForm dictionary has no /Fields entry.

    """
    if CatalogDictionary.ACRO_FORM not in self._root_object:
        raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
    af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    if InteractiveFormDictEntries.Fields not in af:
        raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
    if isinstance(auto_regenerate, bool):
        self.set_need_appearances_writer(auto_regenerate)
    # Iterate through pages, update field values
    if page is None:
        page = list(self.pages)
    if isinstance(page, list):
        for p in page:
            if PG.ANNOTS in p:  # just to prevent warnings
                # auto_regenerate is passed as None: the flag was already
                # handled above and must not be touched again per page.
                self.update_page_form_field_values(p, fields, flags, None, flatten=flatten)
        return
    if PG.ANNOTS not in page:
        logger_warning("No fields to update on this page", __name__)
        return
    for annotation in page[PG.ANNOTS]:  # type: ignore
        annotation = cast(DictionaryObject, annotation.get_object())
        if annotation.get("/Subtype", "") != "/Widget":
            continue
        # A widget carrying both /FT and /T is its own field; otherwise the
        # field data is taken from its /Parent (empty dictionary if absent).
        if "/FT" in annotation and "/T" in annotation:
            parent_annotation = annotation
        else:
            parent_annotation = annotation.get(
                PG.PARENT, DictionaryObject()
            ).get_object()

        for field, value in fields.items():
            # Match on either the fully qualified name or the partial /T name.
            if not (
                self._get_qualified_field_name(parent_annotation) == field
                or parent_annotation.get("/T", None) == field
            ):
                continue
            # For choice fields, the /I entry is dropped before a new value is set.
            if (
                parent_annotation.get("/FT", None) == "/Ch"
                and "/I" in parent_annotation
            ):
                del parent_annotation["/I"]
            if flags:
                annotation[NameObject(FA.Ff)] = NumberObject(flags)
            if not (value is None and flatten):  # Only change values if given by user and not flattening.
                if isinstance(value, list):
                    lst = ArrayObject(TextStringObject(v) for v in value)
                    parent_annotation[NameObject(FA.V)] = lst
                elif isinstance(value, tuple):
                    # NOTE(review): the tuple form stores /V on the widget
                    # itself, whereas the other forms store it on
                    # parent_annotation - confirm this is intended.
                    annotation[NameObject(FA.V)] = TextStringObject(
                        value[0],
                    )
                else:
                    parent_annotation[NameObject(FA.V)] = TextStringObject(value)
            if parent_annotation.get(FA.FT) == "/Btn":
                # Checkbox button (no /FT found in Radio widgets)
                v = NameObject(value)
                ap = cast(DictionaryObject, annotation[NameObject(AA.AP)])
                normal_ap = cast(DictionaryObject, ap["/N"])
                # Fall back to /Off when the requested state has no appearance.
                if v not in normal_ap:
                    v = NameObject("/Off")
                appearance_stream_obj = normal_ap.get(v)
                # other cases will be updated through the for loop
                annotation[NameObject(AA.AS)] = v
                annotation[NameObject(FA.V)] = v
                if flatten and appearance_stream_obj is not None:
                    # We basically copy the entire appearance stream, which should be an XObject that
                    # is already registered. No need to add font resources.
                    rct = cast(RectangleObject, annotation[AA.Rect])
                    self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1])
            elif (
                parent_annotation.get(FA.FT) == "/Tx"
                or parent_annotation.get(FA.FT) == "/Ch"
            ):
                # textbox
                if isinstance(value, tuple):
                    # Tuple form: (text, font id, font size).
                    self._update_field_annotation(
                        page, parent_annotation, annotation, value[1], value[2], flatten=flatten
                    )
                else:
                    self._update_field_annotation(page, parent_annotation, annotation, flatten=flatten)
            elif (
                annotation.get(FA.FT) == "/Sig"
            ):  # deprecated  # not implemented yet
                logger_warning("Signature forms not implemented yet", __name__)

1183 

def reattach_fields(
    self, page: Optional[PageObject] = None
) -> list[DictionaryObject]:
    """
    Look for orphan form fields among the page annotations and reattach
    them to the AcroForm ``/Fields`` array.

    Args:
        page: page to analyze.
            If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields.

    """
    reattached = []
    if page is None:
        # No page given: process every page and concatenate the results.
        for each_page in self.pages:
            reattached.extend(self.reattach_fields(each_page))
        return reattached

    # Fetch the /AcroForm dictionary and its /Fields array, creating and
    # registering either one when it is missing.
    try:
        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        acro_form = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = acro_form
    try:
        known_fields = cast(ArrayObject, acro_form[InteractiveFormDictEntries.Fields])
    except KeyError:
        known_fields = ArrayObject()
        acro_form[NameObject(InteractiveFormDictEntries.Fields)] = known_fields

    if "/Annots" not in page:
        return reattached

    page_annots = cast(ArrayObject, page["/Annots"])
    for position, entry in enumerate(page_annots):
        was_indirect = isinstance(entry, IndirectObject)
        widget = cast(DictionaryObject, entry.get_object())
        # Only widgets that are themselves fields (they carry /FT) qualify.
        if widget.get("/Subtype", "") != "/Widget" or "/FT" not in widget:
            continue
        already_listed = (
            "indirect_reference" in widget.__dict__
            and widget.indirect_reference in known_fields
        )
        if already_listed:
            continue
        if not was_indirect:
            # Inline annotation: register it with the writer and replace the
            # array entry by the returned reference.
            page_annots[position] = self._add_object(widget)
        known_fields.append(widget.indirect_reference)
        reattached.append(widget)
    return reattached

1233 

def clone_reader_document_root(self, reader: PdfReader) -> None:
    """
    Copy the document root of a reader, with all its sub-elements (pages,
    threads, outlines,...), into this writer. To insert only part of a
    document, consider ``append`` instead.

    Args:
        reader: PdfReader from which the document root should be copied.

    """
    self._info_obj = None
    if not self.incremental:
        self._objects.clear()
    else:
        # Incremental mode: rebuild the object table slot by slot from the
        # reader, leaving None where the reader returns no object.
        slot_count = cast(int, reader.trailer["/Size"]) - 1
        self._objects = [None] * slot_count
        for idnum in range(1, len(self._objects) + 1):
            source_obj = reader.get_object(idnum)
            if source_obj is not None:
                self._objects[idnum - 1] = source_obj.replicate(self)
    self._root_object = reader.root_object.clone(self)
    self._pages = self._root_object.raw_get("/Pages")

    assert len(self._objects) <= cast(int, reader.trailer["/Size"])  # for pytest
    # The reference hashes must be taken here, before any rewriting below.
    if self.incremental:
        baseline = []
        for existing in self._objects:
            baseline.append(existing.hash_bin() if existing is not None else 0)
        self._original_hash = baseline
    self._flatten()
    assert self.flattened_pages is not None
    for flat_page in self.flattened_pages:
        page_ref = cast(IndirectObject, flat_page.indirect_reference)
        self._replace_object(page_ref.idnum, flat_page)
        if not self.incremental:
            flat_page[NameObject("/Parent")] = self._pages
    if not self.incremental:
        # Outside incremental mode the page tree becomes a single flat /Kids array.
        kids = ArrayObject([p.indirect_reference for p in self.flattened_pages])
        pages_root = cast(DictionaryObject, self._pages.get_object())
        pages_root[NameObject("/Kids")] = kids

1272 

def clone_document_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Clone a document from a PDF file reader: the '/Root', '/Info' and
    '/ID' sections of the pdf are copied into this writer.

    Args:
        reader: PDF file reader instance from which the clone
            should be created.
        after_page_append:
            Callback function that is invoked once per page after the
            document has been cloned. Its single parameter is a reference
            to the page in the cloned document.

    """
    self.clone_reader_document_root(reader)

    source_info = reader._info
    # When the reader has no /Info, _info_obj stays None as set by
    # clone_reader_document_root().
    if source_info is not None:
        if self.incremental:
            self._info_obj = cast(
                IndirectObject, source_info.clone(self).indirect_reference
            )
            assert isinstance(self._info, DictionaryObject), "for mypy"
            # Record the cloned /Info hash as its reference value.
            info_slot = self._info_obj.indirect_reference.idnum - 1
            self._original_hash[info_slot] = self._info.hash_bin()
        else:
            info_copy = DictionaryObject(
                cast(DictionaryObject, source_info.get_object())
            )
            self._info_obj = self._add_object(info_copy)

    try:
        self._ID = cast(ArrayObject, reader._ID).clone(self)
    except AttributeError:
        # Nothing clonable as /ID on the reader: keep the current value.
        pass

    if callable(after_page_append):
        pages_root = cast(DictionaryObject, self._pages.get_object())
        for kid in cast(ArrayObject, pages_root["/Kids"]):
            after_page_append(kid.get_object())

1320 

def _compute_document_identifier(self) -> ByteStringObject:
    """Return an identifier computed as a checksum over the serialized PDF structure."""
    buffer = BytesIO()
    # Serialize header and objects into memory, then checksum from the start.
    self._write_pdf_structure(buffer)
    buffer.seek(0)
    checksum = _rolling_checksum(buffer)
    return ByteStringObject(checksum.encode("utf8"))

1326 

def generate_file_identifiers(self) -> None:
    """
    Generate the file identifier pair for the PDF that will be written.

    The identifiers only have to be unique; they need not be reproducible.
    On the first write both identifiers are set to the same value. When an
    identifier pair already exists, its first element is kept and only the
    second one is recomputed. A reader resolving a file reference can thus
    tell the unchanged file (both match) from a different version of the
    right file (only the first matches).
    see §14.4 "File Identifiers".
    """
    if not self._ID:
        # First write: both identifiers share one freshly computed value.
        shared = self._compute_document_identifier()
        self._ID = ArrayObject((shared, shared))
        return
    permanent = self._ID[0]
    changing = self._compute_document_identifier()
    self._ID = ArrayObject((permanent, changing))

1346 

def encrypt(
    self,
    user_password: str,
    owner_password: Optional[str] = None,
    use_128bit: bool = True,
    permissions_flag: UserAccessPermissions = ALL_DOCUMENT_PERMISSIONS,
    *,
    algorithm: Optional[str] = None,
) -> None:
    """
    Encrypt this PDF file with the PDF Standard encryption handler.

    Args:
        user_password: The password which allows for opening
            and reading the PDF file with the restrictions provided.
        owner_password: The password which allows for
            opening the PDF files without any restrictions. By default,
            the owner password is the same as the user password.
        use_128bit: flag as to whether to use 128bit
            encryption. When false, 40bit encryption will be used.
            By default, this flag is on.
        permissions_flag: permissions as described in
            Table 3.20 of the PDF 1.7 specification. A bit value of 1 means
            the permission is granted.
            Hence an integer value of -1 will set all flags.
            Bit position 3 is for printing, 4 is for modifying content,
            5 and 6 control annotations, 9 for form fields,
            10 for extraction of text and graphics.
        algorithm: encrypt algorithm. Values may be one of "RC4-40", "RC4-128",
            "AES-128", "AES-256-R5", "AES-256". If it is valid,
            `use_128bit` will be ignored.

    Raises:
        ValueError: If ``algorithm`` is given but is not a member of
            ``EncryptAlgorithm``.

    """
    if owner_password is None:
        owner_password = user_password

    if algorithm is None:
        # No explicit algorithm: pick the RC4 variant from `use_128bit`.
        alg = EncryptAlgorithm.RC4_128 if use_128bit else EncryptAlgorithm.RC4_40
    else:
        # "AES-256-R5" style names map to EncryptAlgorithm member names.
        member_name = algorithm.replace("-", "_")
        try:
            alg = getattr(EncryptAlgorithm, member_name)
        except AttributeError:
            raise ValueError(f"Algorithm '{algorithm}' NOT supported")

    self.generate_file_identifiers()
    assert self._ID
    self._encryption = Encryption.make(alg, permissions_flag, self._ID[0])
    new_entry = self._encryption.write_entry(user_password, owner_password)
    previous_entry = self._encrypt_entry
    if previous_entry:
        # `encrypt` was called before: the new entry takes over the object
        # slot of the old one.
        assert previous_entry.indirect_reference is not None
        new_entry.indirect_reference = previous_entry.indirect_reference
        self._objects[new_entry.indirect_reference.idnum - 1] = new_entry
    else:
        self._add_object(new_entry)
    self._encrypt_entry = new_entry

1405 

1406 def _resolve_links(self) -> None: 

1407 """Patch up links that were added to the document earlier, to 

1408 make sure they still point to the same pages. 

1409 """ 

1410 for (new_link, old_link) in self._unresolved_links: 

1411 old_page = old_link.find_referenced_page() 

1412 if not old_page: 

1413 continue 

1414 new_page = self._merged_in_pages.get(old_page) 

1415 if new_page is None: 

1416 continue 

1417 new_link.patch_reference(self, new_page) 

1418 

def write_stream(self, stream: StreamType) -> None:
    """
    Serialize the document to an already opened stream.

    In incremental mode the reader's original bytes are copied first and an
    increment is appended only when there are new or modified objects;
    otherwise the full structure, xref table and trailer are written.

    Args:
        stream: Writable stream; a warning is logged if it exposes a
            ``mode`` attribute that does not contain ``"b"``.
    """
    text_mode = hasattr(stream, "mode") and "b" not in stream.mode
    if text_mode:
        logger_warning(
            f"File <{stream.name}> to write to is not in binary mode. "
            "It may not be written to correctly.",
            __name__,
        )
    self._resolve_links()

    if not self.incremental:
        positions, free_ids = self._write_pdf_structure(stream)
        xref_offset = self._write_xref_table(stream, positions, free_ids)
        self._write_trailer(stream, xref_offset)
        return

    # Incremental update: original file content first, then the changes.
    self._reader.stream.seek(0)
    stream.write(self._reader.stream.read(-1))
    changed = self.list_objects_in_increment()
    if len(changed) > 0:
        self._write_increment(stream)  # writes objs, xref stream and startxref

1439 

def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
    """
    Write the collection of pages added to this object out as a PDF file.

    Args:
        stream: An object to write the file to. The object can support
            the write method and the tell method, similar to a file object, or
            be a file path, just like the fileobj, just named it stream to keep
            existing workflow.

    Returns:
        A tuple (bool, IO): whether the stream was opened (and already
        closed) by this method, and the stream that was written to.

    Raises:
        ValueError: If ``stream`` is an empty string.

    """
    if stream == "":
        raise ValueError(f"Output({stream=}) is empty.")

    # A str/Path means the file is opened here and is therefore owned here.
    my_file = isinstance(stream, (str, Path))
    if my_file:
        stream = FileIO(stream, "wb")

    try:
        self.write_stream(stream)
    finally:
        # Close a file opened by this method even when writing fails, so the
        # file descriptor is not leaked. Caller-supplied streams are never
        # closed here.
        if my_file:
            stream.close()

    if not my_file:
        stream.flush()

    return my_file, stream

1471 

def list_objects_in_increment(self) -> list[IndirectObject]:
    """
    For analysis or debugging.
    List the objects that an incremental write would output: those added
    after the original hashes were recorded, and those whose current hash
    differs from the recorded one.
    Deleted objects will not be freed but will become orphans.

    Returns:
        List of new or modified IndirectObjects

    """
    known_count = len(self._original_hash)
    changed = []
    for index, candidate in enumerate(self._objects):
        if candidate is None:
            continue
        # New objects sit beyond the recorded hashes; only existing ones
        # need their hash compared.
        is_new = index >= known_count
        if is_new or candidate.hash_bin() != self._original_hash[index]:
            changed.append(cast(IndirectObject, candidate).indirect_reference)
    return changed

1495 

def _write_increment(self, stream: StreamType) -> None:
    """
    Append an incremental update to ``stream``.

    Writes every new or modified object, followed by a cross-reference
    stream covering exactly those objects, then ``startxref`` and ``%%EOF``.

    Args:
        stream: Output stream; it must support ``tell`` and ``write``.
    """
    object_positions = {}  # object number -> byte offset in the stream
    object_blocks = []  # [first object number, count] pairs for /Index
    # Bounds of the run of consecutive object numbers being collected;
    # current_stop is one past the last number of the run.
    current_start = -1
    current_stop = -2
    original_hash_count = len(self._original_hash)
    for i, obj in enumerate(self._objects):
        # Select objects beyond the recorded hashes (new) or whose hash
        # differs from the recorded one (modified).
        if obj is not None and (
            i >= original_hash_count
            or obj.hash_bin() != self._original_hash[i]
        ):
            idnum = i + 1
            assert isinstance(obj, PdfObject), "mypy"
            # first write new/modified object
            object_positions[idnum] = stream.tell()
            stream.write(f"{idnum} 0 obj\n".encode())
            """ encryption is not operational
            if self._encryption and obj != self._encrypt_entry:
                obj = self._encryption.encrypt_object(obj, idnum, 0)
            """
            obj.write_to_stream(stream)
            stream.write(b"\nendobj\n")

            # prepare xref
            # A gap in the numbering closes the current run and opens a new one.
            if idnum != current_stop:
                if current_start > 0:
                    object_blocks.append(
                        [current_start, current_stop - current_start]
                    )
                current_start = idnum
            current_stop = idnum + 1
    # Fails when no object was selected above; write_stream only calls this
    # method when list_objects_in_increment() is non-empty.
    assert current_start > 0, "for pytest only"
    object_blocks.append([current_start, current_stop - current_start])
    # write incremented xref
    xref_location = stream.tell()
    # The xref stream takes the object number following the last object.
    xr_id = len(self._objects) + 1
    stream.write(f"{xr_id} 0 obj".encode())
    init_data = {
        NameObject("/Type"): NameObject("/XRef"),
        NameObject("/Size"): NumberObject(xr_id + 1),
        NameObject("/Root"): self.root_object.indirect_reference,
        NameObject("/Filter"): NameObject("/FlateDecode"),
        # Flattened list of the [start, count] pairs collected above.
        NameObject("/Index"): ArrayObject(
            [NumberObject(_it) for _su in object_blocks for _it in _su]
        ),
        # Field widths in bytes; they match the ">BIB" packing below.
        NameObject("/W"): ArrayObject(
            [NumberObject(1), NumberObject(4), NumberObject(1)]
        ),
        "__streamdata__": b"",
    }
    # /Info is only referenced when it is new or differs from the original.
    if self._info is not None and (
        self._info.indirect_reference.idnum - 1  # type: ignore
        >= len(self._original_hash)
        or cast(IndirectObject, self._info).hash_bin()  # kept for future
        != self._original_hash[
            self._info.indirect_reference.idnum - 1  # type: ignore
        ]
    ):
        init_data[NameObject(TK.INFO)] = self._info.indirect_reference
    # Chain to the previous cross-reference section of the original file.
    init_data[NameObject(TK.PREV)] = NumberObject(self._reader._startxref)
    if self._ID:
        init_data[NameObject(TK.ID)] = self._ID
    xr = StreamObject.initialize_from_dictionary(init_data)
    # One entry per written object, in writing order: 1, byte offset, 0.
    # The offset is packed as a 4-byte unsigned integer.
    xr.set_data(
        b"".join(
            [struct.pack(b">BIB", 1, _pos, 0) for _pos in object_positions.values()]
        )
    )
    xr.write_to_stream(stream)
    stream.write(f"\nendobj\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof

1566 

1567 def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]]: 

1568 object_positions = [] 

1569 free_objects = [] 

1570 stream.write(self.pdf_header.encode() + b"\n") 

1571 stream.write(b"%\xE2\xE3\xCF\xD3\n") 

1572 

1573 for idnum, obj in enumerate(self._objects, start=1): 

1574 if obj is not None: 

1575 object_positions.append(stream.tell()) 

1576 stream.write(f"{idnum} 0 obj\n".encode()) 

1577 if self._encryption and obj != self._encrypt_entry: 

1578 obj = self._encryption.encrypt_object(obj, idnum, 0) 

1579 obj.write_to_stream(stream) 

1580 stream.write(b"\nendobj\n") 

1581 else: 

1582 object_positions.append(-1) 

1583 free_objects.append(idnum) 

1584 free_objects.append(0) # add 0 to loop in accordance with specification 

1585 return object_positions, free_objects 

1586 

1587 def _write_xref_table( 

1588 self, stream: StreamType, object_positions: list[int], free_objects: list[int] 

1589 ) -> int: 

1590 xref_location = stream.tell() 

1591 stream.write(b"xref\n") 

1592 stream.write(f"0 {len(self._objects) + 1}\n".encode()) 

1593 stream.write(f"{free_objects[0]:0>10} {65535:0>5} f \n".encode()) 

1594 free_idx = 1 

1595 for offset in object_positions: 

1596 if offset > 0: 

1597 stream.write(f"{offset:0>10} {0:0>5} n \n".encode()) 

1598 else: 

1599 stream.write(f"{free_objects[free_idx]:0>10} {1:0>5} f \n".encode()) 

1600 free_idx += 1 

1601 return xref_location 

1602 

def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
    """
    Write the PDF trailer to the stream.

    To quote the PDF specification:
        [The] trailer [gives] the location of the cross-reference table and
        of certain special objects within the body of the file.

    Args:
        stream: Output stream to write to.
        xref_location: Position of the cross-reference table, written
            after ``startxref``.
    """
    stream.write(b"trailer\n")

    # /Size and /Root are always present; the other entries are optional.
    object_count = NumberObject(len(self._objects) + 1)
    trailer_dict = DictionaryObject(
        {
            NameObject(TK.SIZE): object_count,
            NameObject(TK.ROOT): self.root_object.indirect_reference,
        }
    )
    if self._info is not None:
        trailer_dict[NameObject(TK.INFO)] = self._info.indirect_reference
    if self._ID is not None:
        trailer_dict[NameObject(TK.ID)] = self._ID
    if self._encrypt_entry:
        trailer_dict[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference

    trailer_dict.write_to_stream(stream)
    footer = f"\nstartxref\n{xref_location}\n%%EOF\n"
    stream.write(footer.encode())  # eof

1626 

@property
def metadata(self) -> Optional[DocumentInformation]:
    """
    Retrieve/set the PDF file's document information dictionary, if it exists.

    Args:
        value: dict with the entries to be set. if None : remove the /Info entry from the pdf.

    Some PDF files carry their metadata in (XMP) metadata streams rather
    than in a document information dictionary; those streams are not
    accessed here, but through :meth:`~xmp_metadata`.

    """
    inherited_view = super().metadata
    return inherited_view

@metadata.setter
def metadata(
    self,
    value: Optional[Union[DocumentInformation, DictionaryObject, dict[Any, Any]]],
) -> None:
    if value is not None:
        # Replace, do not merge: empty any existing dictionary first.
        if self._info is not None:
            self._info.clear()
        self.add_metadata(value)
        return
    self._info = None

1654 

def add_metadata(self, infos: dict[str, Any]) -> None:
    """
    Add custom metadata to the output.

    Every value is converted with ``str`` and stored as a string object;
    existing entries with the same key are overwritten.

    Args:
        infos: a Python dictionary where each key is a field
            and each value is your new metadata.

    """
    if isinstance(infos, PdfObject):
        infos = cast(DictionaryObject, infos.get_object())

    converted = {}
    for field_name, raw in list(infos.items()):
        resolved = raw.get_object() if isinstance(raw, PdfObject) else raw
        converted[NameObject(field_name)] = create_string_object(str(resolved))

    # Create the information dictionary on first use.
    if self._info is None:
        self._info = DictionaryObject()
    self._info.update(converted)

1674 

def compress_identical_objects(
    self,
    remove_identicals: bool = True,
    remove_orphans: bool = True,
) -> None:
    """
    Parse the PDF file and merge objects that have the same hash.
    This will make objects common to multiple pages.
    Recommended to be used just before writing output.

    Args:
        remove_identicals: Remove identical objects.
        remove_orphans: Remove unreferenced objects. When False, no
            unreferenced object is dropped.

    """

    def replace_in_obj(
        obj: PdfObject, crossref: dict[IndirectObject, IndirectObject]
    ) -> None:
        # Walk a dictionary/array recursively: flag every referenced object
        # as "not an orphan" and redirect references to merged duplicates.
        if isinstance(obj, DictionaryObject):
            key_val = obj.items()
        elif isinstance(obj, ArrayObject):
            key_val = enumerate(obj)  # type: ignore
        else:
            return
        assert isinstance(obj, (DictionaryObject, ArrayObject))
        for k, v in key_val:
            if isinstance(v, IndirectObject):
                orphans[v.idnum - 1] = False
                if v in crossref:
                    obj[k] = crossref[v]
            else:
                # Non-container values are filtered out by the type checks
                # at the top of replace_in_obj itself.
                replace_in_obj(v, crossref)

    # _idnum_hash :dict[hash]=(1st_ind_obj,[other_indir_objs,...])
    self._idnum_hash = {}
    orphans = [True] * len(self._objects)
    # look for similar objects
    for idx, obj in enumerate(self._objects):
        if is_null_or_none(obj):
            continue
        assert obj is not None, "mypy"  # mypy: TypeGuard of `is_null_or_none` does not help here.
        assert isinstance(obj.indirect_reference, IndirectObject)
        h = obj.hash_value()
        if remove_identicals and h in self._idnum_hash:
            # Duplicate of an earlier object: remember its reference for
            # redirection and free its slot.
            self._idnum_hash[h][1].append(obj.indirect_reference)
            self._objects[idx] = None
        else:
            self._idnum_hash[h] = (obj.indirect_reference, [])

    # generate the dict converting others to 1st
    cnv = {v[0]: v[1] for v in self._idnum_hash.values() if len(v[1]) > 0}
    cnv_rev: dict[IndirectObject, IndirectObject] = {}
    for k, v in cnv.items():
        cnv_rev.update(zip(v, (k,) * len(v)))

    # replace reference to merged objects
    for obj in self._objects:
        if isinstance(obj, (DictionaryObject, ArrayObject)):
            replace_in_obj(obj, cnv_rev)

    # Honor the documented flag: keep unreferenced objects when asked to.
    if not remove_orphans:
        return

    # The catalog, /Info and /ID are referenced from the trailer and not
    # from other objects, so they have to be kept explicitly.
    orphans[self.root_object.indirect_reference.idnum - 1] = False  # type: ignore

    # /Info is optional (the `metadata` setter can reset it to None).
    info = self._info
    info_ref = (
        None
        if is_null_or_none(info)
        else getattr(info, "indirect_reference", None)
    )
    if info_ref is not None:
        orphans[info_ref.idnum - 1] = False

    try:
        orphans[self._ID.indirect_reference.idnum - 1] = False  # type: ignore
    except AttributeError:
        # No /ID, or /ID is not stored as an indirect object.
        pass
    for i in compress(range(len(self._objects)), orphans):
        self._objects[i] = None

1749 

def get_reference(self, obj: PdfObject) -> IndirectObject:
    """
    Return the indirect reference under which ``obj`` is stored in this writer.

    Args:
        obj: An object present in the writer's object list.

    Returns:
        The IndirectObject (generation 0) pointing to ``obj``.
    """
    position = self._objects.index(obj)
    # Object numbers are 1-based while the object list is 0-based.
    reference = IndirectObject(position + 1, 0, self)
    assert reference.get_object() == obj
    return reference

1755 

def get_outline_root(self) -> TreeObject:
    """
    Return the root of the document outline, creating it when missing.

    When the catalog has no outlines entry, a new empty ``TreeObject`` is
    registered with the writer and referenced from the catalog. When the
    existing entry is not a ``TreeObject``, it is wrapped in one and the
    stored object is replaced by the wrapper.

    Returns:
        The outline root.

    """
    if CO.OUTLINES not in self._root_object:
        # No outline yet: create one and hook it into the catalog.
        outline = TreeObject()
        outline.update({})
        outline_ref = self._add_object(outline)
        self._root_object[NameObject(CO.OUTLINES)] = outline_ref
        return outline

    # Entries in the catalog dictionary
    outline = cast(TreeObject, self._root_object[CO.OUTLINES])
    if not isinstance(outline, TreeObject):
        tree = TreeObject(outline)
        self._replace_object(outline.indirect_reference.idnum, tree)
        outline = tree
    # Verify that the outline is registered and reachable through its number.
    idnum = self._objects.index(outline) + 1
    outline_ref = IndirectObject(idnum, 0, self)
    assert outline_ref.get_object() == outline
    return outline

1774 

def get_threads_root(self) -> ArrayObject:
    """
    Return the array of article threads, creating it when missing.

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Returns:
        An array (possibly empty) of Dictionaries with an ``/F`` key,
        and optionally information about the thread in ``/I`` or ``/Metadata`` keys.

    """
    if CO.THREADS not in self._root_object:
        # First access: store a new empty array in the catalog.
        threads = ArrayObject()
        self._root_object[NameObject(CO.THREADS)] = threads
        return threads
    # Entries in the catalog dictionary
    return cast(ArrayObject, self._root_object[CO.THREADS])

1793 

@property
def threads(self) -> ArrayObject:
    """
    The list of article threads (read-only property).

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Every entry is a dictionary with an ``/F`` key; ``/I`` or
    ``/Metadata`` keys may carry additional thread information.
    """
    threads_root = self.get_threads_root()
    return threads_root

1805 

def add_outline_item_destination(
    self,
    page_destination: Union[IndirectObject, PageObject, TreeObject],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Insert an outline item (or a page, wrapped in a destination) into the outline.

    Args:
        page_destination: The outline item to insert. When it resolves to a
            ``PageObject``, a ``Destination`` fitting that page is built and
            inserted instead.
        parent: Outline item under which the new item is inserted.
            Defaults to the outline root.
        before: Sibling passed to ``insert_child`` as the insertion point.
        is_open: Stored on the item under ``/%is_open%``; also selects
            whether the parent counters are incremented on insertion.

    Returns:
        The indirect reference of the inserted outline item.

    """
    page_destination = cast(PageObject, page_destination.get_object())
    if isinstance(page_destination, PageObject):
        # Forward parent/before/is_open: without them the item always lands
        # at the end of the outline root, opened, whatever the caller asked.
        return self.add_outline_item_destination(
            Destination(
                f"page #{page_destination.page_number}",
                cast(IndirectObject, page_destination.indirect_reference),
                Fit.fit(),
            ),
            parent,
            before,
            is_open,
        )

    if parent is None:
        parent = self.get_outline_root()

    page_destination[NameObject("/%is_open%")] = BooleanObject(is_open)
    parent = cast(TreeObject, parent.get_object())
    page_destination_ref = self._add_object(page_destination)
    if before is not None:
        before = before.indirect_reference
    parent.insert_child(
        page_destination_ref,
        before,
        self,
        # A closed item must not change the counters of its ancestors.
        page_destination.inc_parent_counter_outline
        if is_open
        else (lambda x, y: 0),  # noqa: ARG005
    )
    if "/Count" not in page_destination:
        page_destination[NameObject("/Count")] = NumberObject(0)

    return page_destination_ref

1843 

def add_outline_item_dict(
    self,
    outline_item: OutlineItemType,
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item described by a dictionary.

    The entries of ``outline_item`` are copied into a new ``TreeObject``
    which is then inserted with :meth:`add_outline_item_destination`.

    Args:
        outline_item: Key/value pairs of the outline item.
        parent: Forwarded to :meth:`add_outline_item_destination`.
        before: Forwarded to :meth:`add_outline_item_destination`.
        is_open: Forwarded to :meth:`add_outline_item_destination`.

    Returns:
        The indirect reference of the inserted outline item.

    """
    # NOTE: the original carried a disabled snippet ("code currently
    # unreachable") that would have copied an "/A" action dictionary into
    # its own indirect object; it was never executed.
    tree_item = TreeObject()
    tree_item.update(outline_item)
    return self.add_outline_item_destination(tree_item, parent, before, is_open)

1866 

def add_outline_item(
    self,
    title: str,
    page_number: Union[None, PageObject, IndirectObject, int],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Union[tuple[float, float, float], str]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: Fit = PAGE_FIT,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item (commonly referred to as a "Bookmark") to the PDF file.

    Args:
        title: Title to use for this outline item.
        page_number: Page this outline item will point to: a page index,
            a ``PageObject``, an indirect reference, or ``None`` for an
            item without a target action.
        parent: A reference to a parent outline item to create nested
            outline items.
        before: Outline item before which the new item is inserted;
            forwarded to :meth:`add_outline_item_destination`.
        color: Color of the outline item's font as a red, green, blue tuple
            from 0.0 to 1.0 or as a Hex String (#RRGGBB)
        bold: Outline item font is bold
        italic: Outline item font is italic
        fit: The fit of the destination page.
        is_open: Forwarded to :meth:`add_outline_item_destination`.

    Returns:
        The added outline item as an indirect object.

    Raises:
        TypeError: ``page_number`` is not ``None``, an ``int``, a
            ``PageObject`` or an ``IndirectObject``.

    """
    page_ref: Union[None, NullObject, IndirectObject, NumberObject]
    if isinstance(italic, Fit):  # it means that we are on the old params
        if fit is not None and page_number is None:
            page_number = fit
        # Re-dispatch with the positional arguments shifted by one slot.
        return self.add_outline_item(
            title, page_number, parent, None, before, color, bold, italic, is_open=is_open
        )
    if page_number is None:
        action_ref = None
    else:
        if isinstance(page_number, IndirectObject):
            page_ref = page_number
        elif isinstance(page_number, PageObject):
            page_ref = page_number.indirect_reference
        elif isinstance(page_number, int):
            try:
                page_ref = self.pages[page_number].indirect_reference
            except IndexError:
                # Index outside the current pages: keep the plain number.
                page_ref = NumberObject(page_number)
        else:
            # Previously fell through with ``page_ref`` unbound and failed
            # with an UnboundLocalError on the check below.
            raise TypeError(
                f"page_number: invalid type {type(page_number).__name__}"
            )
        if page_ref is None:
            logger_warning(
                f"can not find reference of page {page_number}",
                __name__,
            )
            page_ref = NullObject()
        dest = Destination(
            NameObject("/" + title + " outline item"),
            page_ref,
            fit,
        )

        action_ref = self._add_object(
            DictionaryObject(
                {
                    NameObject(GoToActionArguments.D): dest.dest_array,
                    NameObject(GoToActionArguments.S): NameObject("/GoTo"),
                }
            )
        )
    outline_item = self._add_object(
        _create_outline_item(action_ref, title, color, italic, bold)
    )

    if parent is None:
        parent = self.get_outline_root()
    return self.add_outline_item_destination(outline_item, parent, before, is_open)

1944 

def add_outline(self) -> None:
    """
    Placeholder for adding a whole outline; always raises.

    Raises:
        NotImplementedError: Always.

    """
    message = "This method is not yet implemented. Use :meth:`add_outline_item` instead."
    raise NotImplementedError(message)

1949 

def add_named_destination_array(
    self, title: TextStringObject, destination: Union[IndirectObject, ArrayObject]
) -> None:
    """
    Register ``destination`` under ``title`` in the named destinations list.

    The list returned by ``get_named_dest_root`` is walked two entries at
    a time (title, destination). The new pair is inserted in front of the
    first title that compares greater than ``title``, or appended at the
    end when there is none.

    Args:
        title: Name of the destination.
        destination: The destination array, or a reference to it.

    """
    names = self.get_named_dest_root()
    for pos in range(0, len(names), 2):
        if title < names[pos]:
            # Insert the value first, then the key in front of it.
            names.insert(pos, destination)
            names.insert(pos, TextStringObject(title))
            return
    names.extend([TextStringObject(title), destination])

1963 

def add_named_destination_object(
    self,
    page_destination: PdfObject,
) -> IndirectObject:
    """
    Register a destination object as a named destination.

    The object's ``dest_array`` is added to the writer and recorded under
    the object's ``/Title`` entry.

    Args:
        page_destination: Object providing ``dest_array`` and ``/Title``.

    Returns:
        The indirect reference of the stored destination array.

    """
    dest_array_ref = self._add_object(page_destination.dest_array)  # type: ignore
    dest_title = cast("TextStringObject", page_destination["/Title"])  # type: ignore
    self.add_named_destination_array(dest_title, dest_array_ref)
    return dest_array_ref

1974 

def add_named_destination(
    self,
    title: str,
    page_number: int,
) -> IndirectObject:
    """
    Create a ``/GoTo`` action on a page and register it as a named destination.

    Args:
        title: Name of the destination; converted to a
            ``TextStringObject`` when it is not one already.
        page_number: Index into the kids of the writer's pages object.

    Returns:
        The indirect reference of the created action dictionary.

    """
    page_ref = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    target = ArrayObject(
        [page_ref, NameObject(TypFitArguments.FIT_H), NumberObject(826)]
    )
    action = DictionaryObject()
    action.update(
        {
            NameObject(GoToActionArguments.D): target,
            NameObject(GoToActionArguments.S): NameObject("/GoTo"),
        }
    )
    action_ref = self._add_object(action)

    if not isinstance(title, TextStringObject):
        title = TextStringObject(str(title))
    self.add_named_destination_array(title, action_ref)
    return action_ref

1997 

def remove_links(self) -> None:
    """Remove links and annotations from this output."""
    flag = ObjectDeletionFlag.ALL_ANNOTATIONS
    for current_page in self.pages:
        self.remove_objects_from_page(current_page, flag)

2002 

def remove_annotations(
    self, subtypes: Optional[Union[AnnotationSubtype, Iterable[AnnotationSubtype]]]
) -> None:
    """
    Remove annotations of the given subtype(s) from every page.

    Args:
        subtypes: subtype or list of subtypes to be removed.
            Examples are: "/Link", "/FileAttachment", "/Sound",
            "/Movie", "/Screen", ...
            Pass ``None`` to remove all annotations.

    """
    for current_page in self.pages:
        self._remove_annots_from_page(current_page, subtypes)

2018 

def _remove_annots_from_page(
    self,
    page: Union[IndirectObject, PageObject, DictionaryObject],
    subtypes: Optional[Iterable[str]],
) -> None:
    """
    Delete the annotations of one page whose ``/Subtype`` is in ``subtypes``.

    Args:
        page: The page (or a reference to it) to clean.
        subtypes: Subtypes to remove; ``None`` removes every annotation.

    """
    page = cast(DictionaryObject, page.get_object())
    if PG.ANNOTS not in page:
        return

    index = 0
    while index < len(cast(ArrayObject, page[PG.ANNOTS])):
        annot = cast(ArrayObject, page[PG.ANNOTS])[index]
        annot_dict = cast(DictionaryObject, annot.get_object())
        if subtypes is not None and cast(str, annot_dict["/Subtype"]) not in subtypes:
            # Not selected: keep it and look at the next entry.
            index += 1
            continue
        if isinstance(annot, IndirectObject):
            self._objects[annot.idnum - 1] = NullObject()  # to reduce PDF size
        # The following entries shift down, so the index stays where it is.
        del page[PG.ANNOTS][index]  # type:ignore

2036 

def remove_objects_from_page(
    self,
    page: Union[PageObject, DictionaryObject],
    to_delete: Union[ObjectDeletionFlag, Iterable[ObjectDeletionFlag]],
    text_filters: Optional[dict[str, Any]] = None
) -> None:
    """
    Remove objects specified by ``to_delete`` from the given page.

    Annotation flags (links, attachments, 3D objects, all annotations) are
    handled by removing the matching annotations and returning. The other
    flags are handled by deleting operations from the page content stream
    and, recursively, from the form XObjects found in its resources.

    Args:
        page: Page object to clean up.
        to_delete: Objects to be deleted; can be a ``ObjectDeletionFlag``
            or a list of ObjectDeletionFlag
        text_filters: Properties of text to be deleted, if applicable. Optional.
            This is a Python dictionary with the following properties:

            * font_ids: List of font resource IDs (such as /F1 or /T1_0) to be deleted.

    """
    if isinstance(to_delete, (list, tuple)):
        for to_d in to_delete:
            # Forward the filters; otherwise a flag given inside a sequence
            # would ignore ``text_filters`` and delete all text.
            self.remove_objects_from_page(page, to_d, text_filters)
        return None
    assert isinstance(to_delete, ObjectDeletionFlag)

    if to_delete & ObjectDeletionFlag.LINKS:
        return self._remove_annots_from_page(page, ("/Link",))
    if to_delete & ObjectDeletionFlag.ATTACHMENTS:
        return self._remove_annots_from_page(
            page, ("/FileAttachment", "/Sound", "/Movie", "/Screen")
        )
    if to_delete & ObjectDeletionFlag.OBJECTS_3D:
        return self._remove_annots_from_page(page, ("/3D",))
    if to_delete & ObjectDeletionFlag.ALL_ANNOTATIONS:
        return self._remove_annots_from_page(page, None)

    # Content stream operators that are dropped unconditionally.
    jump_operators = []
    if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
        jump_operators = (
            [
                b"w", b"J", b"j", b"M", b"d", b"i",
                b"W", b"W*",
                b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
                b"m", b"l", b"c", b"v", b"y", b"h", b"re",
                b"sh"
            ]
        )
    if to_delete & ObjectDeletionFlag.TEXT:
        jump_operators = [b"Tj", b"TJ", b"'", b'"']

    def clean(
        content: ContentStream,
        images: list[str],
        forms: list[str],
        text_filters: Optional[dict[str, Any]] = None
    ) -> None:
        """Delete the selected operations of ``content`` in place."""
        nonlocal jump_operators, to_delete

        # Font selected by the most recent ``Tf`` operator.
        font_id = None
        font_ids_to_delete = []
        if text_filters and to_delete & ObjectDeletionFlag.TEXT:
            font_ids_to_delete = text_filters.get("font_ids", [])

        i = 0
        while i < len(content.operations):
            operands, operator = content.operations[i]
            if operator == b"Tf":
                font_id = operands[0]
            if (
                (
                    operator == b"INLINE IMAGE"
                    and (to_delete & ObjectDeletionFlag.INLINE_IMAGES)
                )
                or (operator in jump_operators)
                or (
                    operator == b"Do"
                    and (to_delete & ObjectDeletionFlag.XOBJECT_IMAGES)
                    and (operands[0] in images)
                )
            ):
                # When text is removed with filters, only operations drawn
                # with one of the listed fonts are deleted.
                if (
                    not to_delete & ObjectDeletionFlag.TEXT
                    or (to_delete & ObjectDeletionFlag.TEXT and not text_filters)
                    or (to_delete & ObjectDeletionFlag.TEXT and font_id in font_ids_to_delete)
                ):
                    del content.operations[i]
                else:
                    i += 1
            else:
                i += 1
        content.get_data()  # this ensures ._data is rebuilt from the .operations

    def clean_forms(
        elt: DictionaryObject, stack: list[DictionaryObject]
    ) -> tuple[list[str], list[str]]:
        """Clean the XObjects of ``elt``; return the image and form names found."""
        nonlocal to_delete
        # elt in recursive call is a new ContentStream object, so we have to check the indirect_reference
        if (elt in stack) or (
            hasattr(elt, "indirect_reference")
            and any(
                elt.indirect_reference == getattr(x, "indirect_reference", -1)
                for x in stack
            )
        ):
            # to prevent infinite looping
            return [], []  # pragma: no cover
        try:
            d = cast(
                dict[Any, Any],
                cast(DictionaryObject, elt["/Resources"])["/XObject"],
            )
        except KeyError:
            d = {}
        images = []
        forms = []
        for k, v in d.items():
            o = v.get_object()
            try:
                content: Any = None
                if (
                    to_delete & ObjectDeletionFlag.XOBJECT_IMAGES
                    and o["/Subtype"] == "/Image"
                ):
                    content = NullObject()  # to delete the image keeping the entry
                    images.append(k)
                if o["/Subtype"] == "/Form":
                    forms.append(k)
                    if isinstance(o, ContentStream):
                        content = o
                    else:
                        content = ContentStream(o, self)
                        content.update(
                            {
                                k1: v1
                                for k1, v1 in o.items()
                                if k1 not in ["/Length", "/Filter", "/DecodeParms"]
                            }
                        )
                        try:
                            content.indirect_reference = o.indirect_reference
                        except AttributeError:  # pragma: no cover
                            pass
                    stack.append(elt)
                    clean_forms(content, stack)  # clean subforms
                if content is not None:
                    if isinstance(v, IndirectObject):
                        self._objects[v.idnum - 1] = content
                    else:
                        # should only occur in a PDF not respecting PDF spec
                        # where streams must be indirected.
                        d[k] = self._add_object(content)  # pragma: no cover
            except (TypeError, KeyError):
                # Best effort: entries without a usable /Subtype are skipped.
                pass
        for im in images:
            del d[im]  # for clean-up
        if isinstance(elt, StreamObject):  # for /Form
            if not isinstance(elt, ContentStream):  # pragma: no cover
                e = ContentStream(elt, self)
                e.update(elt.items())
                elt = e
            clean(elt, images, forms, text_filters)  # clean the content
        return images, forms

    if not isinstance(page, PageObject):
        page = PageObject(self, page.indirect_reference)  # pragma: no cover
    if "/Contents" in page:
        content = cast(ContentStream, page.get_contents())

        images, forms = clean_forms(page, [])

        clean(content, images, forms, text_filters)
        page.replace_contents(content)

2209 

def remove_images(
    self,
    to_delete: ImageType = ImageType.ALL,
) -> None:
    """
    Remove images from every page of this output.

    Args:
        to_delete: The type of images to be deleted
            (default = all images types). A ``bool`` is treated as
            ``ImageType.ALL``.

    """
    if isinstance(to_delete, bool):
        to_delete = ImageType.ALL

    # Translate the ImageType bits into the same-named deletion flags.
    flags = ObjectDeletionFlag.NONE
    for flag_name in ("XOBJECT_IMAGES", "INLINE_IMAGES", "DRAWING_IMAGES"):
        if to_delete & ImageType[flag_name]:
            flags |= ObjectDeletionFlag[flag_name]

    for current_page in self.pages:
        self.remove_objects_from_page(current_page, flags)

2233 

def remove_text(self, font_names: Optional[list[str]] = None) -> None:
    """
    Remove text from the PDF.

    Args:
        font_names: List of font names to remove, such as "Helvetica-Bold".
            Optional. If not specified, all text will be removed.
    """
    if not font_names:
        font_names = []

    def get_font_info(
        obj: Any,
        font_info: Optional[dict[str, Any]] = None,
        key: Optional[str] = None
    ) -> dict[str, Any]:
        """Recursively gather, per normalized font name, the resource IDs using it."""
        if font_info is None:
            font_info = {}
        if isinstance(obj, IndirectObject):
            obj = obj.get_object()
        if isinstance(obj, dict):
            if obj.get("/Type") == "/Font":
                base_font = obj.get("/BaseFont", "")
                # Normalize font names like "/RRXFFV+Palatino-Bold" to "Palatino-Bold"
                normalized = base_font.lstrip("/").split("+")[-1]
                entry = font_info.setdefault(
                    normalized,
                    {
                        "normalized_font_name": normalized,
                        "resource_ids": [],
                    },
                )
                if key not in entry["resource_ids"]:
                    entry["resource_ids"].append(key)
            for child_key in obj:
                font_info = get_font_info(obj[child_key], font_info, child_key)
        elif isinstance(obj, (list, ArrayObject)):
            for child in obj:
                font_info = get_font_info(child, font_info)
        return font_info

    for page in self.pages:
        resource_ids_to_remove = []

        # Content streams reference fonts and other resources with names like "/F1" or "/T1_0"
        # Font names need to be converted to resource names/IDs for easier removal
        if font_names:
            fonts_on_page = get_font_info(page.get("/Resources"))
            # Add relevant resource names for removal
            for wanted in font_names:
                if wanted in fonts_on_page:
                    resource_ids_to_remove.extend(fonts_on_page[wanted]["resource_ids"])

        text_filters = {}
        if font_names:
            text_filters["font_ids"] = resource_ids_to_remove
        self.remove_objects_from_page(page, ObjectDeletionFlag.TEXT, text_filters=text_filters)

2290 

def add_uri(
    self,
    page_number: int,
    uri: str,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
) -> None:
    """
    Add an URI from a rectangular area to the specified page.

    Args:
        page_number: index of the page on which to place the URI action.
        uri: URI of resource to link to.
        rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or
            array of four integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``, or string in the form
            ``"[ xLL yLL xUR yUR ]"``.
        border: if provided, an array describing border-drawing
            properties. See the PDF spec for details. No border will be
            drawn if this argument is omitted.

    Raises:
        ValueError: ``rect`` is a string containing a non-numeric value.

    """
    page_link = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    page_ref = cast(dict[str, Any], self.get_object(page_link))

    border_arr: BorderArrayType
    if border is not None:
        border_arr = [NumberObject(n) for n in border[:3]]
        if len(border) == 4:
            # Optional fourth entry: the dash pattern array.
            dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
            border_arr.append(dash_pattern)
    else:
        border_arr = [NumberObject(2), NumberObject(2), NumberObject(2)]

    if isinstance(rect, str):
        # Parse "[ xLL yLL xUR yUR ]" into its four coordinates; the former
        # ``NumberObject(rect)`` could not represent a rectangle.
        rect = RectangleObject(
            [float(token) for token in rect.strip().strip("[]").split()]
        )
    elif isinstance(rect, RectangleObject):
        pass
    else:
        rect = RectangleObject(rect)

    lnk2 = DictionaryObject()
    lnk2.update(
        {
            NameObject("/S"): NameObject("/URI"),
            NameObject("/URI"): TextStringObject(uri),
        }
    )
    lnk = DictionaryObject()
    lnk.update(
        {
            NameObject(AA.Type): NameObject("/Annot"),
            NameObject(AA.Subtype): NameObject("/Link"),
            NameObject(AA.P): page_link,
            NameObject(AA.Rect): rect,
            NameObject("/H"): NameObject("/I"),
            NameObject(AA.Border): ArrayObject(border_arr),
            NameObject("/A"): lnk2,
        }
    )
    lnk_ref = self._add_object(lnk)

    # Attach the new link to the page, creating the annotation array if needed.
    if PG.ANNOTS in page_ref:
        page_ref[PG.ANNOTS].append(lnk_ref)
    else:
        page_ref[NameObject(PG.ANNOTS)] = ArrayObject([lnk_ref])

2357 

2358 _valid_layouts = ( 

2359 "/NoLayout", 

2360 "/SinglePage", 

2361 "/OneColumn", 

2362 "/TwoColumnLeft", 

2363 "/TwoColumnRight", 

2364 "/TwoPageLeft", 

2365 "/TwoPageRight", 

2366 ) 

2367 

def _get_page_layout(self) -> Optional[LayoutType]:
    """Return the ``/PageLayout`` entry of the catalog, or ``None`` when absent."""
    try:
        layout = self._root_object["/PageLayout"]
    except KeyError:
        return None
    return cast(LayoutType, layout)

2373 

def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
    """
    Set the page layout.

    A value that is not a ``NameObject`` is checked against
    ``_valid_layouts``; an unknown value only triggers a warning and is
    stored anyway.

    Args:
        layout: The page layout to be used.

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    if not isinstance(layout, NameObject):
        if layout not in self._valid_layouts:
            # The separator must be ", "; the former ``{'', ''.join(...)}``
            # formatted a tuple ('', '<all names glued together>').
            logger_warning(
                f"Layout should be one of: {', '.join(self._valid_layouts)}",
                __name__,
            )
        layout = NameObject(layout)
    self._root_object.update({NameObject("/PageLayout"): layout})

2408 

def set_page_layout(self, layout: LayoutType) -> None:
    """
    Set the page layout (delegates to ``_set_page_layout``).

    Args:
        layout: The page layout to be used

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    apply_layout = self._set_page_layout
    apply_layout(layout)

2436 

@property
def page_layout(self) -> Optional[LayoutType]:
    """
    The page layout stored in the catalog, or ``None`` when not set.

    .. list-table:: Valid ``layout`` values
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    current_layout = self._get_page_layout()
    return current_layout

2461 

@page_layout.setter
def page_layout(self, layout: LayoutType) -> None:
    """Store ``layout`` as the page layout (delegates to ``_set_page_layout``)."""
    apply_layout = self._set_page_layout
    apply_layout(layout)

2465 

2466 _valid_modes = ( 

2467 "/UseNone", 

2468 "/UseOutlines", 

2469 "/UseThumbs", 

2470 "/FullScreen", 

2471 "/UseOC", 

2472 "/UseAttachments", 

2473 ) 

2474 

def _get_page_mode(self) -> Optional[PagemodeType]:
    """Return the ``/PageMode`` entry of the catalog, or ``None`` when absent."""
    try:
        mode = self._root_object["/PageMode"]
    except KeyError:
        return None
    return cast(PagemodeType, mode)

2480 

@property
def page_mode(self) -> Optional[PagemodeType]:
    """
    The page mode stored in the catalog, or ``None`` when not set.

    .. list-table:: Valid ``mode`` values
       :widths: 50 200

       * - /UseNone
         - Do not show outline or thumbnails panels
       * - /UseOutlines
         - Show outline (aka bookmarks) panel
       * - /UseThumbs
         - Show page thumbnails panel
       * - /FullScreen
         - Fullscreen view
       * - /UseOC
         - Show Optional Content Group (OCG) panel
       * - /UseAttachments
         - Show attachments panel
    """
    current_mode = self._get_page_mode()
    return current_mode

2503 

@page_mode.setter
def page_mode(self, mode: PagemodeType) -> None:
    """
    Store ``mode`` as the ``/PageMode`` entry of the catalog.

    A value that is not a ``NameObject`` is checked against
    ``_valid_modes``; an unknown value only triggers a warning and is
    stored anyway.
    """
    if not isinstance(mode, NameObject):
        if mode not in self._valid_modes:
            logger_warning(
                f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
            )
        mode = NameObject(mode)
    self._root_object.update({NameObject("/PageMode"): mode})

2515 

def add_annotation(
    self,
    page_number: Union[int, PageObject],
    annotation: dict[str, Any],
) -> DictionaryObject:
    """
    Add a single, new annotation to a page.

    The annotation must not have been added before; it cannot be recycled.

    Args:
        page_number: PageObject or page index.
        annotation: Annotation to be added (created with annotation).

    Returns:
        The inserted object.
        This can be used for popup creation, for example.

    Raises:
        TypeError: ``page_number`` is neither an ``int`` nor a ``PageObject``.

    """
    if isinstance(page_number, int):
        page = self.pages[page_number]
    elif isinstance(page_number, PageObject):
        page = page_number
    else:
        raise TypeError("page: invalid type")

    to_add = cast(DictionaryObject, _pdf_objectify(annotation))
    to_add[NameObject("/P")] = page.indirect_reference

    if page.annotations is None:
        page[NameObject("/Annots")] = ArrayObject()
    assert page.annotations is not None

    subtype = to_add.get("/Subtype")

    # Internal link annotations need the correct object type for the
    # destination
    if subtype == "/Link" and "/Dest" in to_add:
        raw_dest = cast(dict[Any, Any], to_add[NameObject("/Dest")])
        link_fit = Fit(
            fit_type=raw_dest["fit"], fit_args=dict(raw_dest)["fit_args"]
        )  # the dict() copy is kept from the original code
        link_dest = Destination(
            NameObject("/LinkName"),
            raw_dest["target_page_index"],
            link_fit,
        )
        to_add[NameObject("/Dest")] = link_dest.dest_array

    page.annotations.append(self._add_object(to_add))

    if subtype == "/Popup" and NameObject("/Parent") in to_add:
        # Point the parent annotation back at its popup.
        parent_annotation = cast(DictionaryObject, to_add["/Parent"].get_object())
        parent_annotation[NameObject("/Popup")] = to_add.indirect_reference

    return to_add

2569 

def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
    """
    Perform some clean up in the page.

    Currently: named destinations stored as ``NameObject`` in the page
    annotations (``/Dest``, or ``/D`` of the ``/A`` action) are converted
    to ``TextStringObject`` (required for names/dests list).

    Args:
        page: The page, or a reference to it.

    Returns:
        The cleaned PageObject

    """
    page = cast("PageObject", page.get_object())
    for annot_ref in page.get("/Annots", []):
        annot = annot_ref.get_object()
        dest = annot.get("/Dest", None)
        action = annot.get("/A", None)
        if isinstance(dest, NameObject):
            annot[NameObject("/Dest")] = TextStringObject(dest)
            continue
        if action is None:
            continue
        # No direct named destination: look inside the action dictionary.
        action = action.get_object()
        action_dest = action.get("/D", None)
        if isinstance(action_dest, NameObject):
            action[NameObject("/D")] = TextStringObject(action_dest)
    return page

2596 

2597 def _create_stream( 

2598 self, fileobj: Union[Path, StrByteType, PdfReader] 

2599 ) -> tuple[IOBase, Optional[Encryption]]: 

2600 # If the fileobj parameter is a string, assume it is a path 

2601 # and create a file object at that location. If it is a file, 

2602 # copy the file's contents into a BytesIO stream object; if 

2603 # it is a PdfReader, copy that reader's stream into a 

2604 # BytesIO stream. 

2605 # If fileobj is none of the above types, it is not modified 

2606 encryption_obj = None 

2607 stream: IOBase 

2608 if isinstance(fileobj, (str, Path)): 

2609 with FileIO(fileobj, "rb") as f: 

2610 stream = BytesIO(f.read()) 

2611 elif isinstance(fileobj, PdfReader): 

2612 if fileobj._encryption: 

2613 encryption_obj = fileobj._encryption 

2614 orig_tell = fileobj.stream.tell() 

2615 fileobj.stream.seek(0) 

2616 stream = BytesIO(fileobj.stream.read()) 

2617 

2618 # reset the stream to its original location 

2619 fileobj.stream.seek(orig_tell) 

2620 elif hasattr(fileobj, "seek") and hasattr(fileobj, "read"): 

2621 fileobj.seek(0) 

2622 filecontent = fileobj.read() 

2623 stream = BytesIO(filecontent) 

2624 else: 

2625 raise NotImplementedError( 

2626 "Merging requires an object that PdfReader can parse. " 

2627 "Typically, that is a Path or a string representing a Path, " 

2628 "a file object, or an object implementing .seek and .read. " 

2629 "Passing a PdfReader directly works as well." 

2630 ) 

2631 return stream, encryption_obj 

2632 

def append(
    self,
    fileobj: Union[StrByteType, PdfReader, Path],
    outline_item: Union[
        str, None, PageRange, tuple[int, int], tuple[int, int, int], list[int]
    ] = None,
    pages: Union[
        None,
        PageRange,
        tuple[int, int],
        tuple[int, int, int],
        list[int],
        list[PageObject],
    ] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
) -> None:
    """
    Concatenate pages at the end of the file.

    Same as :meth:`merge()<merge>`, except that no position is given: the
    pages are always added after the existing ones.

    Args:
        fileobj: A File Object or an object that supports the standard
            read and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
        outline_item: Optionally, you may specify a string to build an
            outline (aka 'bookmark') to identify the beginning of the
            included file. When a tuple, list or ``PageRange`` is given
            here, it is used as ``pages`` and the following arguments are
            shifted accordingly.
        pages: Can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: Provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored

    """
    if excluded_fields is None:
        excluded_fields = ()

    if isinstance(outline_item, (tuple, list, PageRange)):
        # ``outline_item`` was omitted and a page range took its slot:
        # shift the following arguments by one position.
        if isinstance(pages, bool):
            if not isinstance(import_outline, bool):
                excluded_fields = import_outline
            import_outline = pages
        pages = outline_item
        outline_item = None

    self.merge(
        None,
        fileobj,
        outline_item,
        pages,
        import_outline,
        excluded_fields,
    )

2700 

def merge(
    self,
    position: Optional[int],
    fileobj: Union[Path, StrByteType, PdfReader],
    outline_item: Optional[str] = None,
    pages: Optional[Union[PageRangeSpec, list[PageObject]]] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = (),
) -> None:
    """
    Merge the pages from the given file into the output file at the
    specified page number.

    After the pages have been added, the method also processes, in this
    order: the named destinations of the source, the outline, the page
    annotations, the ``/AcroForm`` field list and the article threads.

    Args:
        position: The *page number* to insert this file. File will
            be inserted after the given number. When ``None``, the pages
            are added with ``add_page()`` instead of ``insert_page()``.
        fileobj: A File Object or an object that supports the standard
            read and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file. An object that is
            already a ``PdfDocCommon`` instance is used directly as the
            reader.
        outline_item: Optionally, you may specify a string to build an outline
            (aka 'bookmark') to identify the
            beginning of the included file.
        pages: can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document. ``None`` selects every
            page of the source.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored
            ``None`` is treated like an empty tuple.

    Raises:
        TypeError: The pages attribute is not configured properly
            (it is neither ``None``, a ``PageRange``, a list nor a tuple).

    """
    if isinstance(fileobj, PdfDocCommon):
        reader = fileobj
    else:
        stream, _encryption_obj = self._create_stream(fileobj)
        # Create a new PdfReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        reader = PdfReader(stream, strict=False)  # type: ignore[arg-type]

    if excluded_fields is None:
        excluded_fields = ()
    # Find the range of pages to merge.
    if pages is None:
        pages = list(range(len(reader.pages)))
    elif isinstance(pages, PageRange):
        pages = list(range(*pages.indices(len(reader.pages))))
    elif isinstance(pages, list):
        pass  # keep unchanged
    elif isinstance(pages, tuple) and len(pages) <= 3:
        pages = list(range(*pages))
    elif not isinstance(pages, tuple):
        raise TypeError(
            '"pages" must be a tuple of (start, stop[, step]) or a list'
        )
    # NOTE(review): a tuple with more than three elements matches none of
    # the branches above and is iterated below as-is.

    # Maps the object number of each source page to its copy in this writer.
    srcpages = {}
    for page in pages:
        # Entries may be PageObject instances or indexes into reader.pages.
        if isinstance(page, PageObject):
            pg = page
        else:
            pg = reader.pages[page]
        assert pg.indirect_reference is not None
        if position is None:
            # numbers in the exclude list identifies that the exclusion is
            # only applicable to 1st level of cloning
            srcpages[pg.indirect_reference.idnum] = self.add_page(
                pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
        else:
            srcpages[pg.indirect_reference.idnum] = self.insert_page(
                pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
            # Keep the following pages in source order.
            position += 1
        # Remember the source page; the annotation and article steps below
        # read "/Annots" and "/B" from it.
        srcpages[pg.indirect_reference.idnum].original_page = pg

    reader._named_destinations = (
        reader.named_destinations
    )  # need for the outline processing below

    arr: Any

    def _process_named_dests(dest: Any) -> None:
        """Add one named destination of the source if it targets a merged page."""
        arr = dest.dest_array
        if "/Names" in self._root_object and dest["/Title"] in cast(
            list[Any],
            cast(
                DictionaryObject,
                cast(DictionaryObject, self._root_object["/Names"]).get("/Dests", DictionaryObject()),
            ).get("/Names", DictionaryObject()),
        ):
            # already exists: should not duplicate it
            pass
        elif dest["/Page"] is None or isinstance(dest["/Page"], NullObject):
            # no target page: nothing to add
            pass
        elif isinstance(dest["/Page"], int):
            # the page reference is a page number normally not a PDF Reference
            # page numbers as int are normally accepted only in external goto
            try:
                p = reader.pages[dest["/Page"]]
            except IndexError:
                return
            assert p.indirect_reference is not None
            try:
                # Replace the source page number with the page number of
                # the copy; a KeyError means the page was not merged.
                arr[NumberObject(0)] = NumberObject(
                    srcpages[p.indirect_reference.idnum].page_number
                )
                self.add_named_destination_array(dest["/Title"], arr)
            except KeyError:
                pass
        elif dest["/Page"].indirect_reference.idnum in srcpages:
            # Point the destination at the copied page.
            arr[NumberObject(0)] = srcpages[
                dest["/Page"].indirect_reference.idnum
            ].indirect_reference
            self.add_named_destination_array(dest["/Title"], arr)

    for dest in reader._named_destinations.values():
        _process_named_dests(dest)

    # Parent under which the imported outline is inserted: either a new
    # outline item named after ``outline_item`` or the outline root.
    outline_item_typ: TreeObject
    if outline_item is not None:
        # NOTE(review): next(iter(...)) raises StopIteration when no page
        # was merged (empty ``srcpages``).
        outline_item_typ = cast(
            "TreeObject",
            self.add_outline_item(
                TextStringObject(outline_item),
                next(iter(srcpages.values())).indirect_reference,
                fit=PAGE_FIT,
            ).get_object(),
        )
    else:
        outline_item_typ = self.get_outline_root()

    _ro = reader.root_object
    if import_outline and CO.OUTLINES in _ro:
        outline = self._get_filtered_outline(
            _ro.get(CO.OUTLINES, None), srcpages, reader
        )
        self._insert_filtered_outline(
            outline, outline_item_typ, None
        )  # TODO: use before parameter

    if "/Annots" not in excluded_fields:
        # "/Annots" was excluded while cloning the pages above; the
        # annotations are rebuilt here from each source page.
        for pag in srcpages.values():
            lst = self._insert_filtered_annotations(
                pag.original_page.get("/Annots", []), pag, srcpages, reader
            )
            if len(lst) > 0:
                pag[NameObject("/Annots")] = lst
            self.clean_page(pag)

    if "/AcroForm" in _ro and _ro["/AcroForm"] is not None:
        if "/AcroForm" not in self._root_object:
            # First form merged: copy the source /AcroForm without its
            # "/Fields" entry and start from an empty field list.
            self._root_object[NameObject("/AcroForm")] = self._add_object(
                cast(
                    DictionaryObject,
                    reader.root_object["/AcroForm"],
                ).clone(self, False, ("/Fields",))
            )
            arr = ArrayObject()
        else:
            arr = cast(
                ArrayObject,
                cast(DictionaryObject, self._root_object["/AcroForm"])["/Fields"],
            )
        # Source object number -> object number in this writer.
        # NOTE(review): raises KeyError if nothing from ``reader`` has been
        # registered in ``_id_translated`` yet - confirm this cannot happen.
        trslat = self._id_translated[id(reader)]
        try:
            for f in reader.root_object["/AcroForm"]["/Fields"]:  # type: ignore
                try:
                    ind = IndirectObject(trslat[f.idnum], 0, self)
                    if ind not in arr:
                        arr.append(ind)
                except KeyError:
                    # for trslat[] which mean the field has not be copied
                    # through the page
                    pass
        except KeyError:  # for /Acroform or /Fields are not existing
            arr = self._add_object(ArrayObject())
        cast(DictionaryObject, self._root_object["/AcroForm"])[
            NameObject("/Fields")
        ] = arr

    if "/B" not in excluded_fields:
        # The empty pattern matches every thread title.
        self.add_filtered_articles("", srcpages, reader)

2890 

def _add_articles_thread(
    self,
    thread: DictionaryObject,  # thread entry from the reader's array of threads
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> IndirectObject:
    """
    Clone the thread with only the applicable articles.

    The thread dictionary is cloned without its ``/F`` entry, then the
    chain of articles starting at ``thread["/F"]`` is followed through
    ``/N``. A new article is created for each one whose ``/P`` page
    resolves to a cloned page.

    Args:
        thread: thread entry from the reader's array of threads
        pages: mapping passed to ``_get_cloned_page()`` to resolve the
            page of each article (``merge()`` passes source page object
            number -> cloned page)
        reader: the reader the thread comes from; passed through to
            ``_get_cloned_page()``

    Returns:
        The added thread as an indirect reference

    """
    nthread = thread.clone(
        self, force_duplicate=True, ignore_fields=("/F",)
    )  # use of clone to keep link between reader and writer
    self.threads.append(nthread.indirect_reference)
    first_article = cast("DictionaryObject", thread["/F"])
    current_article: Optional[DictionaryObject] = first_article
    # Last article created so far; ``new_first`` (set below) is the first.
    new_article: Optional[DictionaryObject] = None
    while current_article is not None:
        pag = self._get_cloned_page(
            cast("PageObject", current_article["/P"]), pages, reader
        )
        if pag is not None:
            if new_article is None:
                # First kept article: it becomes the thread's "/F".
                new_article = cast(
                    "DictionaryObject",
                    self._add_object(DictionaryObject()).get_object(),
                )
                new_first = new_article
                nthread[NameObject("/F")] = new_article.indirect_reference
            else:
                # Later article: "/V" points back to the previous one and
                # the previous one's "/N" points forward to it.
                new_article2 = cast(
                    "DictionaryObject",
                    self._add_object(
                        DictionaryObject(
                            {NameObject("/V"): new_article.indirect_reference}
                        )
                    ).get_object(),
                )
                new_article[NameObject("/N")] = new_article2.indirect_reference
                new_article = new_article2
            new_article[NameObject("/P")] = pag
            new_article[NameObject("/T")] = nthread.indirect_reference
            new_article[NameObject("/R")] = current_article["/R"]
            # Register the new article in the "/B" array of the cloned page.
            pag_obj = cast("PageObject", pag.get_object())
            if "/B" not in pag_obj:
                pag_obj[NameObject("/B")] = ArrayObject()
            cast("ArrayObject", pag_obj["/B"]).append(
                new_article.indirect_reference
            )
        current_article = cast("DictionaryObject", current_article["/N"])
        if current_article == first_article:
            # Back at the start of the source chain: close the new chain
            # into a ring and stop.
            # NOTE(review): if no article matched, ``new_article`` is still
            # None and ``new_first`` is unbound here - confirm callers
            # guarantee at least one match.
            new_article[NameObject("/N")] = new_first.indirect_reference  # type: ignore
            new_first[NameObject("/V")] = new_article.indirect_reference  # type: ignore
            current_article = None
    assert nthread.indirect_reference is not None
    return nthread.indirect_reference

2955 

def add_filtered_articles(
    self,
    fltr: Union[
        Pattern[Any], str
    ],  # pattern searched in the thread title
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> None:
    """
    Add articles matching the defined criteria.

    Every ``/B`` entry of the original page behind each value of ``pages``
    is inspected. Its thread (``/T``) is added through
    ``_add_articles_thread()`` when the thread is not yet recorded in the
    translation table of ``reader`` and its ``/I`` ``/Title`` matches.

    Args:
        fltr: regular expression (compiled or as a string) searched in the
            thread title; any other type is replaced by the empty pattern
        pages: cloned pages, each carrying an ``original_page`` attribute
        reader: the reader the pages were taken from

    """
    if isinstance(fltr, str):
        title_pattern = re.compile(fltr)
    elif isinstance(fltr, Pattern):
        title_pattern = fltr
    else:
        title_pattern = re.compile("")

    for cloned_page in pages.values():
        source_page = cloned_page.original_page
        for bead in source_page.get("/B", ()):
            thread = bead.get_object().get("/T")
            if thread is None:
                continue
            thread = thread.get_object()
            # Threads already present in the translation table are skipped.
            if thread.indirect_reference.idnum in self._id_translated[id(reader)]:
                continue
            title = (thread.get("/I", {})).get("/Title", "")
            if title_pattern.search(title):
                self._add_articles_thread(thread, pages, reader)

2988 

def _get_cloned_page(
    self,
    page: Union[None, IndirectObject, PageObject, NullObject],
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> Optional[IndirectObject]:
    """
    Return the reference of the cloned page matching a source page.

    Args:
        page: the source page, given either as a page dictionary
            (``/Type`` equal to ``/Page``) or as an indirect reference
        pages: mapping whose keys are source page object numbers and
            whose values are the corresponding cloned pages
        reader: not used by this method

    Returns:
        The ``indirect_reference`` of the cloned page, or ``None`` when
        ``page`` is ``None``/null, is of another kind, has no indirect
        reference, or is not present in ``pages``.

    """
    if page is None or isinstance(page, NullObject):
        return None
    if isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page":
        source_ref = page.indirect_reference
    elif isinstance(page, IndirectObject):
        source_ref = page
    else:
        # Neither a page dictionary nor a reference: nothing to look up.
        return None
    if source_ref is None:
        return None
    try:
        return pages[source_ref.idnum].indirect_reference  # type: ignore
    except (KeyError, AttributeError, TypeError):
        # The page was not cloned, or the objects do not have the expected shape.
        return None

3005 

def _insert_filtered_annotations(
    self,
    annots: Union[IndirectObject, list[DictionaryObject], None],
    page: PageObject,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Clone the annotations of a source page into this writer.

    Annotations with an explicit destination (``/Dest``, or ``/D`` of a
    ``/GoTo`` action) are kept only if the target page resolves through
    ``_get_cloned_page()``, and the target is rewritten to the cloned page.
    Annotations with a named destination are kept only if the name is
    found in ``get_named_dest_root()``. Annotations without a destination
    are cloned unchanged.

    Args:
        annots: the ``/Annots`` value of the source page (a list, an
            indirect reference to one, or ``None``)
        page: not used in the body of this method
        pages: mapping passed to ``_get_cloned_page()``
        reader: passed to ``_get_cloned_page()``

    Returns:
        An ``ArrayObject`` of references to the cloned annotations; empty
        when ``annots`` is ``None`` or is not a list.
        NOTE(review): the declared return type is ``list[Destination]``.

    """
    outlist = ArrayObject()
    if isinstance(annots, IndirectObject):
        annots = cast("list[Any]", annots.get_object())
    if annots is None:
        return outlist
    if not isinstance(annots, list):
        logger_warning(f"Expected list of annotations, got {annots} of type {annots.__class__.__name__}.", __name__)
        return outlist
    for an in annots:
        ano = cast("DictionaryObject", an.get_object())
        # First branch: everything except a "/Link" annotation whose "/A"
        # action is a "/GoTo" and which has no "/Dest" of its own.
        if (
            ano["/Subtype"] != "/Link"
            or "/A" not in ano
            or cast("DictionaryObject", ano["/A"])["/S"] != "/GoTo"
            or "/Dest" in ano
        ):
            if "/Dest" not in ano:
                # No destination to translate: plain clone.
                outlist.append(self._add_object(ano.clone(self)))
            else:
                d = ano["/Dest"]
                if isinstance(d, str):
                    # it is a named dest
                    if str(d) in self.get_named_dest_root():
                        outlist.append(ano.clone(self).indirect_reference)
                else:
                    # Explicit destination: d[0] is looked up as the target page.
                    d = cast("ArrayObject", d)
                    p = self._get_cloned_page(d[0], pages, reader)
                    if p is not None:
                        anc = ano.clone(self, ignore_fields=("/Dest",))
                        anc[NameObject("/Dest")] = ArrayObject([p, *d[1:]])
                        outlist.append(self._add_object(anc))
        else:
            # "/Link" with a "/GoTo" action: the target is in "/A" -> "/D".
            d = cast("DictionaryObject", ano["/A"]).get("/D", NullObject())
            if d is None or isinstance(d, NullObject):
                continue
            if isinstance(d, str):
                # it is a named dest
                if str(d) in self.get_named_dest_root():
                    outlist.append(ano.clone(self).indirect_reference)
            else:
                d = cast("ArrayObject", d)
                p = self._get_cloned_page(d[0], pages, reader)
                if p is not None:
                    # Clone ignoring "/D", then set "/D" to the cloned page.
                    anc = ano.clone(self, ignore_fields=("/D",))
                    cast("DictionaryObject", anc["/A"])[
                        NameObject("/D")
                    ] = ArrayObject([p, *d[1:]])
                    outlist.append(self._add_object(anc))
    return outlist

3062 

def _get_filtered_outline(
    self,
    node: Any,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Extract outline item entries that are part of the specified page set.

    An item is kept when its page resolves through ``_get_cloned_page()``
    or when at least one of its children is kept. The kept children of
    each item are stored on it as ``_filtered_children``.

    Args:
        node: outline root (``/Type`` equal to ``/Outlines`` or without
            ``/Title``) or the first item of a sibling chain; ``None`` and
            null objects are accepted
        pages: mapping passed to ``_get_cloned_page()``
        reader: reader used to build the outline items
            (``reader._build_outline_item``)

    Returns:
        A list of destination objects.

    """
    new_outline = []
    if node is None:
        node = NullObject()
    node = node.get_object()
    if is_null_or_none(node):
        node = DictionaryObject()
    if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
        # Container node: continue with its first child, if any.
        node = node.get("/First", None)
        if node is not None:
            node = node.get_object()
            new_outline += self._get_filtered_outline(node, pages, reader)
    else:
        v: Union[None, IndirectObject, NullObject]
        # Walk the sibling chain through "/Next".
        while node is not None:
            node = node.get_object()
            o = cast("Destination", reader._build_outline_item(node))
            v = self._get_cloned_page(cast("PageObject", o["/Page"]), pages, reader)
            if v is None:
                # Page not in ``pages``: mark the item as having no page.
                v = NullObject()
            o[NameObject("/Page")] = v
            if "/First" in node:
                o._filtered_children = self._get_filtered_outline(
                    node["/First"], pages, reader
                )
            else:
                o._filtered_children = []
            if (
                not isinstance(o["/Page"], NullObject)
                or len(o._filtered_children) > 0
            ):
                new_outline.append(o)
            node = node.get("/Next", None)
    return new_outline

3114 

def _clone_outline(self, dest: Destination) -> TreeObject:
    """
    Create a new outline entry in this writer from a destination.

    The entry receives the title of ``dest``. When the page of ``dest`` is
    not null it also receives either a clone of the ``/A`` action of the
    source node or the destination array. ``/F`` and ``/C`` are taken from
    the source node when there is one (defaults: ``0`` and three ``0.0``).

    Args:
        dest: the destination to copy

    Returns:
        The new outline entry, already registered in this writer.

    """
    entry = TreeObject()
    self._add_object(entry)
    entry[NameObject("/Title")] = TextStringObject(dest["/Title"])

    has_page = not isinstance(dest["/Page"], NullObject)
    source_node = dest.node
    if has_page:
        if source_node is not None and "/A" in source_node:
            entry[NameObject("/A")] = source_node["/A"].clone(self)
        else:
            entry[NameObject("/Dest")] = dest.dest_array
    # TODO: /SE
    if source_node is None:
        return entry

    entry[NameObject("/F")] = NumberObject(source_node.get("/F", 0))
    default_color = [FloatObject(0.0), FloatObject(0.0), FloatObject(0.0)]
    entry[NameObject("/C")] = ArrayObject(source_node.get("/C", default_color))
    return entry

3133 

def _insert_filtered_outline(
    self,
    outlines: list[Destination],
    parent: Union[TreeObject, IndirectObject],
    before: Union[None, TreeObject, IndirectObject] = None,
) -> None:
    """
    Insert filtered outline items, with their kept children, below ``parent``.

    Args:
        outlines: items carrying a ``_filtered_children`` attribute
        parent: the node receiving the cloned items
        before: passed to ``insert_child()`` for the items of this level;
            children are always inserted with ``None``

    """
    for item in outlines:
        # TODO: can be improved to keep A and SE entries (ignored for the moment)
        # with np=self.add_outline_item_destination(dest,parent,before)
        is_container = item.get("/Type", "") == "/Outlines" or "/Title" not in item
        if is_container:
            # Nothing is cloned for a container; its children go to ``parent``.
            children_parent = parent
        else:
            children_parent = self._clone_outline(item)
            parent_tree = cast(TreeObject, parent.get_object())
            parent_tree.insert_child(children_parent, before, self)
        self._insert_filtered_outline(item._filtered_children, children_parent, None)

3149 

def close(self) -> None:
    """Do nothing; implemented for API harmonization."""
    return None

3153 

def find_outline_item(
    self,
    outline_item: dict[str, Any],
    root: Optional[OutlineType] = None,
) -> Optional[list[int]]:
    """
    Locate an outline item and return its position as a list of indexes.

    A node matches when its ``indirect_reference`` or its ``/Title`` equals
    ``outline_item``. Siblings are followed through ``/Next`` and children
    are searched recursively through ``/First``.

    Args:
        outline_item: the value compared with each node
        root: node where the search starts; the outline root of the
            writer when ``None``

    Returns:
        One index per level from the starting node down to the match
        (nodes without ``/Title`` contribute no index), or ``None`` when
        nothing matches.

    """
    node = self.get_outline_root() if root is None else cast("TreeObject", root)

    index = 0
    while node is not None:
        same_reference = node.indirect_reference == outline_item
        if same_reference or node.get("/Title", None) == outline_item:
            return [index]
        if "/First" in node:
            below = self.find_outline_item(
                outline_item, cast(OutlineType, node["/First"])
            )
            if below:
                here = [index] if "/Title" in node else []
                return here + below
        if "/Next" not in node:
            return None
        index += 1
        node = cast(TreeObject, node["/Next"])
    return None

3182 

def reset_translation(
    self, reader: Union[None, PdfReader, IndirectObject] = None
) -> None:
    """
    Reset the translation table between reader and the writer object.

    Late cloning will create new independent objects.

    Args:
        reader: PdfReader or IndirectObject referencing a PdfReader object.
            if set to None or omitted, all tables will be reset.

    Raises:
        Exception: ``reader`` is neither ``None``, a PdfReader nor an
            IndirectObject.

    """
    if reader is None:
        self._id_translated = {}
    elif isinstance(reader, PdfReader):
        # A reader without a table is not an error: nothing to reset.
        self._id_translated.pop(id(reader), None)
    elif isinstance(reader, IndirectObject):
        self._id_translated.pop(id(reader.pdf), None)
    else:
        # The f prefix was missing, so the message never showed the value.
        raise Exception(f"invalid parameter {reader}")

3210 

def set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label, a decimal label starting from 1 is applied.

    Args:
        page_index_from: page index of the beginning of the range starting from 0
        page_index_to: page index of the end of the range starting from 0
        style: The numbering style to be used for the numeric portion of each page label:

                   * ``/D`` Decimal Arabic numerals
                   * ``/R`` Uppercase Roman numerals
                   * ``/r`` Lowercase Roman numerals
                   * ``/A`` Uppercase letters (A to Z for the first 26 pages,
                     AA to ZZ for the next 26, and so on)
                   * ``/a`` Lowercase letters (a to z for the first 26 pages,
                     aa to zz for the next 26, and so on)

        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value,
               which must be greater than or equal to 1.
               ``0`` (the parameter default) and ``None`` are accepted
               without this check.

    Raises:
        ValueError: neither style nor prefix is given, the page indexes
            are out of range or not ordered, or start is below one.

    """
    label_given = style is not None or prefix is not None
    if not label_given:
        raise ValueError("At least one of style and prefix must be given")

    if page_index_from < 0:
        raise ValueError("page_index_from must be greater or equal than 0")
    if page_index_to < page_index_from:
        raise ValueError(
            "page_index_to must be greater or equal than page_index_from"
        )
    if page_index_to >= len(self.pages):
        raise ValueError("page_index_to exceeds number of pages")

    # 0 is let through on purpose; only other values below one are rejected.
    if start is not None and start < 1 and start != 0:
        raise ValueError("If given, start must be greater or equal than one")

    self._set_page_label(page_index_from, page_index_to, style, prefix, start)

3261 

def _set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label a decimal label starting from 1 is applied.

    Args:
        page_index_from: page index of the beginning of the range starting from 0
        page_index_to: page index of the end of the range starting from 0
        style: The numbering style to be used for the numeric portion of each page label:
                    /D Decimal Arabic numerals
                    /R Uppercase Roman numerals
                    /r Lowercase Roman numerals
                    /A Uppercase letters (A to Z for the first 26 pages,
                       AA to ZZ for the next 26, and so on)
                    /a Lowercase letters (a to z for the first 26 pages,
                       aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value.
               The ``/St`` entry is only written when the value differs
               from ``0`` (the parameter default).

    """
    # Label used for the pages that are not covered by an explicit label.
    decimal_label = DictionaryObject()
    decimal_label[NameObject("/S")] = NameObject("/D")

    range_label = DictionaryObject()
    if style is not None:
        range_label[NameObject("/S")] = NameObject(style)
    if prefix is not None:
        range_label[NameObject("/P")] = TextStringObject(prefix)
    if start != 0:
        range_label[NameObject("/St")] = NumberObject(start)

    labels_key = NameObject(CatalogDictionary.PAGE_LABELS)
    if labels_key not in self._root_object:
        # No page labels yet: create the tree with the decimal label at page 0.
        initial_nums = ArrayObject()
        nums_insert(NumberObject(0), decimal_label, initial_nums)
        new_tree = TreeObject()
        new_tree[NameObject("/Nums")] = initial_nums
        self._root_object[labels_key] = new_tree

    label_tree = cast(TreeObject, self._root_object[labels_key])
    label_nums = cast(ArrayObject, label_tree[NameObject("/Nums")])

    range_start = NumberObject(page_index_from)
    nums_insert(range_start, range_label, label_nums)
    nums_clear_range(range_start, page_index_to, label_nums)
    following_label_pos, *_ = nums_next(range_start, label_nums)

    # Restore the decimal label right after the range unless a label
    # already starts there or the range reaches the last page.
    after_range = page_index_to + 1
    if following_label_pos != after_range and after_range < len(self.pages):
        nums_insert(NumberObject(after_range), decimal_label, label_nums)

    label_tree[NameObject("/Nums")] = label_nums
    self._root_object[labels_key] = label_tree

3326 

3327 def _repr_mimebundle_( 

3328 self, 

3329 include: Union[None, Iterable[str]] = None, 

3330 exclude: Union[None, Iterable[str]] = None, 

3331 ) -> dict[str, Any]: 

3332 """ 

3333 Integration into Jupyter Notebooks. 

3334 

3335 This method returns a dictionary that maps a mime-type to its 

3336 representation. 

3337 

3338 .. seealso:: 

3339 

3340 https://ipython.readthedocs.io/en/stable/config/integrating.html 

3341 """ 

3342 pdf_data = BytesIO() 

3343 self.write(pdf_data) 

3344 data = { 

3345 "application/pdf": pdf_data, 

3346 } 

3347 

3348 if include is not None: 

3349 # Filter representations based on include list 

3350 data = {k: v for k, v in data.items() if k in include} 

3351 

3352 if exclude is not None: 

3353 # Remove representations based on exclude list 

3354 data = {k: v for k, v in data.items() if k not in exclude} 

3355 

3356 return data 

3357 

3358 

def _pdf_objectify(obj: Union[dict[str, Any], str, float, list[Any]]) -> PdfObject:
    """
    Convert a plain Python value into the matching PDF object.

    ``PdfObject`` instances are returned unchanged. Dictionaries and lists
    are converted recursively. Strings starting with ``/`` become names and
    other strings become text strings. Floats and ints become
    ``FloatObject``.

    Raises:
        NotImplementedError: the type of ``obj`` is not handled.

    """
    if isinstance(obj, PdfObject):
        return obj
    if isinstance(obj, dict):
        converted = DictionaryObject()
        for name, item in obj.items():
            converted[NameObject(name)] = _pdf_objectify(item)
        return converted
    if isinstance(obj, str):
        return NameObject(obj) if obj.startswith("/") else TextStringObject(obj)
    if isinstance(obj, (float, int)):
        return FloatObject(obj)
    if isinstance(obj, list):
        return ArrayObject(map(_pdf_objectify, obj))
    raise NotImplementedError(
        f"{type(obj)=} could not be cast to a PdfObject"
    )

3378 

3379 

def _create_outline_item(
    action_ref: Union[None, IndirectObject],
    title: str,
    color: Union[tuple[float, float, float], str, None],
    italic: bool,
    bold: bool,
) -> TreeObject:
    """
    Build the dictionary of an outline item.

    Args:
        action_ref: stored under ``/A`` when not ``None``
        title: stored under ``/Title``
        color: stored under ``/C`` when truthy; a string is first
            converted with ``hex_to_rgb()``
        italic: adds ``OutlineFontFlag.italic`` to ``/F``
        bold: adds ``OutlineFontFlag.bold`` to ``/F``

    Returns:
        The new outline item; ``/F`` is only present when ``italic`` or
        ``bold`` is set.

    """
    item = TreeObject()
    if action_ref is not None:
        item[NameObject("/A")] = action_ref
    item.update({NameObject("/Title"): create_string_object(title)})

    if color:
        components = hex_to_rgb(color) if isinstance(color, str) else color
        item.update(
            {NameObject("/C"): ArrayObject([FloatObject(part) for part in components])}
        )

    if italic or bold:
        font_flags = 0
        for enabled, flag in (
            (italic, OutlineFontFlag.italic),
            (bold, OutlineFontFlag.bold),
        ):
            if enabled:
                font_flags += flag
        item.update({NameObject("/F"): NumberObject(font_flags)})
    return item

3409 

3410 

def generate_appearance_stream(
    txt: str,
    sel: list[str],
    da: str,
    font_full_rev: dict[str, bytes],
    rct: RectangleObject,
    font_height: float,
    y_offset: float,
) -> bytes:
    """
    Build the content stream showing a text inside a rectangle.

    The text is split into lines on ``\\n`` and ``\\r``. Each line is
    written with ``Tj``: as a hexadecimal string when at least one
    character is encoded on two bytes or more, otherwise as a literal
    string.

    Args:
        txt: the text to show
        sel: lines around which a rectangle is drawn
        da: operators written once after ``BT`` and again after each
            rectangle
        font_full_rev: mapping from character to its encoded bytes;
            characters not in it are encoded as UTF-16-BE
        rct: rectangle whose width and height give the clipping area
        font_height: font height; the line step is 1.4 times this value
        y_offset: vertical position of the first line

    Returns:
        The content stream.

    """
    parts: list[bytes] = [
        f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
    ]
    for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
        if line in sel:
            # may be improved but cannot find how to get fill working => replaced with lined box
            parts.append(
                (
                    f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{da}\n"
                ).encode()
            )
        if line_number == 0:
            parts.append(f"2 {y_offset} Td\n".encode())
        else:
            # Td is a relative translation
            parts.append(f"0 {- font_height * 1.4} Td\n".encode())
        enc_line: list[bytes] = [
            font_full_rev.get(c, c.encode("utf-16-be")) for c in line
        ]
        if any(len(c) >= 2 for c in enc_line):
            parts.append(b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n")
        else:
            # Backslashes and parentheses must be escaped inside a literal
            # string, otherwise they end or corrupt it. The backslash is
            # replaced first so the added escapes are not doubled.
            literal = (
                b"".join(enc_line)
                .replace(b"\\", b"\\\\")
                .replace(b"(", b"\\(")
                .replace(b")", b"\\)")
            )
            parts.append(b"(" + literal + b") Tj\n")
    parts.append(b"ET\nQ\nEMC\nQ\n")
    return b"".join(parts)