Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_writer.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1473 statements  

1# Copyright (c) 2006, Mathieu Fenniak 

2# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com> 

3# 

4# All rights reserved. 

5# 

6# Redistribution and use in source and binary forms, with or without 

7# modification, are permitted provided that the following conditions are 

8# met: 

9# 

10# * Redistributions of source code must retain the above copyright notice, 

11# this list of conditions and the following disclaimer. 

12# * Redistributions in binary form must reproduce the above copyright notice, 

13# this list of conditions and the following disclaimer in the documentation 

14# and/or other materials provided with the distribution. 

15# * The name of the author may not be used to endorse or promote products 

16# derived from this software without specific prior written permission. 

17# 

18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

28# POSSIBILITY OF SUCH DAMAGE. 

29 

30import decimal 

31import enum 

32import hashlib 

33import re 

34import struct 

35import uuid 

36from collections.abc import Iterable 

37from io import BytesIO, FileIO, IOBase 

38from itertools import compress 

39from pathlib import Path 

40from re import Pattern 

41from types import TracebackType 

42from typing import ( 

43 IO, 

44 Any, 

45 Callable, 

46 Optional, 

47 Union, 

48 cast, 

49) 

50 

51from ._cmap import _default_fonts_space_width, build_char_map_from_dict 

52from ._doc_common import DocumentInformation, PdfDocCommon 

53from ._encryption import EncryptAlgorithm, Encryption 

54from ._page import PageObject, Transformation 

55from ._page_labels import nums_clear_range, nums_insert, nums_next 

56from ._reader import PdfReader 

57from ._utils import ( 

58 StrByteType, 

59 StreamType, 

60 _get_max_pdf_version_header, 

61 deprecation_no_replacement, 

62 logger_warning, 

63) 

64from .constants import AnnotationDictionaryAttributes as AA 

65from .constants import CatalogAttributes as CA 

66from .constants import ( 

67 CatalogDictionary, 

68 GoToActionArguments, 

69 ImageType, 

70 InteractiveFormDictEntries, 

71 OutlineFontFlag, 

72 PageLabelStyle, 

73 PagesAttributes, 

74 TypFitArguments, 

75 UserAccessPermissions, 

76) 

77from .constants import Core as CO 

78from .constants import FieldDictionaryAttributes as FA 

79from .constants import PageAttributes as PG 

80from .constants import TrailerKeys as TK 

81from .errors import PyPdfError 

82from .generic import ( 

83 PAGE_FIT, 

84 ArrayObject, 

85 BooleanObject, 

86 ByteStringObject, 

87 ContentStream, 

88 DecodedStreamObject, 

89 Destination, 

90 DictionaryObject, 

91 EmbeddedFile, 

92 Fit, 

93 FloatObject, 

94 IndirectObject, 

95 NameObject, 

96 NullObject, 

97 NumberObject, 

98 PdfObject, 

99 RectangleObject, 

100 ReferenceLink, 

101 StreamObject, 

102 TextStringObject, 

103 TreeObject, 

104 ViewerPreferences, 

105 create_string_object, 

106 extract_links, 

107 hex_to_rgb, 

108 is_null_or_none, 

109) 

110from .pagerange import PageRange, PageRangeSpec 

111from .types import ( 

112 AnnotationSubtype, 

113 BorderArrayType, 

114 LayoutType, 

115 OutlineItemType, 

116 OutlineType, 

117 PagemodeType, 

118) 

119from .xmp import XmpInformation 

120 

# Value returned by ``UserAccessPermissions.all()``.
# NOTE(review): presumably the combination of every permission flag - confirm.
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()

# Font height substituted for multiline text fields whose font size is 0.
DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12

123 

124 

class ObjectDeletionFlag(enum.IntFlag):
    """
    Bit flags naming categories of page objects.

    Each concrete member occupies its own bit, so members can be combined
    with ``|``. ``IMAGES`` is the union of the three image-related members.
    """

    NONE = 0
    TEXT = 1 << 0
    LINKS = 1 << 1
    ATTACHMENTS = 1 << 2
    OBJECTS_3D = 1 << 3
    ALL_ANNOTATIONS = 1 << 4
    XOBJECT_IMAGES = 1 << 5
    INLINE_IMAGES = 1 << 6
    DRAWING_IMAGES = 1 << 7
    IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES

136 

137 

138def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str: 

139 hash = hashlib.md5(usedforsecurity=False) 

140 for block in iter(lambda: stream.read(blocksize), b""): 

141 hash.update(block) 

142 return hash.hexdigest() 

143 

144 

145class PdfWriter(PdfDocCommon): 

146 """ 

147 Write a PDF file out, given pages produced by another class or through 

148 cloning a PDF file during initialization. 

149 

150 Typically data is added from a :class:`PdfReader<pypdf.PdfReader>`. 

151 

152 Args: 

153 clone_from: identical to fileobj (for compatibility) 

154 

155 incremental: If true, loads the document and sets the PdfWriter in incremental mode. 

156 

157 When writing incrementally, the original document is written first and new/modified 

158 content is appended. To be used for signed document/forms to keep signature valid. 

159 

160 full: If true, loads all the objects (always full if incremental = True). 

161 This parameter may allow loading large PDFs. 

162 

163 """ 

164 

def __init__(
    self,
    fileobj: Union[None, PdfReader, StrByteType, Path] = "",
    clone_from: Union[None, PdfReader, StrByteType, Path] = None,
    incremental: bool = False,
    full: bool = False,
) -> None:
    """
    Create a blank writer, or one pre-populated from an existing document.

    Args:
        fileobj: Optional source. A ``PdfReader`` or a non-empty
            file/stream is cloned into the writer; an empty string, a
            missing or empty file, or an empty stream leaves the writer
            blank. The value is also kept in ``temp_fileobj``.
        clone_from: Document to clone. Used when ``fileobj`` is the empty
            string, and preferred over a str/Path/BytesIO ``fileobj``.
        incremental: Load the document in incremental mode.
        full: Load through the same code path as incremental mode, but
            leave ``self.incremental`` False once loading is done.

    Raises:
        PyPdfError: If ``incremental`` or ``full`` is set and ``fileobj``
            is not a path, a ``BytesIO`` or a ``PdfReader``.

    """
    self.incremental = incremental or full
    """
    Returns if the PdfWriter object has been started in incremental mode.
    """

    self._objects: list[Optional[PdfObject]] = []
    """
    The indirect objects in the PDF.
    For the incremental case, it will be filled with None
    in clone_reader_document_root.
    """

    self._original_hash: list[int] = []
    """
    List of hashes after import; used to identify changes.
    """

    self._idnum_hash: dict[bytes, tuple[IndirectObject, list[IndirectObject]]] = {}
    """
    Maps hash values of indirect objects to the list of IndirectObjects.
    This is used for compression.
    """

    self._id_translated: dict[int, dict[int, int]] = {}
    """List of already translated IDs.
    dict[id(pdf)][(idnum, generation)]
    """

    self._info_obj: Optional[PdfObject]
    """The PDF file's document information dictionary,
    the Info entry in the PDF file's trailer dictionary."""

    self._ID: Union[ArrayObject, None] = None
    """The PDF file identifier,
    defined by the ID in the PDF file's trailer dictionary."""

    self._unresolved_links: list[tuple[ReferenceLink, ReferenceLink]] = []
    "Tracks links in pages added to the writer for resolving later."
    self._merged_in_pages: dict[Optional[IndirectObject], Optional[IndirectObject]] = {}
    "Tracks pages added to the writer and what page they turned into."

    if self.incremental:
        # Normalise the source to a PdfReader: path -> BytesIO -> PdfReader.
        if isinstance(fileobj, (str, Path)):
            with open(fileobj, "rb") as f:
                fileobj = BytesIO(f.read(-1))
        if isinstance(fileobj, BytesIO):
            fileobj = PdfReader(fileobj)
        if not isinstance(fileobj, PdfReader):
            raise PyPdfError("Invalid type for incremental mode")
        self._reader = fileobj  # prev content is in _reader.stream
        self._header = fileobj.pdf_header.encode()
        self._readonly = True  # TODO: to be analysed
    else:
        self._header = b"%PDF-1.3"
        self._info_obj = self._add_object(
            DictionaryObject(
                {NameObject("/Producer"): create_string_object("pypdf")}
            )
        )

    def _get_clone_from(
        fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
        clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
    ) -> Union[None, PdfReader, str, Path, IO[Any], BytesIO]:
        """Decide which of ``fileobj`` / ``clone_from`` is to be cloned."""
        if isinstance(fileobj, (str, Path, IO, BytesIO)) and (
            fileobj == "" or clone_from is not None
        ):
            return clone_from
        cloning = True
        # A missing or zero-length file has nothing to clone.
        if isinstance(fileobj, (str, Path)) and (
            not Path(str(fileobj)).exists()
            or Path(str(fileobj)).stat().st_size == 0
        ):
            cloning = False
        # Same for an empty stream; the read position is restored afterwards.
        if isinstance(fileobj, (IOBase, BytesIO)):
            t = fileobj.tell()
            if fileobj.seek(0, 2) == 0:
                cloning = False
            fileobj.seek(t, 0)
        if cloning:
            clone_from = fileobj
        return clone_from

    clone_from = _get_clone_from(fileobj, clone_from)
    # To prevent overwriting
    self.temp_fileobj = fileobj
    self.fileobj = ""
    self._with_as_usage = False
    self._cloned = False
    # The root of our page tree node
    pages = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Pages"),
            NameObject(PagesAttributes.COUNT): NumberObject(0),
            NameObject(PagesAttributes.KIDS): ArrayObject(),
        }
    )
    self.flattened_pages = []
    self._encryption: Optional[Encryption] = None
    self._encrypt_entry: Optional[DictionaryObject] = None

    if clone_from is not None:
        if not isinstance(clone_from, PdfReader):
            clone_from = PdfReader(clone_from)
        self.clone_document_from_reader(clone_from)
        self._cloned = True
    else:
        self._pages = self._add_object(pages)
        self._root_object = DictionaryObject(
            {
                NameObject(PagesAttributes.TYPE): NameObject(CO.CATALOG),
                NameObject(CO.PAGES): self._pages,
            }
        )
        self._add_object(self._root_object)
    if full and not incremental:
        # ``full`` only borrowed the incremental loading path.
        self.incremental = False
    if isinstance(self._ID, list):
        # Store text-string identifiers as byte strings. Iterating instead
        # of indexing [0] and [1] avoids an IndexError on an /ID array with
        # fewer than two entries.
        for position, entry in enumerate(self._ID):
            if isinstance(entry, TextStringObject):
                self._ID[position] = ByteStringObject(entry.get_original_bytes())

294 

295 # for commonality 

296 @property 

297 def is_encrypted(self) -> bool: 

298 """ 

299 Read-only boolean property showing whether this PDF file is encrypted. 

300 

301 Note that this property, if true, will remain true even after the 

302 :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called. 

303 """ 

304 return False 

305 

@property
def root_object(self) -> DictionaryObject:
    """
    Expose the dictionary stored in ``_root_object``.

    Note:
        Intended for read access; the returned object is the live
        dictionary, not a copy.

    """
    return self._root_object

316 

@property
def _info(self) -> Optional[DictionaryObject]:
    """
    Provide access to "/Info". Standardized with PdfReader.

    Returns:
        /Info Dictionary; None if the entry does not exist

    """
    if self._info_obj is None:
        return None
    return cast(DictionaryObject, self._info_obj.get_object())

@_info.setter
def _info(self, value: Optional[Union[IndirectObject, DictionaryObject]]) -> None:
    """
    Replace the content of the /Info dictionary, or drop it for ``None``.

    The existing info object is kept and refilled with the entries of
    ``value``; a new one is registered only if none exists yet.
    """
    if value is None:
        try:
            # Blank the slot held by the info object in the object list.
            self._objects[self._info_obj.indirect_reference.idnum - 1] = None  # type: ignore
        except (KeyError, IndexError, AttributeError):
            # No info object, or no matching slot: nothing to blank.
            pass
        self._info_obj = None
        return

    if self._info_obj is None:
        self._info_obj = self._add_object(DictionaryObject())
    target = cast(DictionaryObject, self._info_obj.get_object())
    source = cast(DictionaryObject, value.get_object())
    if source is target:
        # Assigning the current dictionary to itself must be a no-op;
        # clearing first would wipe the entries we are about to copy.
        return
    target.clear()
    target.update(source)

346 

@property
def xmp_metadata(self) -> Optional[XmpInformation]:
    """XMP (Extensible Metadata Platform) data of the document catalog."""
    return cast(XmpInformation, self.root_object.xmp_metadata)

@xmp_metadata.setter
def xmp_metadata(self, value: Optional[XmpInformation]) -> None:
    """
    Store XMP data in the catalog's "/Metadata" stream, or remove it.

    ``None`` deletes the "/Metadata" entry. Otherwise the data is written
    into the referenced stream, which is created when the entry is missing
    or is not an indirect reference.
    """
    root = self.root_object
    if value is None:
        if "/Metadata" in root:
            del root["/Metadata"]
        return

    current = root.get("/Metadata", None)
    if isinstance(current, IndirectObject):
        # Reuse the stream that is already referenced.
        target_stream = cast(StreamObject, current.get_object())
    else:
        # Drop a direct (non-referenced) entry and start a fresh stream.
        if current is not None:
            del root["/Metadata"]
        target_stream = StreamObject()
        root[NameObject("/Metadata")] = self._add_object(target_stream)

    # Anything that is not an XmpInformation is handed to set_data unchanged.
    payload = value.stream.get_data() if isinstance(value, XmpInformation) else value
    target_stream.set_data(payload)

375 

@property
def with_as_usage(self) -> bool:
    """Deprecated accessor for ``_with_as_usage``; reports the deprecation first."""
    deprecation_no_replacement("with_as_usage", "5.0")
    return self._with_as_usage

@with_as_usage.setter
def with_as_usage(self, value: bool) -> None:
    """Deprecated mutator for ``_with_as_usage``; reports the deprecation first."""
    deprecation_no_replacement("with_as_usage", "5.0")
    self._with_as_usage = value

385 

def __enter__(self) -> "PdfWriter":
    """
    Prepare the writer for use as a context manager.

    ``__init__`` is run again with its default arguments; the previous
    ``_cloned`` flag is restored and the object remembered in
    ``temp_fileobj`` becomes ``fileobj``.
    """
    was_cloned: bool = self._cloned
    remembered_target = self.temp_fileobj
    self.__init__()  # type: ignore
    self._cloned, self._with_as_usage = was_cloned, True
    self.fileobj = remembered_target  # type: ignore
    return self

395 

396 def __exit__( 

397 self, 

398 exc_type: Optional[type[BaseException]], 

399 exc: Optional[BaseException], 

400 traceback: Optional[TracebackType], 

401 ) -> None: 

402 """Write data to the fileobj.""" 

403 if self.fileobj and not self._cloned: 

404 self.write(self.fileobj) 

405 

406 @property 

407 def pdf_header(self) -> str: 

408 """ 

409 Read/Write property of the PDF header that is written. 

410 

411 This should be something like ``'%PDF-1.5'``. It is recommended to set 

412 the lowest version that supports all features which are used within the 

413 PDF file. 

414 

415 Note: `pdf_header` returns a string but accepts bytes or str for writing 

416 """ 

417 return self._header.decode() 

418 

419 @pdf_header.setter 

420 def pdf_header(self, new_header: Union[str, bytes]) -> None: 

421 if isinstance(new_header, str): 

422 new_header = new_header.encode() 

423 self._header = new_header 

424 

def _add_object(self, obj: PdfObject) -> IndirectObject:
    """
    Register ``obj`` as an indirect object of this writer.

    An object that already carries a reference into this writer is not
    registered again; its existing reference is returned. A dictionary
    whose "/Contents" is an array or dictionary gets that entry registered
    first and replaced by the resulting reference.

    Returns:
        The indirect reference pointing at ``obj``.

    """
    known_ref = getattr(obj, "indirect_reference", None)
    if known_ref is not None and known_ref.pdf == self:  # type: ignore
        return known_ref  # type: ignore

    # check for /Contents in Pages (/Contents in annotations are strings)
    if isinstance(obj, DictionaryObject):
        contents = obj.get(PG.CONTENTS, None)
        if isinstance(contents, (ArrayObject, DictionaryObject)):
            obj[NameObject(PG.CONTENTS)] = self._add_object(obj[PG.CONTENTS])

    self._objects.append(obj)
    # Object numbers are 1-based: the new number equals the list length.
    new_ref = IndirectObject(len(self._objects), 0, self)
    obj.indirect_reference = new_ref
    return new_ref

439 

def get_object(
    self,
    indirect_reference: Union[int, IndirectObject],
) -> PdfObject:
    """
    Look up an object of this writer by object number or reference.

    Args:
        indirect_reference: 1-based object number, or a reference that
            belongs to this writer.

    Returns:
        The stored object.

    Raises:
        ValueError: If the reference belongs to another document.

    """
    if isinstance(indirect_reference, int):
        idnum = indirect_reference
    else:
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        idnum = indirect_reference.idnum
    found = self._objects[idnum - 1]
    assert found is not None, "mypy"
    return found

452 

def _replace_object(
    self,
    indirect_reference: Union[int, IndirectObject],
    obj: PdfObject,
) -> PdfObject:
    """
    Put ``obj`` into the slot of an existing indirect object.

    The generation number of the previous occupant is kept. An object that
    references another document is cloned into this writer first.

    Returns:
        The object now stored in the slot (the clone, if one was made).

    Raises:
        ValueError: If the reference belongs to another document.

    """
    if isinstance(indirect_reference, IndirectObject):
        if indirect_reference.pdf != self:
            raise ValueError("PDF must be self")
        idnum = indirect_reference.idnum
    else:
        idnum = indirect_reference
    slot = idnum - 1
    generation = self._objects[slot].indirect_reference.generation  # type: ignore

    current_ref = getattr(obj, "indirect_reference", None)
    if current_ref is not None and current_ref.pdf != self:  # type: ignore
        obj = obj.clone(self)
    self._objects[slot] = obj
    obj.indirect_reference = IndirectObject(idnum, generation, self)

    assert isinstance(obj, PdfObject), "mypy"
    return obj

473 

def _add_page(
    self,
    page: PageObject,
    index: int,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Clone ``page`` into this writer and hook it into the page tree.

    Args:
        page: Page to add; must be a ``PageObject`` whose type entry is
            the page type.
        index: Position handed to ``_get_page_in_node`` to find the parent
            node and the slot inside it.
        excluded_keys: Keys not copied while cloning. The parent entry and
            "/StructParents" are always excluded.

    Returns:
        The clone that now lives in this writer.

    Raises:
        ValueError: If ``page`` is not a valid page object.
        PyPdfError: If more than 1000 ancestors are walked while updating
            the page counts.

    """
    if not isinstance(page, PageObject) or page.get(PagesAttributes.TYPE, None) != CO.PAGE:
        raise ValueError("Invalid page object")
    assert self.flattened_pages is not None, "for mypy"
    page_org = page
    excluded_keys = list(excluded_keys)
    excluded_keys += [PagesAttributes.PARENT, "/StructParents"]
    # Acrobat does not accept two indirect references pointing on the same
    # page; therefore in order to add multiple copies of the same
    # page, we need to create a new dictionary for the page, however the
    # objects below (including content) are not duplicated:
    try:  # delete an already existing page
        del self._id_translated[id(page_org.indirect_reference.pdf)][  # type: ignore
            page_org.indirect_reference.idnum  # type: ignore
        ]
    except (AttributeError, KeyError):
        # No reference on the page, or the page was never translated:
        # there is nothing to forget.
        pass

    page = cast(
        "PageObject", page_org.clone(self, False, excluded_keys).get_object()
    )
    if page_org.pdf is not None:
        # Raise our header version to the source document's if it is higher.
        other = page_org.pdf.pdf_header
        self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other)

    node, idx = self._get_page_in_node(index)
    page[NameObject(PagesAttributes.PARENT)] = node.indirect_reference

    if idx >= 0:
        cast(ArrayObject, node[PagesAttributes.KIDS]).insert(idx, page.indirect_reference)
        self.flattened_pages.insert(index, page)
    else:
        cast(ArrayObject, node[PagesAttributes.KIDS]).append(page.indirect_reference)
        self.flattened_pages.append(page)

    # Increment the page count of the parent node and every ancestor.
    recurse = 0
    while not is_null_or_none(node):
        node = cast(DictionaryObject, node.get_object())
        node[NameObject(PagesAttributes.COUNT)] = NumberObject(cast(int, node[PagesAttributes.COUNT]) + 1)
        node = node.get(PagesAttributes.PARENT, None)  # type: ignore[assignment]  # TODO: Fix.
        recurse += 1
        if recurse > 1000:
            # Guards against a cyclic parent chain.
            raise PyPdfError("Too many recursive calls!")

    if page_org.pdf is not None:
        # the page may contain links to other pages, and those other
        # pages may or may not already be added. we store the
        # information we need, so that we can resolve the references
        # later.
        self._unresolved_links.extend(extract_links(page, page_org))
        self._merged_in_pages[page_org.indirect_reference] = page.indirect_reference

    return page

531 

def set_need_appearances_writer(self, state: bool = True) -> None:
    """
    Set the "NeedAppearances" entry of the document's AcroForm dictionary.

    The AcroForm dictionary is created (as an indirect object) when the
    catalog has none. Any failure is logged as a warning, not raised.

    Args:
        state: The value stored for the NeedAppearances flag.

    Returns:
        None

    """
    # See §12.7.2 and §7.7.2 for more information:
    # https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    try:
        catalog = self._root_object
        # Make sure there is an AcroForm tree to write into.
        if CatalogDictionary.ACRO_FORM not in catalog:
            catalog[NameObject(CatalogDictionary.ACRO_FORM)] = self._add_object(
                DictionaryObject()
            )
        flag_key = NameObject(InteractiveFormDictEntries.NeedAppearances)
        acro_form = cast(DictionaryObject, catalog[CatalogDictionary.ACRO_FORM])
        acro_form[flag_key] = BooleanObject(state)
    except Exception as exc:  # pragma: no cover
        logger_warning(
            f"set_need_appearances_writer({state}) catch : {exc}", __name__
        )

564 

def create_viewer_preferences(self) -> ViewerPreferences:
    """
    Attach a new, empty ``ViewerPreferences`` object to the catalog.

    An existing viewer-preferences entry is overwritten.

    Returns:
        The newly created object.

    """
    preferences = ViewerPreferences()
    reference = self._add_object(preferences)
    self._root_object[NameObject(CatalogDictionary.VIEWER_PREFERENCES)] = reference
    return preferences

571 

def add_page(
    self,
    page: PageObject,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Append a page at the end of this PDF file.

    Recommended for advanced usage including the adequate excluded_keys.

    The page is usually acquired from a :class:`PdfReader<pypdf.PdfReader>`
    instance.

    Args:
        page: The page to add to the document. Should be
            an instance of :class:`PageObject<pypdf._page.PageObject>`
        excluded_keys: Keys not copied when the page is cloned.

    Returns:
        The added PageObject.

    """
    assert self.flattened_pages is not None, "mypy"
    end_position = len(self.flattened_pages)
    return self._add_page(page, end_position, excluded_keys)

596 

def insert_page(
    self,
    page: PageObject,
    index: int = 0,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Insert a page in this PDF file. The page is usually acquired from a
    :class:`PdfReader<pypdf.PdfReader>` instance.

    Args:
        page: The page to add to the document.
        index: Position at which the page will be inserted. Negative
            values count from the end; positions at or past the end append.
        excluded_keys: Keys not copied when the page is cloned.

    Returns:
        The added PageObject.

    Raises:
        ValueError: If a negative index reaches before the first page.

    """
    assert self.flattened_pages is not None, "mypy"
    if index < 0:
        index += len(self.flattened_pages)
    if index < 0:
        raise ValueError("Invalid index value")
    if index < len(self.flattened_pages):
        return self._add_page(page, index, excluded_keys)
    return self.add_page(page, excluded_keys)

624 

def _get_page_number_by_indirect(
    self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> Optional[int]:
    """
    Resolve a reference to the number of the page it points at.

    Provided so the writer offers the same function as PdfReader.

    Args:
        indirect_reference: Object number or reference; an int is treated
            as an object of this writer with generation 0.

    Returns:
        The page number, or None if the reference is null/None or does not
        resolve to a page.

    """
    if is_null_or_none(indirect_reference):
        return None
    assert indirect_reference is not None, "mypy"
    reference = (
        IndirectObject(indirect_reference, 0, self)
        if isinstance(indirect_reference, int)
        else indirect_reference
    )
    target = reference.get_object()
    return target.page_number if isinstance(target, PageObject) else None

648 

def add_blank_page(
    self, width: Optional[float] = None, height: Optional[float] = None
) -> PageObject:
    """
    Append a blank page to this PDF file and return it.

    If no page size is specified, use the size of the last page.

    Args:
        width: The width of the new page expressed in default user
            space units.
        height: The height of the new page expressed in default
            user space units.

    Returns:
        The newly appended page.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.

    """
    return self.add_page(PageObject.create_blank_page(self, width, height))

673 

def insert_blank_page(
    self,
    width: Optional[Union[float, decimal.Decimal]] = None,
    height: Optional[Union[float, decimal.Decimal]] = None,
    index: int = 0,
) -> PageObject:
    """
    Insert a blank page to this PDF file and return it.

    If width or height is missing and a page exists at ``index``, both
    dimensions are taken from that page. Otherwise the missing size is left
    to ``PageObject.create_blank_page``.

    Args:
        width: The width of the new page expressed in default user
            space units.
        height: The height of the new page expressed in default
            user space units.
        index: Position to add the page.

    Returns:
        The newly inserted page, i.e. the page object that is part of
        this writer.

    Raises:
        PageSizeNotDefinedError: if width and height are not defined
            and previous page does not exist.

    """
    page_count = self.get_num_pages()
    # Look at the page at ``index`` only when it exists. The previous
    # condition indexed out of range (IndexError) whenever ``width`` was
    # None, instead of raising the documented PageSizeNotDefinedError.
    if (width is None or height is None) and -page_count <= index < page_count:
        oldpage = self.pages[index]
        width = oldpage.mediabox.width
        height = oldpage.mediabox.height
    page = PageObject.create_blank_page(self, width, height)
    # Return the page registered in the writer (as add_blank_page does),
    # not the detached template it was cloned from.
    return self.insert_page(page, index)

707 

@property
def open_destination(
    self,
) -> Union[None, Destination, TextStringObject, ByteStringObject]:
    """Delegate to the parent class's ``open_destination`` property."""
    return super().open_destination

@open_destination.setter
def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
    """
    Set or remove the catalog's "/OpenAction" entry.

    ``None`` removes the entry, a ``str`` is stored as a text string, a
    ``Destination`` contributes its destination array, and a ``PageObject``
    becomes a page-fit destination to that page. Any other type leaves the
    catalog unchanged.
    """
    catalog = self._root_object
    if dest is None:
        try:
            del catalog["/OpenAction"]
        except KeyError:
            pass
        return

    if isinstance(dest, str):
        action = TextStringObject(dest)
    elif isinstance(dest, Destination):
        action = dest.dest_array
    elif isinstance(dest, PageObject):
        # A page without a reference is represented by a null target.
        page_ref = dest.indirect_reference
        target = page_ref if page_ref is not None else NullObject()
        action = Destination("Opening", target, PAGE_FIT).dest_array
    else:
        return
    catalog[NameObject("/OpenAction")] = action

733 

def add_js(self, javascript: str) -> None:
    """
    Add JavaScript which will launch upon opening this PDF.

    Args:
        javascript: Your JavaScript.

    >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
    # Example: This will launch the print window when the PDF is opened.

    """
    catalog = self._root_object
    # Names / JavaScript preferred to be able to add multiple scripts
    if "/Names" not in catalog:
        catalog[NameObject(CA.NAMES)] = DictionaryObject()
    name_dict = cast(DictionaryObject, catalog[CA.NAMES])
    if "/JavaScript" not in name_dict:
        name_dict[NameObject("/JavaScript")] = DictionaryObject(
            {NameObject("/Names"): ArrayObject()}
        )
    script_entries = cast(
        ArrayObject, cast(DictionaryObject, name_dict["/JavaScript"])["/Names"]
    )

    # Entries come in pairs: a name followed by the action reference.
    # We need a name for parameterized JavaScript in the PDF file,
    # but it can be anything.
    script_entries.append(create_string_object(str(uuid.uuid4())))

    action = DictionaryObject(
        {
            NameObject(PagesAttributes.TYPE): NameObject("/Action"),
            NameObject("/S"): NameObject("/JavaScript"),
            NameObject("/JS"): TextStringObject(f"{javascript}"),
        }
    )
    script_entries.append(self._add_object(action))

768 

def add_attachment(self, filename: str, data: Union[str, bytes]) -> "EmbeddedFile":
    """
    Embed a file inside the PDF.

    Delegates to ``EmbeddedFile._create_new``.

    Reference:
    https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
    Section 7.11.3

    Args:
        filename: The filename to display.
        data: The data in the file.

    Returns:
        EmbeddedFile instance for the newly created embedded file.

    """
    embedded = EmbeddedFile._create_new(self, filename, data)
    return embedded

786 

def append_pages_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Copy every page of ``reader`` to the end of this writer, in order.

    ``append`` should be preferred.

    Args:
        reader: The PdfReader whose pages are copied.
        after_page_append:
            Optional callback invoked after each page has been added. It
            receives a single argument: the page as it now exists in this
            writer.

    """
    notify = callable(after_page_append)
    for source_page in reader.pages:
        added_page = self.add_page(source_page)
        if notify:
            after_page_append(added_page)

818 

def _merge_content_stream_to_page(
    self,
    page: PageObject,
    new_content_data: bytes,
) -> None:
    """
    Append ``new_content_data`` to the content of ``page``, in place.

    * "/Contents" is an array: a new stream holding the data is appended.
    * "/Contents" is a single stream: it is replaced by a new stream holding
      the old data, a newline and the new data.
    * "/Contents" is missing or anything else (e.g. null): a new stream
      holding only the new data becomes the page content.

    Args:
        page: The page to which the new content data will be added.
        new_content_data: A binary-encoded new content stream, for
            instance the commands to draw an XObject.

    Returns:
        None; ``page`` is modified.

    """
    contents_key = NameObject("/Contents")
    existing_content = (
        page[contents_key].get_object() if contents_key in page else None
    )

    if isinstance(existing_content, ArrayObject):
        # Keep the existing streams and add one more to the array.
        appended_stream = StreamObject()
        appended_stream.set_data(new_content_data)
        existing_content.append(self._add_object(appended_stream))
        page[contents_key] = self._add_object(existing_content)
        return

    if isinstance(existing_content, StreamObject):
        # Merge new content to existing StreamObject
        merged_data = existing_content.get_data() + b"\n" + new_content_data
    else:
        # Empty page or unusable /Contents value: the new data is the whole
        # content. Previously a present but non-array, non-stream value made
        # the new data vanish silently.
        merged_data = new_content_data
    new_stream = StreamObject()
    new_stream.set_data(merged_data)
    page[contents_key] = self._add_object(new_stream)

858 

def _add_apstream_object(
    self,
    page: PageObject,
    appearance_stream_obj: StreamObject,
    object_name: str,
    x_offset: float,
    y_offset: float,
    font_res: Optional[DictionaryObject] = None
) -> None:
    """
    Draw an appearance stream on a page as an XObject.

    The stream is registered with the writer, listed in the page's
    "/XObject" resources as ``/Fm_<object_name>`` (sanitized), and a
    translated ``Do`` command is merged into the page content.

    Args:
        page: The page to which to add the appearance stream.
        appearance_stream_obj: The appearance stream.
        object_name: The name of the appearance stream.
        x_offset: The horizontal offset for the appearance stream.
        y_offset: The vertical offset for the appearance stream.
        font_res: The appearance stream's font resource (if given).
    """
    resources = cast(DictionaryObject, page[PG.RESOURCES])

    if font_res is not None:
        # Make the font available on the page under its /BaseFont name
        # ([/"Name"] often also exists, but is deprecated).
        base_font = font_res["/BaseFont"]
        if "/Font" not in resources:
            resources[NameObject("/Font")] = DictionaryObject()
        page_fonts = cast(DictionaryObject, resources[NameObject("/Font")])
        if base_font not in page_fonts:
            page_fonts[NameObject(base_font)] = font_res

    # Always add the resolved stream object to the writer to get a new IndirectObject.
    # This ensures we have a valid IndirectObject managed by *this* writer.
    stream_ref = self._add_object(appearance_stream_obj)
    resource_name = NameObject(f"/Fm_{object_name}")._sanitize()

    if "/XObject" not in resources:
        resources[NameObject("/XObject")] = DictionaryObject()
    page_xobjects = cast(DictionaryObject, resources["/XObject"])
    if resource_name in page_xobjects:
        # The existing entry is left untouched; only a warning is emitted.
        logger_warning(
            f"XObject {resource_name!r} already added to page resources. This might be an issue.",
            __name__
        )
    else:
        page_xobjects[resource_name] = stream_ref

    placement = Transformation().translate(x_offset, y_offset)
    draw_commands = f"q\n{placement._to_cm()}\n{resource_name} Do\nQ".encode()
    self._merge_content_stream_to_page(page, draw_commands)

906 

def _update_field_annotation(
    self,
    page: PageObject,
    field: DictionaryObject,
    annotation: DictionaryObject,
    font_name: str = "",
    font_size: float = -1,
    flatten: bool = False,
) -> None:
    """
    Rebuild the normal appearance stream (``/AP`` ``/N``) of a text or
    choice widget from the field's current value.

    Args:
        page: The page holding the widget; only used when ``flatten`` is set.
        field: The field dictionary that provides ``/V``, ``/FT`` and ``/Ff``.
        annotation: The widget annotation whose appearance is regenerated.
        font_name: Font resource name to put into the default appearance
            string; when empty, the name already present there is used.
        font_size: Font size to use. A negative value takes the size from
            the default appearance string; a resulting size of 0 is
            replaced by an automatically chosen one.
        flatten: If True, the generated appearance stream is additionally
            drawn into the page contents.
    """
    # Widget rectangle, normalised to an origin-based box of the same size.
    _rct = cast(RectangleObject, annotation[AA.Rect])
    rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))

    acro_form = cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM])

    # Default appearance string: the (inherited) annotation entry wins,
    # then the AcroForm entry, then a built-in Helvetica default.
    da = annotation.get_inherited(AA.DA, acro_form.get(AA.DA, None))
    if da is None:
        da = TextStringObject("/Helv 0 Tf 0 g")
    else:
        da = da.get_object()
    font_properties = [
        token
        for token in da.replace("\n", " ").replace("\r", " ").split(" ")
        if token != ""
    ]
    # In "<name> <size> Tf" the name sits two tokens before the operator.
    if font_name:
        font_properties[font_properties.index("Tf") - 2] = font_name
    else:
        font_name = font_properties[font_properties.index("Tf") - 2]
    if font_size >= 0:
        font_height = font_size
    else:
        font_height = float(font_properties[font_properties.index("Tf") - 1])
    if font_height == 0:
        # Size 0 requests auto-sizing.
        if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
            font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
        else:
            font_height = rct.height - 2
    font_properties[font_properties.index("Tf") - 1] = str(font_height)
    da = " ".join(font_properties)
    y_offset = rct.height - 1 - font_height

    # Look the font up in the local (inherited) /DR first ...
    dr: Any = cast(
        DictionaryObject,
        cast(
            DictionaryObject,
            annotation.get_inherited("/DR", acro_form.get("/DR", DictionaryObject())),
        ).get_object(),
    )
    dr = dr.get("/Font", DictionaryObject()).get_object()
    # _default_fonts_space_width keys is the list of Standard fonts
    if font_name not in dr and font_name not in _default_fonts_space_width:
        # ... or in the AcroForm dictionary. The default must be a
        # DictionaryObject: a plain dict has no get_object() and would
        # raise AttributeError when the AcroForm carries no /DR entry.
        dr = cast(DictionaryObject, acro_form.get("/DR", DictionaryObject()))
        dr = dr.get_object().get("/Font", DictionaryObject()).get_object()

    font_res = dr.get(font_name, None)
    font_full_rev: dict[str, bytes]
    if not is_null_or_none(font_res):
        font_res = cast(DictionaryObject, font_res.get_object())
        _, _, font_encoding, font_map = build_char_map_from_dict(200, font_res)
        font_map.pop(-1, None)  # remove width stored in -1 key
        if isinstance(font_encoding, str):
            font_full_rev = {
                v: k.encode(font_encoding) for k, v in font_map.items()
            }
        else:
            font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
            font_full_rev = dict(font_encoding_rev)
            for key, value in font_map.items():
                font_full_rev[value] = font_encoding_rev.get(key, key)
    else:
        logger_warning(f"Font dictionary for {font_name} not found.", __name__)
        font_full_rev = {}
        # Normalise a null object to None so that the "font_res is not None"
        # checks below do not treat a missing font as a usable resource.
        font_res = None

    # Retrieve field text and selected values
    field_flags = field.get(FA.Ff, 0)
    if field.get(FA.FT, "/Tx") == "/Ch" and (field_flags & FA.FfBits.Combo) == 0:
        txt = "\n".join(annotation.get_inherited(FA.Opt, []))
        sel = field.get("/V", [])
        if not isinstance(sel, list):
            sel = [sel]
    else:  # /Tx
        txt = field.get("/V", "")
        sel = []
    # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
    txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
    ap_stream = generate_appearance_stream(
        txt, sel, da, font_full_rev, rct, font_height, y_offset
    )

    # Create the form XObject carrying the new appearance.
    dct = DecodedStreamObject.initialize_from_dictionary(
        {
            NameObject("/Type"): NameObject("/XObject"),
            NameObject("/Subtype"): NameObject("/Form"),
            NameObject("/BBox"): rct,
            "__streamdata__": ByteStringObject(ap_stream),
            "/Length": 0,
        }
    )
    if AA.AP in annotation:
        # Carry over every entry of the previous appearance except the
        # ones that are regenerated above.
        for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
            if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                dct[k] = v

    # Update Resources with font information if necessary
    if font_res is not None:
        dct[NameObject("/Resources")] = DictionaryObject(
            {
                NameObject("/Font"): DictionaryObject(
                    {
                        NameObject(font_name): getattr(
                            font_res, "indirect_reference", font_res
                        )
                    }
                )
            }
        )
    if AA.AP not in annotation:
        annotation[NameObject(AA.AP)] = DictionaryObject(
            {NameObject("/N"): self._add_object(dct)}
        )
    elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
        cast(DictionaryObject, annotation[NameObject(AA.AP)])[
            NameObject("/N")
        ] = self._add_object(dct)
    else:  # [/AP][/N] exists: reuse its object number for the new stream
        n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
        self._objects[n - 1] = dct
        dct.indirect_reference = IndirectObject(n, 0, self)

    if flatten:
        field_name = self._get_qualified_field_name(annotation)
        self._add_apstream_object(page, dct, field_name, _rct[0], _rct[1], font_res)

1060 

# "No flags" default for update_page_form_field_values().
FFBITS_NUL = FA.FfBits(0)

def update_page_form_field_values(
    self,
    page: Union[PageObject, list[PageObject], None],
    fields: dict[str, Union[str, list[str], tuple[str, str, float]]],
    flags: FA.FfBits = FFBITS_NUL,
    auto_regenerate: Optional[bool] = True,
    flatten: bool = False,
) -> None:
    """
    Update the form field values for a given page from a fields dictionary.

    Field texts and values are copied from ``fields`` to the page. If a
    widget links to a parent object, the value is stored on the parent.

    Args:
        page: `PageObject` - references **PDF writer's page** where the
            annotations and field data will be updated.
            `List[Pageobject]` - provides list of pages to be processed.
            `None` - all pages.
        fields: a Python dictionary of:

            * field names (/T) as keys and text values (/V) as value
            * field names (/T) as keys and list of text values (/V) for multiple choice list
            * field names (/T) as keys and tuple of:
                * text values (/V)
                * font id (e.g. /F1, the font id must exist)
                * font size (0 for autosize)

        flags: A set of flags from :class:`~pypdf.constants.FieldDictionaryAttributes.FfBits`.

        auto_regenerate: Set/unset the need_appearances flag;
            the flag is unchanged if auto_regenerate is None.

        flatten: Whether or not to flatten the annotation. If True, this adds the annotation's
            appearance stream to the page contents. Note that this option does not remove the
            annotation itself.

    Raises:
        PyPdfError: If the document has no /AcroForm dictionary or the
            /AcroForm has no /Fields entry.

    """
    if CatalogDictionary.ACRO_FORM not in self._root_object:
        raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
    acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    if InteractiveFormDictEntries.Fields not in acro_form:
        raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
    if isinstance(auto_regenerate, bool):
        self.set_need_appearances_writer(auto_regenerate)

    # A list of pages (or "all pages") is handled page by page.
    if page is None:
        page = list(self.pages)
    if isinstance(page, list):
        for single_page in page:
            if PG.ANNOTS in single_page:  # just to prevent warnings
                self.update_page_form_field_values(
                    single_page, fields, flags, None, flatten=flatten
                )
        return

    if PG.ANNOTS not in page:
        logger_warning("No fields to update on this page", __name__)
        return

    for annotation_ref in page[PG.ANNOTS]:  # type: ignore
        annotation = cast(DictionaryObject, annotation_ref.get_object())
        if annotation.get("/Subtype", "") != "/Widget":
            continue
        # A widget without its own /FT and /T takes them from its parent.
        if "/FT" in annotation and "/T" in annotation:
            parent_annotation = annotation
        else:
            parent_annotation = annotation.get(
                PG.PARENT, DictionaryObject()
            ).get_object()

        for field, value in fields.items():
            # Match either the fully qualified name or the partial name.
            if (
                self._get_qualified_field_name(parent_annotation) != field
                and parent_annotation.get("/T", None) != field
            ):
                continue
            if (
                parent_annotation.get("/FT", None) == "/Ch"
                and "/I" in parent_annotation
            ):
                del parent_annotation["/I"]
            if flags:
                annotation[NameObject(FA.Ff)] = NumberObject(flags)
            # Only change values if given by user and not flattening.
            if value is not None or not flatten:
                if isinstance(value, list):
                    parent_annotation[NameObject(FA.V)] = ArrayObject(
                        TextStringObject(item) for item in value
                    )
                elif isinstance(value, tuple):
                    annotation[NameObject(FA.V)] = TextStringObject(
                        value[0],
                    )
                else:
                    parent_annotation[NameObject(FA.V)] = TextStringObject(value)

            field_type = parent_annotation.get(FA.FT)
            if field_type == "/Btn":
                # Checkbox button (no /FT found in Radio widgets)
                state = NameObject(value)
                appearance = cast(DictionaryObject, annotation[NameObject(AA.AP)])
                normal_states = cast(DictionaryObject, appearance["/N"])
                if state not in normal_states:
                    state = NameObject("/Off")
                appearance_stream_obj = normal_states.get(state)
                # other cases will be updated through the for loop
                annotation[NameObject(AA.AS)] = state
                annotation[NameObject(FA.V)] = state
                if flatten and appearance_stream_obj is not None:
                    # The whole appearance stream is copied; it should be an
                    # XObject that is already registered, so no font
                    # resources have to be added.
                    rct = cast(RectangleObject, annotation[AA.Rect])
                    self._add_apstream_object(
                        page, appearance_stream_obj, field, rct[0], rct[1]
                    )
            elif field_type == "/Tx" or field_type == "/Ch":
                # textbox
                if isinstance(value, tuple):
                    self._update_field_annotation(
                        page, parent_annotation, annotation, value[1], value[2], flatten=flatten
                    )
                else:
                    self._update_field_annotation(
                        page, parent_annotation, annotation, flatten=flatten
                    )
            elif annotation.get(FA.FT) == "/Sig":  # deprecated  # not implemented yet
                logger_warning("Signature forms not implemented yet", __name__)

1184 

def reattach_fields(
    self, page: Optional[PageObject] = None
) -> list[DictionaryObject]:
    """
    Parse annotations within the page looking for orphan fields and
    reattach them into the Fields Structure.

    The /AcroForm dictionary and its /Fields array are created when they
    do not exist yet.

    Args:
        page: page to analyze.
              If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields.

    """
    reattached = []
    if page is None:
        for single_page in self.pages:
            reattached += self.reattach_fields(single_page)
        return reattached

    try:
        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        acro_form = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = acro_form
    try:
        fields = cast(ArrayObject, acro_form[InteractiveFormDictEntries.Fields])
    except KeyError:
        fields = ArrayObject()
        acro_form[NameObject(InteractiveFormDictEntries.Fields)] = fields

    if "/Annots" not in page:
        return reattached

    annotations = cast(ArrayObject, page["/Annots"])
    for position, entry in enumerate(annotations):
        was_indirect = isinstance(entry, IndirectObject)
        widget = cast(DictionaryObject, entry.get_object())
        # Only widgets that carry their own field type are fields.
        if widget.get("/Subtype", "") != "/Widget" or "/FT" not in widget:
            continue
        # Skip fields that are already registered.
        if (
            "indirect_reference" in widget.__dict__
            and widget.indirect_reference in fields
        ):
            continue
        # A direct widget has to become an indirect object before it can
        # be referenced from the /Fields array.
        if not was_indirect:
            annotations[position] = self._add_object(widget)
        fields.append(widget.indirect_reference)
        reattached.append(widget)
    return reattached

1234 

def clone_reader_document_root(self, reader: PdfReader) -> None:
    """
    Copy the reader document root to the writer and all sub-elements,
    including pages, threads, outlines,... For partial insertion, ``append``
    should be considered.

    Args:
        reader: PdfReader from which the document root should be copied.

    """
    self._info_obj = None
    incremental = self.incremental
    if incremental:
        # Mirror the reader's object table slot by slot.
        self._objects = [None] * (cast(int, reader.trailer["/Size"]) - 1)
        for slot in range(len(self._objects)):
            source_obj = reader.get_object(slot + 1)
            if source_obj is not None:
                self._objects[slot] = source_obj.replicate(self)
    else:
        self._objects.clear()
    self._root_object = reader.root_object.clone(self)
    self._pages = self._root_object.raw_get("/Pages")

    assert len(self._objects) <= cast(int, reader.trailer["/Size"])  # for pytest
    # The baseline hashes must be taken here, before anything is rewritten.
    if self.incremental:
        self._original_hash = [
            0 if obj is None else obj.hash_bin() for obj in self._objects
        ]
    self._flatten()
    assert self.flattened_pages is not None
    for flat_page in self.flattened_pages:
        self._replace_object(
            cast(IndirectObject, flat_page.indirect_reference).idnum, flat_page
        )
        if not self.incremental:
            flat_page[NameObject("/Parent")] = self._pages
    if not self.incremental:
        # Replace the page tree by a flat /Kids array.
        pages_dict = cast(DictionaryObject, self._pages.get_object())
        pages_dict[NameObject("/Kids")] = ArrayObject(
            [flat_page.indirect_reference for flat_page in self.flattened_pages]
        )

1273 

def clone_document_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Create a copy (clone) of a document from a PDF file reader cloning
    section '/Root' and '/Info' and '/ID' of the pdf.

    Args:
        reader: PDF file reader instance from which the clone
            should be created.
        after_page_append:
            Callback function that is invoked once for every page of the
            cloned document. Its single parameter is a reference to that
            page.

    """
    self.clone_reader_document_root(reader)

    source_info = reader._info
    if self.incremental:
        if source_info is not None:
            self._info_obj = cast(
                IndirectObject, source_info.clone(self).indirect_reference
            )
            assert isinstance(self._info, DictionaryObject), "for mypy"
            # Record the cloned /Info as part of the unmodified baseline.
            info_slot = self._info_obj.indirect_reference.idnum - 1
            self._original_hash[info_slot] = self._info.hash_bin()
    elif source_info is not None:
        self._info_obj = self._add_object(
            DictionaryObject(cast(DictionaryObject, source_info.get_object()))
        )
    # else: _info_obj = None done in clone_reader_document_root()

    try:
        self._ID = cast(ArrayObject, reader._ID).clone(self)
    except AttributeError:
        # The reader has no usable /ID; keep the writer's current one.
        pass

    if callable(after_page_append):
        pages_dict = cast(DictionaryObject, self._pages.get_object())
        for page in cast(ArrayObject, pages_dict["/Kids"]):
            after_page_append(page.get_object())

1321 

def _compute_document_identifier(self) -> ByteStringObject:
    """Return a checksum of the serialized PDF structure as a byte string."""
    buffer = BytesIO()
    self._write_pdf_structure(buffer)
    buffer.seek(0)
    checksum = _rolling_checksum(buffer)
    return ByteStringObject(checksum.encode("utf8"))

1327 

def generate_file_identifiers(self) -> None:
    """
    Generate an identifier for the PDF that will be written.

    The only point of this is ensuring uniqueness. Reproducibility is not
    required.
    When a file is first written, both identifiers shall be set to the same value.
    If both identifiers match when a file reference is resolved, it is very
    likely that the correct and unchanged file has been found. If only the first
    identifier matches, a different version of the correct file has been found.
    see §14.4 "File Identifiers".
    """
    if not self._ID:
        # First write: both identifiers are the same freshly computed value.
        permanent_id = changing_id = self._compute_document_identifier()
    else:
        # Keep the first identifier, refresh only the second one.
        permanent_id = self._ID[0]
        changing_id = self._compute_document_identifier()
    self._ID = ArrayObject((permanent_id, changing_id))

1347 

def encrypt(
    self,
    user_password: str,
    owner_password: Optional[str] = None,
    use_128bit: bool = True,
    permissions_flag: UserAccessPermissions = ALL_DOCUMENT_PERMISSIONS,
    *,
    algorithm: Optional[str] = None,
) -> None:
    """
    Encrypt this PDF file with the PDF Standard encryption handler.

    Args:
        user_password: The password which allows for opening
            and reading the PDF file with the restrictions provided.
        owner_password: The password which allows for
            opening the PDF files without any restrictions. By default,
            the owner password is the same as the user password.
        use_128bit: flag as to whether to use 128bit
            encryption. When false, 40bit encryption will be used.
            By default, this flag is on.
        permissions_flag: permissions as described in
            Table 3.20 of the PDF 1.7 specification. A bit value of 1 means
            the permission is granted.
            Hence an integer value of -1 will set all flags.
            Bit position 3 is for printing, 4 is for modifying content,
            5 and 6 control annotations, 9 for form fields,
            10 for extraction of text and graphics.
        algorithm: encrypt algorithm. Values may be one of "RC4-40", "RC4-128",
            "AES-128", "AES-256-R5", "AES-256". If it is valid,
            `use_128bit` will be ignored.

    Raises:
        ValueError: If ``algorithm`` does not name a supported algorithm.

    """
    if owner_password is None:
        owner_password = user_password

    if algorithm is None:
        alg = EncryptAlgorithm.RC4_128 if use_128bit else EncryptAlgorithm.RC4_40
    else:
        # "AES-256-R5" style names map to EncryptAlgorithm.AES_256_R5.
        try:
            alg = getattr(EncryptAlgorithm, algorithm.replace("-", "_"))
        except AttributeError:
            raise ValueError(f"Algorithm '{algorithm}' NOT supported")

    self.generate_file_identifiers()
    assert self._ID
    self._encryption = Encryption.make(alg, permissions_flag, self._ID[0])
    new_entry = self._encryption.write_entry(user_password, owner_password)

    previous_entry = self._encrypt_entry
    if previous_entry:
        # `encrypt` was called before: reuse the object number of the old
        # encryption dictionary instead of adding a second one.
        assert previous_entry.indirect_reference is not None
        new_entry.indirect_reference = previous_entry.indirect_reference
        self._objects[new_entry.indirect_reference.idnum - 1] = new_entry
    else:
        self._add_object(new_entry)
    self._encrypt_entry = new_entry

1406 

def _resolve_links(self) -> None:
    """Patch up links that were added to the document earlier, to
    make sure they still point to the same pages.

    Links whose original page cannot be found, or whose page was not
    merged into this writer, are left untouched.
    """
    for new_link, old_link in self._unresolved_links:
        source_page = old_link.find_referenced_page()
        if source_page:
            target_page = self._merged_in_pages.get(source_page)
            if target_page is not None:
                new_link.patch_reference(self, target_page)

1419 

def write_stream(self, stream: StreamType) -> None:
    """
    Serialize the document to an already opened stream.

    In incremental mode the reader's original bytes are copied first and
    an increment is appended only when objects were added or modified;
    otherwise the full structure, cross-reference table and trailer are
    written.

    Args:
        stream: The stream to write to; it should be opened in binary mode.
    """
    opened_as_text = hasattr(stream, "mode") and "b" not in stream.mode
    if opened_as_text:
        logger_warning(
            f"File <{stream.name}> to write to is not in binary mode. "
            "It may not be written to correctly.",
            __name__,
        )
    self._resolve_links()

    if not self.incremental:
        object_positions, free_objects = self._write_pdf_structure(stream)
        xref_location = self._write_xref_table(
            stream, object_positions, free_objects
        )
        self._write_trailer(stream, xref_location)
        return

    original = self._reader.stream
    original.seek(0)
    stream.write(original.read(-1))
    if len(self.list_objects_in_increment()) > 0:
        self._write_increment(stream)  # writes objs, xref stream and startxref

1440 

def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
    """
    Write the collection of pages added to this object out as a PDF file.

    Args:
        stream: An object to write the file to. The object can support
            the write method and the tell method, similar to a file object, or
            be a file path, just like the fileobj, just named it stream to keep
            existing workflow.

    Returns:
        A tuple (bool, IO): whether the file was opened by this method
        (in which case it has already been closed), and the stream that
        was written to.

    Raises:
        ValueError: If ``stream`` is an empty string.

    """
    if stream == "":
        raise ValueError(f"Output({stream=}) is empty.")

    my_file = False
    if isinstance(stream, (str, Path)):
        stream = FileIO(stream, "wb")
        my_file = True

    try:
        self.write_stream(stream)
        if not my_file:
            # A caller-owned stream stays open; just push the data out.
            stream.flush()
    finally:
        # A file opened here must not leak, even when writing fails.
        if my_file:
            stream.close()

    return my_file, stream

1472 

def list_objects_in_increment(self) -> list[IndirectObject]:
    """
    For analysis or debugging.
    Provides the list of new or modified objects that will be written
    in the increment.
    Deleted objects will not be freed but will become orphans.

    Returns:
        List of new or modified IndirectObjects

    """
    baseline_count = len(self._original_hash)
    changed = []
    for slot, obj in enumerate(self._objects):
        if obj is None:
            continue
        # New objects lie beyond the baseline; modified ones hash differently.
        if slot >= baseline_count or obj.hash_bin() != self._original_hash[slot]:
            changed.append(cast(IndirectObject, obj).indirect_reference)
    return changed

1496 

def _write_increment(self, stream: StreamType) -> None:
    """
    Append an incremental update to the stream: every new or modified
    object, followed by a cross-reference stream and the ``startxref``
    footer.

    Args:
        stream: The stream to append to.
    """
    offsets = {}
    index_blocks = []
    block_first = -1
    block_end = -2
    baseline_count = len(self._original_hash)
    for slot, obj in enumerate(self._objects):
        if obj is None:
            continue
        if slot < baseline_count and obj.hash_bin() == self._original_hash[slot]:
            continue  # unchanged since the baseline was taken

        idnum = slot + 1
        assert isinstance(obj, PdfObject), "mypy"
        # first write new/modified object
        offsets[idnum] = stream.tell()
        stream.write(f"{idnum} 0 obj\n".encode())
        # NOTE: encryption is not operational for increments, so the
        # object is written as is.
        obj.write_to_stream(stream)
        stream.write(b"\nendobj\n")

        # Group consecutive object numbers into [first, count] blocks.
        if idnum != block_end:
            if block_first > 0:
                index_blocks.append([block_first, block_end - block_first])
            block_first = idnum
        block_end = idnum + 1
    assert block_first > 0, "for pytest only"
    index_blocks.append([block_first, block_end - block_first])

    # write incremented xref
    xref_location = stream.tell()
    xr_id = len(self._objects) + 1
    stream.write(f"{xr_id} 0 obj".encode())
    init_data = {
        NameObject("/Type"): NameObject("/XRef"),
        NameObject("/Size"): NumberObject(xr_id + 1),
        NameObject("/Root"): self.root_object.indirect_reference,
        NameObject("/Filter"): NameObject("/FlateDecode"),
        NameObject("/Index"): ArrayObject(
            [NumberObject(number) for block in index_blocks for number in block]
        ),
        NameObject("/W"): ArrayObject(
            [NumberObject(1), NumberObject(4), NumberObject(1)]
        ),
        "__streamdata__": b"",
    }
    # /Info is only referenced when it is new or differs from the baseline.
    if self._info is not None and (
        self._info.indirect_reference.idnum - 1  # type: ignore
        >= len(self._original_hash)
        or cast(IndirectObject, self._info).hash_bin()  # kept for future
        != self._original_hash[
            self._info.indirect_reference.idnum - 1  # type: ignore
        ]
    ):
        init_data[NameObject(TK.INFO)] = self._info.indirect_reference
    init_data[NameObject(TK.PREV)] = NumberObject(self._reader._startxref)
    if self._ID:
        init_data[NameObject(TK.ID)] = self._ID
    xr = StreamObject.initialize_from_dictionary(init_data)
    # One entry per written object, laid out as declared by /W.
    xr.set_data(
        b"".join(struct.pack(b">BIB", 1, position, 0) for position in offsets.values())
    )
    xr.write_to_stream(stream)
    stream.write(f"\nendobj\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof

1567 

def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]]:
    """
    Write the header and all objects of the document to the stream.

    Args:
        stream: The stream to write to.

    Returns:
        A tuple of the byte offset of every object (-1 for an empty slot)
        and the numbers of the free objects, terminated by 0.
    """
    positions = []
    free_ids = []
    stream.write(self.pdf_header.encode() + b"\n")
    stream.write(b"%\xE2\xE3\xCF\xD3\n")

    for number, obj in enumerate(self._objects, start=1):
        if obj is None:
            positions.append(-1)
            free_ids.append(number)
            continue
        positions.append(stream.tell())
        stream.write(f"{number} 0 obj\n".encode())
        # The encryption dictionary itself is written unencrypted.
        if self._encryption and obj != self._encrypt_entry:
            obj = self._encryption.encrypt_object(obj, number, 0)
        obj.write_to_stream(stream)
        stream.write(b"\nendobj\n")
    free_ids.append(0)  # add 0 to loop in accordance with specification
    return positions, free_ids

1587 

def _write_xref_table(
    self, stream: StreamType, object_positions: list[int], free_objects: list[int]
) -> int:
    """
    Write the cross-reference table to the stream.

    Args:
        stream: The stream to write to.
        object_positions: Byte offset of every object; a non-positive
            value marks a free slot.
        free_objects: Numbers of the free objects, terminated by 0.

    Returns:
        The stream position at which the table starts.
    """
    xref_location = stream.tell()
    stream.write(b"xref\n")
    stream.write(f"0 {len(self._objects) + 1}\n".encode())
    # Entry 0 is the head of the free list.
    stream.write(f"{free_objects[0]:0>10} {65535:0>5} f \n".encode())
    next_free = iter(free_objects[1:])
    for offset in object_positions:
        if offset > 0:
            entry = f"{offset:0>10} {0:0>5} n \n"
        else:
            # A free entry points to the next free object number.
            entry = f"{next(next_free):0>10} {1:0>5} f \n"
        stream.write(entry.encode())
    return xref_location

1603 

def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
    """
    Write the PDF trailer to the stream.

    To quote the PDF specification:
        [The] trailer [gives] the location of the cross-reference table and
        of certain special objects within the body of the file.

    Args:
        stream: The stream to write to.
        xref_location: Position of the cross-reference table, written
            after ``startxref``.
    """
    stream.write(b"trailer\n")
    object_count = len(self._objects) + 1
    trailer = DictionaryObject(
        {
            NameObject(TK.SIZE): NumberObject(object_count),
            NameObject(TK.ROOT): self.root_object.indirect_reference,
        }
    )
    # Optional entries are only written when the document has them.
    if self._info is not None:
        trailer[NameObject(TK.INFO)] = self._info.indirect_reference
    if self._ID is not None:
        trailer[NameObject(TK.ID)] = self._ID
    if self._encrypt_entry:
        trailer[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference
    trailer.write_to_stream(stream)
    footer = f"\nstartxref\n{xref_location}\n%%EOF\n"
    stream.write(footer.encode())  # eof

1627 

@property
def metadata(self) -> Optional[DocumentInformation]:
    """
    Retrieve/set the PDF file's document information dictionary, if it exists.

    Args:
        value: dict with the entries to be set. if None : remove the /Info entry from the pdf.

    Note that some PDF files use (XMP) metadata streams instead of document
    information dictionaries, and these metadata streams will not be
    accessed by this function, but by :meth:`~xmp_metadata`.

    """
    return super().metadata

@metadata.setter
def metadata(
    self,
    value: Optional[Union[DocumentInformation, DictionaryObject, dict[Any, Any]]],
) -> None:
    # None removes the document information altogether.
    if value is None:
        self._info = None
        return

    # Otherwise the existing entries are replaced by the given ones.
    if self._info is not None:
        self._info.clear()
    self.add_metadata(value)

1655 

def add_metadata(self, infos: dict[str, Any]) -> None:
    """
    Add custom metadata to the output.

    Every value is stored as a string object; an /Info dictionary is
    created when the writer has none yet.

    Args:
        infos: a Python dictionary where each key is a field
            and each value is your new metadata.

    """
    if isinstance(infos, PdfObject):
        infos = cast(DictionaryObject, infos.get_object())
    entries = {
        NameObject(key): create_string_object(
            str(value.get_object() if isinstance(value, PdfObject) else value)
        )
        for key, value in list(infos.items())
    }
    if self._info is None:
        self._info = DictionaryObject()
    self._info.update(entries)

1675 

def compress_identical_objects(
    self,
    remove_identicals: bool = True,
    remove_orphans: bool = True,
) -> None:
    """
    Parse the PDF file and merge objects that have the same hash.
    This will make objects common to multiple pages.
    Recommended to be used just before writing output.

    Args:
        remove_identicals: Remove identical objects.
        remove_orphans: Remove unreferenced objects.

    """

    def replace_in_obj(
        obj: PdfObject, crossref: dict[IndirectObject, IndirectObject]
    ) -> None:
        """Redirect merged references inside obj and mark every referenced object as used."""
        if isinstance(obj, DictionaryObject):
            key_val = obj.items()
        elif isinstance(obj, ArrayObject):
            key_val = enumerate(obj)  # type: ignore
        else:
            return
        assert isinstance(obj, (DictionaryObject, ArrayObject))
        for k, v in key_val:
            if isinstance(v, IndirectObject):
                orphans[v.idnum - 1] = False
                if v in crossref:
                    obj[k] = crossref[v]
            else:
                # Only dictionaries and arrays are descended into; the
                # filtering happens at the top of replace_in_obj.
                replace_in_obj(v, crossref)

    # _idnum_hash :dict[hash]=(1st_ind_obj,[other_indir_objs,...])
    self._idnum_hash = {}
    orphans = [True] * len(self._objects)
    # look for similar objects
    for idx, obj in enumerate(self._objects):
        if is_null_or_none(obj):
            continue
        assert obj is not None, "mypy"  # mypy: TypeGuard of `is_null_or_none` does not help here.
        assert isinstance(obj.indirect_reference, IndirectObject)
        h = obj.hash_value()
        if remove_identicals and h in self._idnum_hash:
            self._idnum_hash[h][1].append(obj.indirect_reference)
            self._objects[idx] = None
        else:
            self._idnum_hash[h] = (obj.indirect_reference, [])

    # Map every duplicate reference to the first object with the same hash.
    cnv_rev: dict[IndirectObject, IndirectObject] = {
        duplicate: first
        for first, duplicates in self._idnum_hash.values()
        for duplicate in duplicates
    }

    # replace reference to merged objects
    for obj in self._objects:
        if isinstance(obj, (DictionaryObject, ArrayObject)):
            replace_in_obj(obj, cnv_rev)

    # Honour the documented switch: keep unreferenced objects on request.
    if not remove_orphans:
        return

    # Objects that are only reachable from the trailer are never seen by
    # replace_in_obj and have to be protected explicitly.
    orphans[self.root_object.indirect_reference.idnum - 1] = False  # type: ignore

    # The document may have no /Info at all (metadata set to None).
    if self._info is not None:
        orphans[self._info.indirect_reference.idnum - 1] = False  # type: ignore

    try:
        orphans[self._ID.indirect_reference.idnum - 1] = False  # type: ignore
    except AttributeError:
        pass

    # The encryption dictionary is referenced by the trailer only.
    if self._encrypt_entry:
        orphans[self._encrypt_entry.indirect_reference.idnum - 1] = False  # type: ignore

    for i in compress(range(len(self._objects)), orphans):
        self._objects[i] = None

1750 

def get_reference(self, obj: PdfObject) -> IndirectObject:
    """
    Return an indirect reference to an object stored in this writer.

    Args:
        obj: The object to look up in the writer's object list.

    Returns:
        The reference built from the position of ``obj`` in that list.

    Raises:
        ValueError: If ``obj`` is not in the writer's object list.
    """
    position = self._objects.index(obj)
    reference = IndirectObject(position + 1, 0, self)
    assert reference.get_object() == obj
    return reference

1756 

def get_outline_root(self) -> TreeObject:
    """
    Return the outline root of the document, creating it when missing.

    When the catalog has no outlines entry, an empty ``TreeObject`` is
    registered in the writer and referenced from the catalog. When the entry
    exists but is not a ``TreeObject``, it is wrapped in one and the stored
    object is replaced by the wrapper.

    Returns:
        The ``TreeObject`` acting as outline root.

    """
    if CO.OUTLINES not in self._root_object:
        fresh_root = TreeObject()
        fresh_root.update({})
        self._root_object[NameObject(CO.OUTLINES)] = self._add_object(fresh_root)
        return fresh_root

    # Entries in the catalog dictionary
    root = cast(TreeObject, self._root_object[CO.OUTLINES])
    if not isinstance(root, TreeObject):
        upgraded = TreeObject(root)
        self._replace_object(root.indirect_reference.idnum, upgraded)
        root = upgraded
    # Sanity check: the root must be stored in this writer and resolvable.
    root_ref = IndirectObject(self._objects.index(root) + 1, 0, self)
    assert root_ref.get_object() == root
    return root

1775 

def get_threads_root(self) -> ArrayObject:
    """
    Return the list of threads, creating an empty one when missing.

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Returns:
        An array (possibly empty) of Dictionaries with an ``/F`` key,
        and optionally information about the thread in ``/I`` or ``/Metadata`` keys.

    """
    if CO.THREADS not in self._root_object:
        # No entry yet: store a new empty array in the catalog and hand it out.
        created = ArrayObject()
        self._root_object[NameObject(CO.THREADS)] = created
        return created
    # Entries in the catalog dictionary
    return cast(ArrayObject, self._root_object[CO.THREADS])

1794 

@property
def threads(self) -> ArrayObject:
    """
    Read-only access to the thread list of the document.

    See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

    Every element is a dictionary with an ``/F`` key; ``/I`` and
    ``/Metadata`` keys may carry additional information about the thread.
    """
    thread_list = self.get_threads_root()
    return thread_list

1806 

def add_outline_item_destination(
    self,
    page_destination: Union[IndirectObject, PageObject, TreeObject],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Insert an outline item (or a page wrapped as a destination) into the outline.

    Args:
        page_destination: The outline item to insert. When it resolves to a
            ``PageObject``, a ``Destination`` fitting that page is built and
            inserted instead.
        parent: Outline item under which the new item is inserted; the
            outline root is used when ``None``.
        before: Sibling in front of which the item is inserted; ``None``
            is forwarded unchanged to ``insert_child``.
        is_open: Stored on the item under ``/%is_open%``; when ``False`` a
            no-op counter callback is passed to ``insert_child``.

    Returns:
        The indirect reference of the inserted outline item.

    """
    page_destination = cast(PageObject, page_destination.get_object())
    if isinstance(page_destination, PageObject):
        # Forward parent/before/is_open: the recursion previously dropped
        # them, so a page was always added open at the end of the root.
        return self.add_outline_item_destination(
            Destination(
                f"page #{page_destination.page_number}",
                cast(IndirectObject, page_destination.indirect_reference),
                Fit.fit(),
            ),
            parent,
            before,
            is_open,
        )

    if parent is None:
        parent = self.get_outline_root()

    page_destination[NameObject("/%is_open%")] = BooleanObject(is_open)
    parent = cast(TreeObject, parent.get_object())
    page_destination_ref = self._add_object(page_destination)
    if before is not None:
        before = before.indirect_reference
    parent.insert_child(
        page_destination_ref,
        before,
        self,
        page_destination.inc_parent_counter_outline
        if is_open
        else (lambda x, y: 0),  # noqa: ARG005
    )
    if "/Count" not in page_destination:
        page_destination[NameObject("/Count")] = NumberObject(0)

    return page_destination_ref

1844 

def add_outline_item_dict(
    self,
    outline_item: OutlineItemType,
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item described by a plain mapping.

    The mapping is copied into a new ``TreeObject`` which is then handed to
    :meth:`add_outline_item_destination`.

    Args:
        outline_item: Key/value pairs of the outline item.
        parent: Forwarded to :meth:`add_outline_item_destination`.
        before: Forwarded to :meth:`add_outline_item_destination`.
        is_open: Forwarded to :meth:`add_outline_item_destination`.

    Returns:
        The value returned by :meth:`add_outline_item_destination`.

    """
    tree_item = TreeObject()
    tree_item.update(outline_item)

    # Disabled code (marked "currently unreachable" in the original):
    #   if "/A" in outline_item:
    #       action = DictionaryObject()
    #       a_dict = cast(DictionaryObject, outline_item["/A"])
    #       for k, v in list(a_dict.items()):
    #           action[NameObject(str(k))] = v
    #       action_ref = self._add_object(action)
    #       outline_item_object[NameObject("/A")] = action_ref
    return self.add_outline_item_destination(tree_item, parent, before, is_open)

1867 

def add_outline_item(
    self,
    title: str,
    page_number: Union[None, PageObject, IndirectObject, int],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Union[tuple[float, float, float], str]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: Fit = PAGE_FIT,
    is_open: bool = True,
) -> IndirectObject:
    """
    Add an outline item (commonly referred to as a "Bookmark") to the PDF file.

    Args:
        title: Title to use for this outline item.
        page_number: Page number this outline item will point to. May also
            be a page object or an indirect reference; ``None`` creates an
            item without a GoTo action.
        parent: A reference to a parent outline item to create nested
            outline items.
        before: Outline item in front of which the new item is inserted.
        color: Color of the outline item's font as a red, green, blue tuple
            from 0.0 to 1.0 or as a Hex String (#RRGGBB)
        bold: Outline item font is bold
        italic: Outline item font is italic
        fit: The fit of the destination page.
        is_open: Forwarded to :meth:`add_outline_item_destination`.

    Returns:
        The added outline item as an indirect object.

    """
    page_ref: Union[None, NullObject, IndirectObject, NumberObject]
    if isinstance(italic, Fit):  # it means that we are on the old params
        if fit is not None and page_number is None:
            page_number = fit
        return self.add_outline_item(
            title, page_number, parent, None, before, color, bold, italic, is_open=is_open
        )
    if page_number is None:
        action_ref = None
    else:
        # Start from None so an unsupported ``page_number`` type reaches the
        # warning below instead of failing with UnboundLocalError.
        page_ref = None
        if isinstance(page_number, IndirectObject):
            page_ref = page_number
        elif isinstance(page_number, PageObject):
            page_ref = page_number.indirect_reference
        elif isinstance(page_number, int):
            try:
                page_ref = self.pages[page_number].indirect_reference
            except IndexError:
                # Unknown page index: keep the raw number as target.
                page_ref = NumberObject(page_number)
        if page_ref is None:
            logger_warning(
                f"can not find reference of page {page_number}",
                __name__,
            )
            page_ref = NullObject()
        dest = Destination(
            NameObject("/" + title + " outline item"),
            page_ref,
            fit,
        )

        action_ref = self._add_object(
            DictionaryObject(
                {
                    NameObject(GoToActionArguments.D): dest.dest_array,
                    NameObject(GoToActionArguments.S): NameObject("/GoTo"),
                }
            )
        )
    outline_item = self._add_object(
        _create_outline_item(action_ref, title, color, italic, bold)
    )

    if parent is None:
        parent = self.get_outline_root()
    return self.add_outline_item_destination(outline_item, parent, before, is_open)

1945 

def add_outline(self) -> None:
    """
    Placeholder that always fails.

    Raises:
        NotImplementedError: Always; the message points to
            :meth:`add_outline_item`.

    """
    message = "This method is not yet implemented. Use :meth:`add_outline_item` instead."
    raise NotImplementedError(message)

1950 

def add_named_destination_array(
    self, title: TextStringObject, destination: Union[IndirectObject, ArrayObject]
) -> None:
    """
    Insert a ``title``/``destination`` pair into the named destinations list.

    The list returned by ``get_named_dest_root`` is walked two entries at a
    time; the pair is inserted in front of the first even-indexed entry that
    compares greater than ``title``, or appended at the end otherwise.

    Args:
        title: Name of the destination.
        destination: The destination stored right after the title.

    """
    names = self.get_named_dest_root()
    for slot in range(0, len(names), 2):
        if title < names[slot]:
            # Insert the destination first, then the title in front of it.
            names.insert(slot, destination)
            names.insert(slot, TextStringObject(title))
            return
    names.extend([TextStringObject(title), destination])

1964 

def add_named_destination_object(
    self,
    page_destination: PdfObject,
) -> IndirectObject:
    """
    Register a destination object as a named destination.

    Args:
        page_destination: Object exposing ``dest_array`` and a ``/Title``
            entry.

    Returns:
        The indirect reference of the stored destination array.

    """
    dest_array_ref = self._add_object(page_destination.dest_array)  # type: ignore
    dest_title = cast("TextStringObject", page_destination["/Title"])  # type: ignore
    self.add_named_destination_array(dest_title, dest_array_ref)
    return dest_array_ref

1975 

def add_named_destination(
    self,
    title: str,
    page_number: int,
) -> IndirectObject:
    """
    Create a GoTo action for a page and register it as named destination.

    Args:
        title: Name of the destination; converted to a ``TextStringObject``
            when it is not one already.
        page_number: Index into the ``/Kids`` array of the pages root.

    Returns:
        The indirect reference of the created action dictionary.

    """
    target_page = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    action = DictionaryObject()
    action[NameObject(GoToActionArguments.D)] = ArrayObject(
        [target_page, NameObject(TypFitArguments.FIT_H), NumberObject(826)]
    )
    action[NameObject(GoToActionArguments.S)] = NameObject("/GoTo")

    action_ref = self._add_object(action)
    if not isinstance(title, TextStringObject):
        title = TextStringObject(str(title))

    self.add_named_destination_array(title, action_ref)
    return action_ref

1998 

def remove_links(self) -> None:
    """
    Remove links and annotations from this output.

    Every page is processed with ``ObjectDeletionFlag.ALL_ANNOTATIONS``.
    """
    for current_page in self.pages:
        self.remove_objects_from_page(
            current_page, ObjectDeletionFlag.ALL_ANNOTATIONS
        )

2003 

def remove_annotations(
    self, subtypes: Optional[Union[AnnotationSubtype, Iterable[AnnotationSubtype]]]
) -> None:
    """
    Remove annotations of the given subtype(s) from every page.

    Args:
        subtypes: subtype or list of subtypes to be removed.
            Examples are: "/Link", "/FileAttachment", "/Sound",
            "/Movie", "/Screen", ...
            Pass ``None`` to remove all annotations.

    """
    for current_page in self.pages:
        self._remove_annots_from_page(current_page, subtypes)

2019 

def _remove_annots_from_page(
    self,
    page: Union[IndirectObject, PageObject, DictionaryObject],
    subtypes: Optional[Iterable[str]],
) -> None:
    """
    Drop annotations from a single page.

    Args:
        page: The page (or a reference to it) whose annotations are filtered.
        subtypes: Annotation subtypes to remove; ``None`` removes every
            annotation.

    """
    page = cast(DictionaryObject, page.get_object())
    if PG.ANNOTS not in page:
        return

    index = 0
    # The array shrinks while we walk it, so its length is re-read each turn
    # and the index only advances past entries that are kept.
    while index < len(cast(ArrayObject, page[PG.ANNOTS])):
        entry = cast(ArrayObject, page[PG.ANNOTS])[index]
        annotation = cast(DictionaryObject, entry.get_object())
        if subtypes is not None and cast(str, annotation["/Subtype"]) not in subtypes:
            index += 1
            continue
        if isinstance(entry, IndirectObject):
            self._objects[entry.idnum - 1] = NullObject()  # to reduce PDF size
        del page[PG.ANNOTS][index]  # type:ignore

2037 

def remove_objects_from_page(
    self,
    page: Union[PageObject, DictionaryObject],
    to_delete: Union[ObjectDeletionFlag, Iterable[ObjectDeletionFlag]],
    text_filters: Optional[dict[str, Any]] = None
) -> None:
    """
    Remove objects specified by ``to_delete`` from the given page.

    Args:
        page: Page object to clean up.
        to_delete: Objects to be deleted; can be a ``ObjectDeletionFlag``
            or a list of ObjectDeletionFlag
        text_filters: Properties of text to be deleted, if applicable. Optional.
            This is a Python dictionary with the following properties:

            * font_ids: List of font resource IDs (such as /F1 or /T1_0) to be deleted.

    """
    if isinstance(to_delete, (list, tuple)):
        for to_d in to_delete:
            # Forward the filters: dropping them here would turn a filtered
            # text removal into the removal of all text.
            self.remove_objects_from_page(page, to_d, text_filters=text_filters)
        return None
    assert isinstance(to_delete, ObjectDeletionFlag)

    # Annotation-based flags are handled by the annotation filter alone.
    if to_delete & ObjectDeletionFlag.LINKS:
        return self._remove_annots_from_page(page, ("/Link",))
    if to_delete & ObjectDeletionFlag.ATTACHMENTS:
        return self._remove_annots_from_page(
            page, ("/FileAttachment", "/Sound", "/Movie", "/Screen")
        )
    if to_delete & ObjectDeletionFlag.OBJECTS_3D:
        return self._remove_annots_from_page(page, ("/3D",))
    if to_delete & ObjectDeletionFlag.ALL_ANNOTATIONS:
        return self._remove_annots_from_page(page, None)

    # Content stream operators that are dropped for the selected flag.
    jump_operators = []
    if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
        jump_operators = (
            [
                b"w", b"J", b"j", b"M", b"d", b"i",
                b"W", b"W*",
                b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
                b"m", b"l", b"c", b"v", b"y", b"h", b"re",
                b"sh"
            ]
        )
    if to_delete & ObjectDeletionFlag.TEXT:
        jump_operators = [b"Tj", b"TJ", b"'", b'"']

    def clean(
        content: ContentStream,
        images: list[str],
        forms: list[str],
        text_filters: Optional[dict[str, Any]] = None
    ) -> None:
        # Delete the matching operations of ``content`` in place.
        nonlocal jump_operators, to_delete

        font_id = None
        font_ids_to_delete = []
        if text_filters and to_delete & ObjectDeletionFlag.TEXT:
            font_ids_to_delete = text_filters.get("font_ids", [])

        i = 0
        while i < len(content.operations):
            operands, operator = content.operations[i]
            if operator == b"Tf":
                # Remember the font selected for the following text operators.
                font_id = operands[0]
            if (
                (
                    operator == b"INLINE IMAGE"
                    and (to_delete & ObjectDeletionFlag.INLINE_IMAGES)
                )
                or (operator in jump_operators)
                or (
                    operator == b"Do"
                    and (to_delete & ObjectDeletionFlag.XOBJECT_IMAGES)
                    and (operands[0] in images)
                )
            ):
                if (
                    not to_delete & ObjectDeletionFlag.TEXT
                    or (to_delete & ObjectDeletionFlag.TEXT and not text_filters)
                    or (to_delete & ObjectDeletionFlag.TEXT and font_id in font_ids_to_delete)
                ):
                    del content.operations[i]
                else:
                    i += 1
            else:
                i += 1
        content.get_data()  # this ensures ._data is rebuilt from the .operations

    def clean_forms(
        elt: DictionaryObject, stack: list[DictionaryObject]
    ) -> tuple[list[str], list[str]]:
        # Walk the XObjects of ``elt``; returns the image and form names found.
        nonlocal to_delete
        # elt in recursive call is a new ContentStream object, so we have to check the indirect_reference
        if (elt in stack) or (
            hasattr(elt, "indirect_reference")
            and any(
                elt.indirect_reference == getattr(x, "indirect_reference", -1)
                for x in stack
            )
        ):
            # to prevent infinite looping
            return [], []  # pragma: no cover
        try:
            d = cast(
                dict[Any, Any],
                cast(DictionaryObject, elt["/Resources"])["/XObject"],
            )
        except KeyError:
            d = {}
        images = []
        forms = []
        for k, v in d.items():
            o = v.get_object()
            try:
                content: Any = None
                if (
                    to_delete & ObjectDeletionFlag.XOBJECT_IMAGES
                    and o["/Subtype"] == "/Image"
                ):
                    content = NullObject()  # to delete the image keeping the entry
                    images.append(k)
                if o["/Subtype"] == "/Form":
                    forms.append(k)
                    if isinstance(o, ContentStream):
                        content = o
                    else:
                        content = ContentStream(o, self)
                        content.update(
                            {
                                k1: v1
                                for k1, v1 in o.items()
                                if k1 not in ["/Length", "/Filter", "/DecodeParms"]
                            }
                        )
                        try:
                            content.indirect_reference = o.indirect_reference
                        except AttributeError:  # pragma: no cover
                            pass
                    stack.append(elt)
                    clean_forms(content, stack)  # clean subforms
                if content is not None:
                    if isinstance(v, IndirectObject):
                        self._objects[v.idnum - 1] = content
                    else:
                        # should only occur in a PDF not respecting PDF spec
                        # where streams must be indirected.
                        d[k] = self._add_object(content)  # pragma: no cover
            except (TypeError, KeyError):
                pass
        for im in images:
            del d[im]  # for clean-up
        if isinstance(elt, StreamObject):  # for /Form
            if not isinstance(elt, ContentStream):  # pragma: no cover
                e = ContentStream(elt, self)
                e.update(elt.items())
                elt = e
            clean(elt, images, forms, text_filters)  # clean the content
        return images, forms

    if not isinstance(page, PageObject):
        page = PageObject(self, page.indirect_reference)  # pragma: no cover
    if "/Contents" in page:
        content = cast(ContentStream, page.get_contents())

        images, forms = clean_forms(page, [])

        clean(content, images, forms, text_filters)
        page.replace_contents(content)

2210 

def remove_images(
    self,
    to_delete: ImageType = ImageType.ALL,
) -> None:
    """
    Remove images from this output.

    Args:
        to_delete: The type of images to be deleted
            (default = all images types). A ``bool`` value is treated as
            ``ImageType.ALL``.

    """
    if isinstance(to_delete, bool):
        to_delete = ImageType.ALL

    # Translate the ImageType bits into the equally named deletion flags.
    deletion_flags = ObjectDeletionFlag.NONE
    for flag_name in ("XOBJECT_IMAGES", "INLINE_IMAGES", "DRAWING_IMAGES"):
        if to_delete & ImageType[flag_name]:
            deletion_flags |= ObjectDeletionFlag[flag_name]

    for current_page in self.pages:
        self.remove_objects_from_page(current_page, deletion_flags)

2234 

def remove_text(self, font_names: Optional[list[str]] = None) -> None:
    """
    Remove text from the PDF.

    Args:
        font_names: List of font names to remove, such as "Helvetica-Bold".
            Optional. If not specified, all text will be removed.
    """

    def collect_fonts(
        node: Any,
        fonts: dict[str, Any],
        resource_key: Optional[str] = None
    ) -> None:
        # Recursively walk ``node`` and record, per normalized font name,
        # the resource keys under which a /Font dictionary was found.
        if isinstance(node, IndirectObject):
            node = node.get_object()
        if isinstance(node, dict):
            if node.get("/Type") == "/Font":
                base_font = node.get("/BaseFont", "")
                # Normalize font names like "/RRXFFV+Palatino-Bold" to "Palatino-Bold"
                short_name = base_font.lstrip("/").split("+")[-1]
                entry = fonts.setdefault(
                    short_name,
                    {"normalized_font_name": short_name, "resource_ids": []},
                )
                if resource_key not in entry["resource_ids"]:
                    entry["resource_ids"].append(resource_key)
            for child_key in node:
                collect_fonts(node[child_key], fonts, child_key)
        elif isinstance(node, (list, ArrayObject)):
            for child in node:
                collect_fonts(child, fonts)

    if not font_names:
        font_names = []

    for current_page in self.pages:
        matched_ids = []

        # Content streams reference fonts and other resources with names like "/F1" or "/T1_0"
        # Font names need to be converted to resource names/IDs for easier removal
        if font_names:
            page_fonts: dict[str, Any] = {}
            collect_fonts(current_page.get("/Resources"), page_fonts)
            for wanted in font_names:
                if wanted in page_fonts:
                    matched_ids.extend(page_fonts[wanted]["resource_ids"])

        # An empty filter dict means "remove all text".
        text_filters = {"font_ids": matched_ids} if font_names else {}
        self.remove_objects_from_page(
            current_page, ObjectDeletionFlag.TEXT, text_filters=text_filters
        )

2291 

def add_uri(
    self,
    page_number: int,
    uri: str,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
) -> None:
    """
    Add an URI from a rectangular area to the specified page.

    Args:
        page_number: index of the page on which to place the URI action.
        uri: URI of resource to link to.
        rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or
            array of four integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``, or string in the form
            ``"[ xLL yLL xUR yUR ]"``.
        border: if provided, an array describing border-drawing
            properties. See the PDF spec for details. No border will be
            drawn if this argument is omitted.

    Raises:
        ValueError: ``rect`` is a string that does not hold four numbers.

    """
    page_link = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
    page_ref = cast(dict[str, Any], self.get_object(page_link))

    border_arr: BorderArrayType
    if border is not None:
        border_arr = [NumberObject(n) for n in border[:3]]
        if len(border) == 4:
            # Optional fourth entry: the dash pattern array.
            dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
            border_arr.append(dash_pattern)
    else:
        border_arr = [NumberObject(2), NumberObject(2), NumberObject(2)]

    if isinstance(rect, str):
        # Parse "[ xLL yLL xUR yUR ]" into its four coordinates. The string
        # was previously passed to NumberObject, which cannot represent a
        # rectangle.
        coordinates = tuple(float(part) for part in rect.strip().strip("[]").split())
        if len(coordinates) != 4:
            raise ValueError(
                f"rect must contain four numbers, got {rect!r}"
            )
        rect = RectangleObject(coordinates)
    elif not isinstance(rect, RectangleObject):
        rect = RectangleObject(rect)

    # The action performed when the link is activated.
    lnk2 = DictionaryObject()
    lnk2.update(
        {
            NameObject("/S"): NameObject("/URI"),
            NameObject("/URI"): TextStringObject(uri),
        }
    )
    # The link annotation carrying the action.
    lnk = DictionaryObject()
    lnk.update(
        {
            NameObject(AA.Type): NameObject("/Annot"),
            NameObject(AA.Subtype): NameObject("/Link"),
            NameObject(AA.P): page_link,
            NameObject(AA.Rect): rect,
            NameObject("/H"): NameObject("/I"),
            NameObject(AA.Border): ArrayObject(border_arr),
            NameObject("/A"): lnk2,
        }
    )
    lnk_ref = self._add_object(lnk)

    if PG.ANNOTS in page_ref:
        page_ref[PG.ANNOTS].append(lnk_ref)
    else:
        page_ref[NameObject(PG.ANNOTS)] = ArrayObject([lnk_ref])

2358 

# Layout names that `_set_page_layout` accepts without logging a warning.
_valid_layouts = (
    "/NoLayout",
    "/SinglePage",
    "/OneColumn",
    "/TwoColumnLeft",
    "/TwoColumnRight",
    "/TwoPageLeft",
    "/TwoPageRight",
)

2368 

def _get_page_layout(self) -> Optional[LayoutType]:
    """
    Read the ``/PageLayout`` entry of the catalog.

    Returns:
        The stored layout, or ``None`` when the catalog has no such entry.

    """
    try:
        stored_layout = self._root_object["/PageLayout"]
    except KeyError:
        return None
    return cast(LayoutType, stored_layout)

2374 

def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
    """
    Set the page layout.

    A value that is not already a ``NameObject`` is converted to one; a
    warning is logged (but the value still stored) when it is not one of
    the known layouts.

    Args:
        layout: The page layout to be used.

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    if not isinstance(layout, NameObject):
        if layout not in self._valid_layouts:
            # Join with ", " so the warning lists the valid names; the former
            # expression built a tuple and printed its repr instead.
            logger_warning(
                f"Layout should be one of: {', '.join(self._valid_layouts)}",
                __name__,
            )
        layout = NameObject(layout)
    self._root_object.update({NameObject("/PageLayout"): layout})

2409 

def set_page_layout(self, layout: LayoutType) -> None:
    """
    Set the page layout by delegating to ``_set_page_layout``.

    Args:
        layout: The page layout to be used

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right

    """
    apply_layout = self._set_page_layout
    apply_layout(layout)

2437 

@property
def page_layout(self) -> Optional[LayoutType]:
    """
    Page layout property, backed by ``_get_page_layout`` / ``_set_page_layout``.

    .. list-table:: Valid ``layout`` values
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    current_layout = self._get_page_layout()
    return current_layout

@page_layout.setter
def page_layout(self, layout: LayoutType) -> None:
    # Assignment goes through the same helper as ``set_page_layout``.
    self._set_page_layout(layout)

2466 

# Mode names that the `page_mode` setter accepts without logging a warning.
_valid_modes = (
    "/UseNone",
    "/UseOutlines",
    "/UseThumbs",
    "/FullScreen",
    "/UseOC",
    "/UseAttachments",
)

2475 

def _get_page_mode(self) -> Optional[PagemodeType]:
    """
    Read the ``/PageMode`` entry of the catalog.

    Returns:
        The stored mode, or ``None`` when the catalog has no such entry.

    """
    try:
        stored_mode = self._root_object["/PageMode"]
    except KeyError:
        return None
    return cast(PagemodeType, stored_mode)

2481 

@property
def page_mode(self) -> Optional[PagemodeType]:
    """
    Page mode property.

    .. list-table:: Valid ``mode`` values
       :widths: 50 200

       * - /UseNone
         - Do not show outline or thumbnails panels
       * - /UseOutlines
         - Show outline (aka bookmarks) panel
       * - /UseThumbs
         - Show page thumbnails panel
       * - /FullScreen
         - Fullscreen view
       * - /UseOC
         - Show Optional Content Group (OCG) panel
       * - /UseAttachments
         - Show attachments panel
    """
    current_mode = self._get_page_mode()
    return current_mode

@page_mode.setter
def page_mode(self, mode: PagemodeType) -> None:
    # A NameObject is stored as is; anything else is checked against the
    # known modes (warning only) and then converted to a NameObject.
    if not isinstance(mode, NameObject):
        if mode not in self._valid_modes:
            logger_warning(
                f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
            )
        mode = NameObject(mode)
    self._root_object.update({NameObject("/PageMode"): mode})

2516 

def add_annotation(
    self,
    page_number: Union[int, PageObject],
    annotation: dict[str, Any],
) -> DictionaryObject:
    """
    Add a single annotation to the page.
    The added annotation must be a new annotation.
    It cannot be recycled.

    Args:
        page_number: PageObject or page index.
        annotation: Annotation to be added (created with annotation).

    Returns:
        The inserted object.
        This can be used for popup creation, for example.

    Raises:
        TypeError: ``page_number`` is neither an ``int`` nor a ``PageObject``.

    """
    target_page = page_number
    if isinstance(target_page, int):
        target_page = self.pages[target_page]
    elif not isinstance(target_page, PageObject):
        raise TypeError("page: invalid type")

    new_annot = cast(DictionaryObject, _pdf_objectify(annotation))
    new_annot[NameObject("/P")] = target_page.indirect_reference

    if target_page.annotations is None:
        target_page[NameObject("/Annots")] = ArrayObject()
    assert target_page.annotations is not None

    # Internal link annotations need the correct object type for the
    # destination
    is_internal_link = new_annot.get("/Subtype") == "/Link" and "/Dest" in new_annot
    if is_internal_link:
        raw_dest = cast(dict[Any, Any], new_annot[NameObject("/Dest")])
        link_fit = Fit(
            fit_type=raw_dest["fit"], fit_args=dict(raw_dest)["fit_args"]
        )  # the dict() conversion is kept from the original ("dict-hack")
        link_dest = Destination(
            NameObject("/LinkName"),
            raw_dest["target_page_index"],
            link_fit,
        )
        new_annot[NameObject("/Dest")] = link_dest.dest_array

    target_page.annotations.append(self._add_object(new_annot))

    if new_annot.get("/Subtype") == "/Popup" and NameObject("/Parent") in new_annot:
        # Point the parent annotation back at its popup.
        popup_parent = cast(DictionaryObject, new_annot["/Parent"].get_object())
        popup_parent[NameObject("/Popup")] = new_annot.indirect_reference

    return new_annot

2570 

def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
    """
    Perform some clean up in the page.
    Currently: convert NameObject named destination to TextStringObject
    (required for names/dests list)

    Args:
        page: The page, or a reference to it, whose annotations are
            cleaned in place.

    Returns:
        The cleaned PageObject

    """
    page = cast("PageObject", page.get_object())
    for annot_ref in page.get("/Annots", []):
        annot = annot_ref.get_object()
        dest = annot.get("/Dest", None)
        action = annot.get("/A", None)
        if isinstance(dest, NameObject):
            annot[NameObject("/Dest")] = TextStringObject(dest)
            continue
        if action is None:
            continue
        # No name destination on the annotation itself: look into its action.
        action = action.get_object()
        action_dest = action.get("/D", None)
        if isinstance(action_dest, NameObject):
            action[NameObject("/D")] = TextStringObject(action_dest)
    return page

2597 

def _create_stream(
    self, fileobj: Union[Path, StrByteType, PdfReader]
) -> tuple[IOBase, Optional[Encryption]]:
    """
    Copy the content behind ``fileobj`` into an in-memory stream.

    Args:
        fileobj: A path (``str`` or ``Path``), a ``PdfReader``, or any
            object offering ``seek`` and ``read``.

    Returns:
        A tuple of the ``BytesIO`` copy and the reader's encryption object
        (``None`` unless ``fileobj`` is a ``PdfReader`` with a truthy
        ``_encryption``).

    Raises:
        NotImplementedError: ``fileobj`` is none of the supported kinds.

    """
    # A string or Path is treated as a file location.
    if isinstance(fileobj, (str, Path)):
        with FileIO(fileobj, "rb") as handle:
            return BytesIO(handle.read()), None

    # A reader: copy its whole stream, then restore the read position.
    if isinstance(fileobj, PdfReader):
        encryption = fileobj._encryption or None
        saved_position = fileobj.stream.tell()
        fileobj.stream.seek(0)
        copied = BytesIO(fileobj.stream.read())
        # reset the stream to its original location
        fileobj.stream.seek(saved_position)
        return copied, encryption

    # Any file-like object: read it from the start.
    if hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
        fileobj.seek(0)
        return BytesIO(fileobj.read()), None

    raise NotImplementedError(
        "Merging requires an object that PdfReader can parse. "
        "Typically, that is a Path or a string representing a Path, "
        "a file object, or an object implementing .seek and .read. "
        "Passing a PdfReader directly works as well."
    )

2633 

def append(
    self,
    fileobj: Union[StrByteType, PdfReader, Path],
    outline_item: Union[
        str, None, PageRange, tuple[int, int], tuple[int, int, int], list[int]
    ] = None,
    pages: Union[
        None,
        PageRange,
        tuple[int, int],
        tuple[int, int, int],
        list[int],
        list[PageObject],
    ] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
) -> None:
    """
    Identical to the :meth:`merge()<merge>` method, but assumes you want to
    concatenate all pages onto the end of the file instead of specifying a
    position.

    Args:
        fileobj: A File Object or an object that supports the standard
            read and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
        outline_item: Optionally, you may specify a string to build an
            outline (aka 'bookmark') to identify the beginning of the
            included file. When a tuple, list or ``PageRange`` is given
            here, it is used as ``pages`` and the following positional
            arguments are shifted accordingly.
        pages: Can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: Provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored

    """
    if excluded_fields is None:
        excluded_fields = ()

    title = outline_item
    if isinstance(outline_item, (tuple, list, PageRange)):
        # The second positional argument carries a page selection: shift
        # the remaining positional arguments one slot to the left.
        if isinstance(pages, bool):
            if not isinstance(import_outline, bool):
                excluded_fields = import_outline
            import_outline = pages
        pages = outline_item
        title = None

    self.merge(
        None,
        fileobj,
        title,
        pages,
        import_outline,
        excluded_fields,
    )

2701 

def merge(
    self,
    position: Optional[int],
    fileobj: Union[Path, StrByteType, PdfReader],
    outline_item: Optional[str] = None,
    pages: Optional[Union[PageRangeSpec, list[PageObject]]] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = (),
) -> None:
    """
    Merge the pages from the given file into the output file at the
    specified page number.

    Besides the pages themselves, the named destinations, the outline
    (optional), the annotations, the AcroForm fields and the article
    threads that refer to the merged pages are brought over.

    Args:
        position: The *page number* to insert this file. File will
            be inserted after the given number. When ``None``, the pages
            are appended with ``add_page`` instead of ``insert_page``.
        fileobj: A File Object or an object that supports the standard
            read and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
        outline_item: Optionally, you may specify a string to build an outline
            (aka 'bookmark') to identify the
            beginning of the included file.
        pages: can be a :class:`PageRange<pypdf.pagerange.PageRange>`
            or a ``(start, stop[, step])`` tuple
            or a list of pages to be processed
            to merge only the specified range of pages from the source
            document into the output document.
        import_outline: You may prevent the source document's
            outline (collection of outline items, previously referred to as
            'bookmarks') from being imported by specifying this as ``False``.
        excluded_fields: provide the list of fields/keys to be ignored
            if ``/Annots`` is part of the list, the annotation will be ignored
            if ``/B`` is part of the list, the articles will be ignored

    Raises:
        TypeError: The pages attribute is not configured properly

    """
    if isinstance(fileobj, PdfDocCommon):
        reader = fileobj
    else:
        stream, encryption_obj = self._create_stream(fileobj)
        # Create a new PdfReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        reader = PdfReader(stream, strict=False)  # type: ignore[arg-type]

    if excluded_fields is None:
        excluded_fields = ()
    # Find the range of pages to merge.
    if pages is None:
        pages = list(range(len(reader.pages)))
    elif isinstance(pages, PageRange):
        pages = list(range(*pages.indices(len(reader.pages))))
    elif isinstance(pages, list):
        pass  # keep unchanged
    elif isinstance(pages, tuple) and len(pages) <= 3:
        pages = list(range(*pages))
    elif not isinstance(pages, tuple):
        raise TypeError(
            '"pages" must be a tuple of (start, stop[, step]) or a list'
        )
    # A tuple with more than three items matches none of the branches above
    # and is iterated item by item below, exactly like a list.

    # Maps the object number of each source page to its copy in this writer.
    srcpages = {}
    for page in pages:
        # Items may be PageObject instances or indices into reader.pages.
        if isinstance(page, PageObject):
            pg = page
        else:
            pg = reader.pages[page]
        assert pg.indirect_reference is not None
        if position is None:
            # numbers in the exclude list identifies that the exclusion is
            # only applicable to 1st level of cloning
            srcpages[pg.indirect_reference.idnum] = self.add_page(
                pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
        else:
            srcpages[pg.indirect_reference.idnum] = self.insert_page(
                pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
            )
            # Keep the following pages in source order.
            position += 1
        # Remember the source page; it is read back below for /Annots and
        # by add_filtered_articles for /B.
        srcpages[pg.indirect_reference.idnum].original_page = pg

    reader._named_destinations = (
        reader.named_destinations
    )  # need for the outline processing below

    arr: Any

    def _process_named_dests(dest: Any) -> None:
        # Copy one named destination of the reader into this writer when it
        # points to one of the merged pages.
        arr = dest.dest_array
        if "/Names" in self._root_object and dest["/Title"] in cast(
            list[Any],
            cast(
                DictionaryObject,
                cast(DictionaryObject, self._root_object["/Names"]).get("/Dests", DictionaryObject()),
            ).get("/Names", DictionaryObject()),
        ):
            # already exists: should not duplicate it
            pass
        elif dest["/Page"] is None or isinstance(dest["/Page"], NullObject):
            # No target page: nothing to translate.
            pass
        elif isinstance(dest["/Page"], int):
            # the page reference is a page number normally not a PDF Reference
            # page numbers as int are normally accepted only in external goto
            try:
                p = reader.pages[dest["/Page"]]
            except IndexError:
                return
            assert p.indirect_reference is not None
            try:
                arr[NumberObject(0)] = NumberObject(
                    srcpages[p.indirect_reference.idnum].page_number
                )
                self.add_named_destination_array(dest["/Title"], arr)
            except KeyError:
                # The referenced page is not among the merged pages.
                pass
        elif dest["/Page"].indirect_reference.idnum in srcpages:
            # Point the destination at the copied page.
            arr[NumberObject(0)] = srcpages[
                dest["/Page"].indirect_reference.idnum
            ].indirect_reference
            self.add_named_destination_array(dest["/Title"], arr)

    for dest in reader._named_destinations.values():
        _process_named_dests(dest)

    # Parent under which the imported outline is inserted: either a new
    # item named after ``outline_item`` or the outline root of this writer.
    outline_item_typ: TreeObject
    if outline_item is not None:
        # NOTE(review): next(iter(...)) raises StopIteration when no page
        # was merged (empty ``srcpages``) - confirm whether callers can hit this.
        outline_item_typ = cast(
            "TreeObject",
            self.add_outline_item(
                TextStringObject(outline_item),
                next(iter(srcpages.values())).indirect_reference,
                fit=PAGE_FIT,
            ).get_object(),
        )
    else:
        outline_item_typ = self.get_outline_root()

    _ro = reader.root_object
    if import_outline and CO.OUTLINES in _ro:
        outline = self._get_filtered_outline(
            _ro.get(CO.OUTLINES, None), srcpages, reader
        )
        self._insert_filtered_outline(
            outline, outline_item_typ, None
        )  # TODO: use before parameter

    if "/Annots" not in excluded_fields:
        for pag in srcpages.values():
            # /Annots was skipped while cloning the page (see the exclude
            # list above); rebuild it from the source page here.
            lst = self._insert_filtered_annotations(
                pag.original_page.get("/Annots", []), pag, srcpages, reader
            )
            if len(lst) > 0:
                pag[NameObject("/Annots")] = lst
            self.clean_page(pag)

    if "/AcroForm" in _ro and _ro["/AcroForm"] is not None:
        if "/AcroForm" not in self._root_object:
            # First form merged: clone the reader's AcroForm without /Fields.
            self._root_object[NameObject("/AcroForm")] = self._add_object(
                cast(
                    DictionaryObject,
                    reader.root_object["/AcroForm"],
                ).clone(self, False, ("/Fields",))
            )
            arr = ArrayObject()
        else:
            arr = cast(
                ArrayObject,
                cast(DictionaryObject, self._root_object["/AcroForm"])["/Fields"],
            )
        # Translation table: reader object number -> writer object number.
        trslat = self._id_translated[id(reader)]
        try:
            for f in reader.root_object["/AcroForm"]["/Fields"]:  # type: ignore
                try:
                    ind = IndirectObject(trslat[f.idnum], 0, self)
                    if ind not in arr:
                        arr.append(ind)
                except KeyError:
                    # for trslat[] which mean the field has not be copied
                    # through the page
                    pass
        except KeyError:  # for /Acroform or /Fields are not existing
            arr = self._add_object(ArrayObject())
        # NOTE(review): indentation was lost in the listing this was taken
        # from; the assignment is placed after the try/except so that a newly
        # created AcroForm also receives its /Fields - confirm against the
        # repository.
        cast(DictionaryObject, self._root_object["/AcroForm"])[
            NameObject("/Fields")
        ] = arr

    if "/B" not in excluded_fields:
        # An empty filter string matches every article thread title.
        self.add_filtered_articles("", srcpages, reader)

2891 

def _add_articles_thread(
    self,
    thread: DictionaryObject,  # thread entry from the reader's array of threads
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> IndirectObject:
    """
    Clone the thread with only the applicable articles.

    The thread dictionary is cloned without its ``/F`` entry, registered in
    ``self.threads``, and a new chain of articles is built from those source
    articles whose page (``/P``) is found among ``pages``.

    Args:
        thread: thread dictionary of the source document; its ``/F`` entry
            is the first article of the chain.
        pages: cloned pages keyed by the object number of the source page,
            as passed on to ``_get_cloned_page``.
        reader: the source document, passed on to ``_get_cloned_page``.

    Returns:
        The added thread as an indirect reference

    """
    nthread = thread.clone(
        self, force_duplicate=True, ignore_fields=("/F",)
    )  # use of clone to keep link between reader and writer
    self.threads.append(nthread.indirect_reference)
    first_article = cast("DictionaryObject", thread["/F"])
    current_article: Optional[DictionaryObject] = first_article
    new_article: Optional[DictionaryObject] = None
    # Follow the source articles through /N until the first one is reached again.
    while current_article is not None:
        pag = self._get_cloned_page(
            cast("PageObject", current_article["/P"]), pages, reader
        )
        if pag is not None:
            if new_article is None:
                # First kept article: it becomes /F of the new thread.
                new_article = cast(
                    "DictionaryObject",
                    self._add_object(DictionaryObject()).get_object(),
                )
                new_first = new_article
                nthread[NameObject("/F")] = new_article.indirect_reference
            else:
                # Later article: /V points back to the previous kept article
                # and the previous one gets /N pointing forward.
                new_article2 = cast(
                    "DictionaryObject",
                    self._add_object(
                        DictionaryObject(
                            {NameObject("/V"): new_article.indirect_reference}
                        )
                    ).get_object(),
                )
                new_article[NameObject("/N")] = new_article2.indirect_reference
                new_article = new_article2
            new_article[NameObject("/P")] = pag
            new_article[NameObject("/T")] = nthread.indirect_reference
            new_article[NameObject("/R")] = current_article["/R"]
            # Register the new article in the /B array of the cloned page.
            pag_obj = cast("PageObject", pag.get_object())
            if "/B" not in pag_obj:
                pag_obj[NameObject("/B")] = ArrayObject()
            cast("ArrayObject", pag_obj["/B"]).append(
                new_article.indirect_reference
            )
        current_article = cast("DictionaryObject", current_article["/N"])
        if current_article == first_article:
            # Back at the start: close the ring between last and first kept
            # article and stop.
            # NOTE(review): if no article page was found in ``pages``,
            # ``new_first``/``new_article`` are not set here and these lines
            # fail - confirm callers only pass threads touching a merged page.
            new_article[NameObject("/N")] = new_first.indirect_reference  # type: ignore
            new_first[NameObject("/V")] = new_article.indirect_reference  # type: ignore
            current_article = None
    assert nthread.indirect_reference is not None
    return nthread.indirect_reference

2956 

def add_filtered_articles(
    self,
    fltr: Union[
        Pattern[Any], str
    ],  # thread entry from the reader's array of threads
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> None:
    """
    Add articles matching the defined criteria.

    Every article listed in ``/B`` of the source of each page in ``pages``
    is inspected; its thread is added through ``_add_articles_thread`` when
    the thread has not been translated for ``reader`` yet and its title
    (``/I`` -> ``/Title``) matches ``fltr``.

    Args:
        fltr: regular expression (compiled, or as a string) searched in the
            thread title; any other value is replaced by the empty pattern.
        pages: cloned pages carrying an ``original_page`` attribute.
        reader: the source document the pages were cloned from.

    """
    if isinstance(fltr, str):
        pattern = re.compile(fltr)
    elif isinstance(fltr, Pattern):
        pattern = fltr
    else:
        pattern = re.compile("")

    for cloned_page in pages.values():
        source_page = cloned_page.original_page
        for bead in source_page.get("/B", ()):
            thread_ref = bead.get_object().get("/T")
            if thread_ref is None:
                continue
            thread = thread_ref.get_object()
            # The translation table is looked up on every pass because
            # adding a thread may extend it.
            if thread.indirect_reference.idnum in self._id_translated[id(reader)]:
                continue
            title = (thread.get("/I", {})).get("/Title", "")
            if pattern.search(title):
                self._add_articles_thread(thread, pages, reader)

2989 

def _get_cloned_page(
    self,
    page: Union[None, IndirectObject, PageObject, NullObject],
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> Optional[IndirectObject]:
    """
    Return the reference of the cloned page corresponding to a source page.

    Args:
        page: a source page given as a page dictionary (``/Type`` equal to
            ``/Page``) or as an indirect reference; ``None``, null objects
            and anything else yield ``None``.
        pages: cloned pages keyed by the object number of the source page.
        reader: the source document (not used by the lookup itself).

    Returns:
        The ``indirect_reference`` of the cloned page, or ``None`` when the
        page cannot be identified or has not been cloned.

    """
    if isinstance(page, NullObject):
        return None
    if isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page":
        source_ref = page.indirect_reference
    elif isinstance(page, IndirectObject):
        source_ref = page
    else:
        # Unsupported input (None, non-page dictionary, ...). This used to
        # surface as an unbound local swallowed by a broad ``except``.
        return None
    if source_ref is None:
        # A page dictionary that was never registered has no object number.
        return None
    cloned = pages.get(source_ref.idnum)
    if cloned is None:
        return None
    return cloned.indirect_reference  # type: ignore

3006 

def _insert_filtered_annotations(
    self,
    annots: Union[IndirectObject, list[DictionaryObject], None],
    page: PageObject,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Clone the annotations of a source page that remain valid after merging.

    Args:
        annots: the ``/Annots`` value of the source page: a list, an
            indirect reference to one, or ``None``.
        page: the cloned page the annotations are meant for (not read here).
        pages: cloned pages keyed by the object number of the source page.
        reader: the source document, passed on to ``_get_cloned_page``.

    Returns:
        An ``ArrayObject`` with the cloned annotations. Annotations that
        target a page outside ``pages``, or a named destination unknown to
        this writer, are left out.

    """
    outlist = ArrayObject()
    if isinstance(annots, IndirectObject):
        annots = cast("list[Any]", annots.get_object())
    if annots is None:
        return outlist
    if not isinstance(annots, list):
        logger_warning(f"Expected list of annotations, got {annots} of type {annots.__class__.__name__}.", __name__)
        return outlist
    for an in annots:
        ano = cast("DictionaryObject", an.get_object())
        # First branch: everything except a link with a /GoTo action and no
        # /Dest entry; that remaining case is handled in the ``else`` below.
        if (
            ano["/Subtype"] != "/Link"
            or "/A" not in ano
            or cast("DictionaryObject", ano["/A"])["/S"] != "/GoTo"
            or "/Dest" in ano
        ):
            if "/Dest" not in ano:
                # No destination to translate: clone as is.
                outlist.append(self._add_object(ano.clone(self)))
            else:
                d = ano["/Dest"]
                if isinstance(d, str):
                    # it is a named dest
                    if str(d) in self.get_named_dest_root():
                        outlist.append(ano.clone(self).indirect_reference)
                else:
                    # Explicit destination: the first item is the target page.
                    d = cast("ArrayObject", d)
                    p = self._get_cloned_page(d[0], pages, reader)
                    if p is not None:
                        # Clone without /Dest, then point it at the cloned page.
                        anc = ano.clone(self, ignore_fields=("/Dest",))
                        anc[NameObject("/Dest")] = ArrayObject([p, *d[1:]])
                        outlist.append(self._add_object(anc))
        else:
            # /GoTo link: the destination is stored in /D of the action.
            d = cast("DictionaryObject", ano["/A"]).get("/D", NullObject())
            if d is None or isinstance(d, NullObject):
                continue
            if isinstance(d, str):
                # it is a named dest
                if str(d) in self.get_named_dest_root():
                    outlist.append(ano.clone(self).indirect_reference)
            else:
                d = cast("ArrayObject", d)
                p = self._get_cloned_page(d[0], pages, reader)
                if p is not None:
                    # Clone without /D, then set /D with the cloned page.
                    anc = ano.clone(self, ignore_fields=("/D",))
                    cast("DictionaryObject", anc["/A"])[
                        NameObject("/D")
                    ] = ArrayObject([p, *d[1:]])
                    outlist.append(self._add_object(anc))
    return outlist

3063 

def _get_filtered_outline(
    self,
    node: Any,
    pages: dict[int, PageObject],
    reader: PdfReader,
) -> list[Destination]:
    """
    Extract outline item entries that are part of the specified page set.

    Args:
        node: outline node of the source document: the outline root, an
            outline item, a reference to one of them, or ``None``.
        pages: cloned pages keyed by the object number of the source page.
        reader: the source document; used to build the outline items and
            passed on to ``_get_cloned_page``.

    Returns:
        A list of destination objects. Each one has its ``/Page`` replaced
        by the cloned page reference (or a null object) and carries its
        retained children in ``_filtered_children``.

    """
    new_outline = []
    if node is None:
        node = NullObject()
    node = node.get_object()
    if is_null_or_none(node):
        node = DictionaryObject()
    if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
        # Container without a title (e.g. the outline root): continue with
        # its first child.
        node = node.get("/First", None)
        if node is not None:
            node = node.get_object()
            new_outline += self._get_filtered_outline(node, pages, reader)
    else:
        v: Union[None, IndirectObject, NullObject]
        # Walk this item and its following siblings through /Next.
        while node is not None:
            node = node.get_object()
            o = cast("Destination", reader._build_outline_item(node))
            v = self._get_cloned_page(cast("PageObject", o["/Page"]), pages, reader)
            if v is None:
                # Target page is not part of ``pages``.
                v = NullObject()
            o[NameObject("/Page")] = v
            if "/First" in node:
                o._filtered_children = self._get_filtered_outline(
                    node["/First"], pages, reader
                )
            else:
                o._filtered_children = []
            # Keep the item when it has a target page or a retained child.
            if (
                not isinstance(o["/Page"], NullObject)
                or len(o._filtered_children) > 0
            ):
                new_outline.append(o)
            node = node.get("/Next", None)
    return new_outline

3115 

def _clone_outline(self, dest: Destination) -> TreeObject:
    """
    Create a new outline item in this writer from a destination.

    The title is always copied. When the destination has a page, either the
    ``/A`` entry of the source node is cloned or the destination array is
    stored as ``/Dest``. ``/F`` and ``/C`` are taken from the source node
    when there is one (defaulting to 0 and to three 0.0 components).

    Args:
        dest: the destination to copy from.

    Returns:
        The new outline item, already registered in this writer.

    """
    clone = TreeObject()
    self._add_object(clone)
    clone[NameObject("/Title")] = TextStringObject(dest["/Title"])

    has_page = not isinstance(dest["/Page"], NullObject)
    if has_page:
        if dest.node is None or "/A" not in dest.node:
            clone[NameObject("/Dest")] = dest.dest_array
        else:
            clone[NameObject("/A")] = dest.node["/A"].clone(self)
    # TODO: /SE

    if dest.node is None:
        return clone

    clone[NameObject("/F")] = NumberObject(dest.node.get("/F", 0))
    default_color = [FloatObject(0.0), FloatObject(0.0), FloatObject(0.0)]
    clone[NameObject("/C")] = ArrayObject(dest.node.get("/C", default_color))
    return clone

3134 

3135 def _insert_filtered_outline( 

3136 self, 

3137 outlines: list[Destination], 

3138 parent: Union[TreeObject, IndirectObject], 

3139 before: Union[None, TreeObject, IndirectObject] = None, 

3140 ) -> None: 

3141 for dest in outlines: 

3142 # TODO: can be improved to keep A and SE entries (ignored for the moment) 

3143 # with np=self.add_outline_item_destination(dest,parent,before) 

3144 if dest.get("/Type", "") == "/Outlines" or "/Title" not in dest: 

3145 np = parent 

3146 else: 

3147 np = self._clone_outline(dest) 

3148 cast(TreeObject, parent.get_object()).insert_child(np, before, self) 

3149 self._insert_filtered_outline(dest._filtered_children, np, None) 

3150 

def close(self) -> None:
    """Do nothing; present only for API harmonization."""
    return None

3154 

def find_outline_item(
    self,
    outline_item: dict[str, Any],
    root: Optional[OutlineType] = None,
) -> Optional[list[int]]:
    """
    Locate an outline item and return its position path.

    Args:
        outline_item: value compared against the ``indirect_reference`` and
            the ``/Title`` of each visited node.
        root: node to start from; the outline root of this writer when
            ``None``.

    Returns:
        The list of sibling indices leading to the first matching node
        (levels without a ``/Title`` add no index), or ``None`` when
        nothing matches.

    """
    node = self.get_outline_root() if root is None else cast("TreeObject", root)

    index = 0
    while node is not None:
        is_match = (
            node.indirect_reference == outline_item
            or node.get("/Title", None) == outline_item
        )
        if is_match:
            return [index]
        if "/First" in node:
            sub_path = self.find_outline_item(
                outline_item, cast(OutlineType, node["/First"])
            )
            if sub_path:
                own_level = [index] if "/Title" in node else []
                return own_level + sub_path
        if "/Next" not in node:
            return None
        # Continue with the next sibling.
        index += 1
        node = cast(TreeObject, node["/Next"])
    return None

3183 

def reset_translation(
    self, reader: Union[None, PdfReader, IndirectObject] = None
) -> None:
    """
    Reset the translation table between reader and the writer object.

    Late cloning will create new independent objects.

    Args:
        reader: PdfReader or IndirectObject referencing a PdfReader object.
            if set to None or omitted, all tables will be reset.

    Raises:
        Exception: ``reader`` is neither ``None``, a PdfReader nor an
            IndirectObject.

    """
    if reader is None:
        self._id_translated = {}
    elif isinstance(reader, PdfReader):
        # A reader without a table is fine: there is nothing to reset.
        self._id_translated.pop(id(reader), None)
    elif isinstance(reader, IndirectObject):
        # The table is keyed by the document owning the referenced object.
        self._id_translated.pop(id(reader.pdf), None)
    else:
        # f-string so the offending value actually shows up in the message.
        raise Exception(f"invalid parameter {reader}")

3211 

def set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label, a decimal label starting from 1 is applied.

    Args:
        page_index_from: index of the first page of the range, starting from 0
        page_index_to: index of the last page of the range, starting from 0
        style: The numbering style to be used for the numeric portion of each page label:

                   * ``/D`` Decimal Arabic numerals
                   * ``/R`` Uppercase Roman numerals
                   * ``/r`` Lowercase Roman numerals
                   * ``/A`` Uppercase letters (A to Z for the first 26 pages,
                     AA to ZZ for the next 26, and so on)
                   * ``/a`` Lowercase letters (a to z for the first 26 pages,
                     aa to zz for the next 26, and so on)

        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value,
               which must be greater than or equal to 1.
               The default of 0 stores no explicit start value.

    Raises:
        ValueError: neither style nor prefix is given, the page range is
            invalid, or ``start`` is negative.

    """
    if all(option is None for option in (style, prefix)):
        raise ValueError("At least one of style and prefix must be given")
    if page_index_from < 0:
        raise ValueError("page_index_from must be greater or equal than 0")
    if page_index_from > page_index_to:
        raise ValueError(
            "page_index_to must be greater or equal than page_index_from"
        )
    page_count = len(self.pages)
    if page_index_to >= page_count:
        raise ValueError("page_index_to exceeds number of pages")
    # None and 0 both pass; only values below one are rejected.
    if start not in (None, 0) and start < 1:
        raise ValueError("If given, start must be greater or equal than one")

    self._set_page_label(page_index_from, page_index_to, style, prefix, start)

3262 

def _set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label a decimal label starting from 1 is applied.

    No argument validation happens here.

    Args:
        page_index_from: index of the first page of the range, starting from 0
        page_index_to: index of the last page of the range, starting from 0
        style: The numbering style to be used for the numeric portion of each page label:
                    /D Decimal Arabic numerals
                    /R Uppercase Roman numerals
                    /r Lowercase Roman numerals
                    /A Uppercase letters (A to Z for the first 26 pages,
                       AA to ZZ for the next 26, and so on)
                    /a Lowercase letters (a to z for the first 26 pages,
                       aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
               in the range.
               Subsequent pages are numbered sequentially from this value,
               which must be greater than or equal to 1.
               ``0`` or ``None`` stores no ``/St`` entry.

    """
    default_page_label = DictionaryObject()
    default_page_label[NameObject("/S")] = NameObject("/D")

    new_page_label = DictionaryObject()
    if style is not None:
        new_page_label[NameObject("/S")] = NameObject(style)
    if prefix is not None:
        new_page_label[NameObject("/P")] = TextStringObject(prefix)
    # ``None`` is accepted by the signature and by set_page_label(); treat it
    # like 0 instead of passing it to NumberObject().
    if start is not None and start != 0:
        new_page_label[NameObject("/St")] = NumberObject(start)

    if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
        # No labels yet: start with the default decimal label at page 0.
        nums = ArrayObject()
        nums_insert(NumberObject(0), default_page_label, nums)
        page_labels = TreeObject()
        page_labels[NameObject("/Nums")] = nums
        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

    page_labels = cast(
        TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
    )
    nums = cast(ArrayObject, page_labels[NameObject("/Nums")])

    nums_insert(NumberObject(page_index_from), new_page_label, nums)
    nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
    next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
    # Unless the next entry starts right after the range, or the range ends
    # on the last page, switch back to the default label after the range.
    if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
        nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)

    page_labels[NameObject("/Nums")] = nums
    self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

3327 

3328 def _repr_mimebundle_( 

3329 self, 

3330 include: Union[None, Iterable[str]] = None, 

3331 exclude: Union[None, Iterable[str]] = None, 

3332 ) -> dict[str, Any]: 

3333 """ 

3334 Integration into Jupyter Notebooks. 

3335 

3336 This method returns a dictionary that maps a mime-type to its 

3337 representation. 

3338 

3339 .. seealso:: 

3340 

3341 https://ipython.readthedocs.io/en/stable/config/integrating.html 

3342 """ 

3343 pdf_data = BytesIO() 

3344 self.write(pdf_data) 

3345 data = { 

3346 "application/pdf": pdf_data, 

3347 } 

3348 

3349 if include is not None: 

3350 # Filter representations based on include list 

3351 data = {k: v for k, v in data.items() if k in include} 

3352 

3353 if exclude is not None: 

3354 # Remove representations based on exclude list 

3355 data = {k: v for k, v in data.items() if k not in exclude} 

3356 

3357 return data 

3358 

3359 

def _pdf_objectify(obj: Union[dict[str, Any], str, float, list[Any]]) -> PdfObject:
    """
    Convert a plain Python value into the corresponding PDF object.

    PDF objects are returned unchanged. Dictionaries and lists are converted
    recursively, strings starting with ``/`` become names and other strings
    become text strings, and numbers (``int`` as well as ``float``) become
    ``FloatObject``.

    Raises:
        NotImplementedError: ``obj`` has none of the supported types.

    """
    if isinstance(obj, PdfObject):
        return obj
    if isinstance(obj, dict):
        converted = DictionaryObject()
        for name, item in obj.items():
            converted[NameObject(name)] = _pdf_objectify(item)
        return converted
    if isinstance(obj, str):
        return NameObject(obj) if obj.startswith("/") else TextStringObject(obj)
    if isinstance(obj, (float, int)):
        return FloatObject(obj)
    if isinstance(obj, list):
        return ArrayObject(map(_pdf_objectify, obj))
    raise NotImplementedError(
        f"{type(obj)=} could not be cast to a PdfObject"
    )

3379 

3380 

def _create_outline_item(
    action_ref: Union[None, IndirectObject],
    title: str,
    color: Union[tuple[float, float, float], str, None],
    italic: bool,
    bold: bool,
) -> TreeObject:
    """
    Build an outline item dictionary.

    Args:
        action_ref: stored as ``/A`` when not ``None``.
        title: stored as ``/Title``.
        color: three color components, or a string converted with
            ``hex_to_rgb``; stored as ``/C`` when truthy.
        italic: adds ``OutlineFontFlag.italic`` to ``/F``.
        bold: adds ``OutlineFontFlag.bold`` to ``/F``.

    Returns:
        The new outline item; ``/F`` is only present when at least one of
        ``italic`` and ``bold`` is set.

    """
    item = TreeObject()
    if action_ref is not None:
        item[NameObject("/A")] = action_ref
    item.update({NameObject("/Title"): create_string_object(title)})

    if color:
        components = hex_to_rgb(color) if isinstance(color, str) else color
        item.update(
            {NameObject("/C"): ArrayObject([FloatObject(part) for part in components])}
        )

    if italic or bold:
        requested = ((italic, OutlineFontFlag.italic), (bold, OutlineFontFlag.bold))
        font_flags = sum(flag for enabled, flag in requested if enabled)
        item.update({NameObject("/F"): NumberObject(font_flags)})
    return item

3410 

3411 

def generate_appearance_stream(
    txt: str,
    sel: list[str],
    da: str,
    font_full_rev: dict[str, bytes],
    rct: RectangleObject,
    font_height: float,
    y_offset: float,
) -> bytes:
    """
    Build the content stream showing the text of a form field.

    Args:
        txt: the text to show; ``\\n`` and ``\\r`` both separate lines.
        sel: lines equal to one of these strings get a box drawn around them.
        da: default appearance string, emitted before the text and again
            after each box.
        font_full_rev: maps a character to its encoded bytes; characters
            not in the map are encoded as UTF-16BE.
        rct: rectangle of the field; only ``width`` and ``height`` are read.
        font_height: font size; lines are spaced by 1.4 times this value.
        y_offset: vertical position of the first line.

    Returns:
        The content stream as bytes.

    """
    chunks: list[bytes] = [
        f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
    ]
    for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
        if line in sel:
            # may be improved but cannot find how to get fill working => replaced with lined box
            chunks.append(
                (
                    f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{da}\n"
                ).encode()
            )
        if line_number == 0:
            chunks.append(f"2 {y_offset} Td\n".encode())
        else:
            # Td is a relative translation
            chunks.append(f"0 {- font_height * 1.4} Td\n".encode())
        enc_line: list[bytes] = [
            font_full_rev.get(c, c.encode("utf-16-be")) for c in line
        ]
        encoded = b"".join(enc_line)
        if any(len(c) >= 2 for c in enc_line):
            # Multi-byte codes present: emit as a hexadecimal string.
            chunks.append(b"<" + encoded.hex().encode() + b"> Tj\n")
        else:
            # In a literal string, backslash and parentheses are syntax and
            # must be escaped, otherwise the string ends early or the
            # content stream becomes unparsable. Backslash goes first.
            escaped = (
                encoded.replace(b"\\", b"\\\\")
                .replace(b"(", b"\\(")
                .replace(b")", b"\\)")
            )
            chunks.append(b"(" + escaped + b") Tj\n")
    chunks.append(b"ET\nQ\nEMC\nQ\n")
    return b"".join(chunks)