1# All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7# * Redistributions of source code must retain the above copyright notice,
8# this list of conditions and the following disclaimer.
9# * Redistributions in binary form must reproduce the above copyright notice,
10# this list of conditions and the following disclaimer in the documentation
11# and/or other materials provided with the distribution.
12# * The name of the author may not be used to endorse or promote products
13# derived from this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25# POSSIBILITY OF SUCH DAMAGE.
26
27
28# This module contains code used by _writer.py to track links in pages
29# being added to the writer until the links can be resolved.
30
31from typing import TYPE_CHECKING, Optional, Union, cast
32
33from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject
34
35if TYPE_CHECKING:
36 from .._page import PageObject
37 from .._reader import PdfReader
38 from .._writer import PdfWriter
39
40
class NamedReferenceLink:
    """Link that targets a named destination, held until it can be resolved."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """
        reference: TextStringObject holding the destination name
        source_pdf: PdfReader whose named destinations the name is looked up in
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the page of the named destination in the source PDF.

        Returns None when the source PDF has no usable destination under
        this name.
        """
        known_destinations = self._source_pdf.named_destinations
        match = known_destinations.get(str(self._reference))
        if not match:
            return None
        return match.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Make the named destination available in the PDF being written.

        target_pdf: PdfWriter which the new link went into
        new_page: page in target_pdf that the destination should point to
        """
        name = str(self._reference)
        # A destination already registered under this name is left untouched.
        if name in target_pdf.named_destinations:
            return
        target_pdf.add_named_destination(name, new_page.page_number)
58
59
class DirectReferenceLink:
    """Link whose destination array names its page directly, held until it can be resolved."""

    def __init__(self, reference: ArrayObject) -> None:
        """reference: destination ArrayObject; element 0 is the Page indirect object"""
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the page stored in the first slot of the destination array."""
        page_reference = self._reference[0]
        return page_reference

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Redirect the destination array to ``new_page``.

        target_pdf: PdfWriter which the new link went into (not used here)
        new_page: page that replaces the first slot of the destination array
        """
        # The array is modified in place, so whatever holds it sees the new page.
        self._reference[0] = new_page
73
74
# Either kind of tracked link; both classes provide find_referenced_page()
# and patch_reference(target_pdf, new_page).
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
76
77
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be copies of each other.

    Every entry of each page's "/Annots" list is turned into a link tracker
    (or None when it is not a supported link). Entries are then matched by
    position, and only positions where both sides produced a tracker are kept.

    Returns a list of (new link, old link) tuples.
    """
    new_candidates = []
    for annotation in new_page.get("/Annots", []):
        new_candidates.append(_build_link(annotation, new_page))

    old_candidates = []
    for annotation in old_page.get("/Annots", []):
        old_candidates.append(_build_link(annotation, old_page))

    pairs = []
    for candidate_new, candidate_old in zip(new_candidates, old_candidates):
        # Skip positions where either side is not a resolvable link.
        if not candidate_new or not candidate_old:
            continue
        pairs.append((candidate_new, candidate_old))
    return pairs
90
91
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Create a link tracker for one annotation, if it is a supported link.

    indirect_object: one entry of the page's "/Annots" list
    page: page the annotation belongs to; its ``pdf`` attribute is passed on
        as the source document for named destinations

    Returns None when the annotation is not a dictionary with "/Subtype"
    "/Link", when its "/A" action is not a "/GoTo" dictionary with a "/D"
    entry, or when it has neither "/A" nor "/Dest".
    """
    src = cast("PdfReader", page.pdf)
    link = indirect_object.get_object()
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        # A malformed file may store something other than a dictionary under
        # "/A"; treat it like any other unsupported action instead of failing
        # on the attribute access below.
        if not isinstance(action, DictionaryObject):
            return None
        if action.get("/S") != "/GoTo":
            return None
        if "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here
111
112
def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching link tracker.

    reference: destination value taken from a link annotation or its action
    source_pdf: PdfReader used to look up named destinations

    Returns a NamedReferenceLink for a TextStringObject, a
    DirectReferenceLink for a non-empty ArrayObject, and None for anything
    else.
    """
    if isinstance(reference, TextStringObject):
        return NamedReferenceLink(reference, source_pdf)
    # DirectReferenceLink reads and writes element 0 of the array, so an empty
    # array carries no page to track and would only raise IndexError later.
    if isinstance(reference, ArrayObject) and len(reference) > 0:
        return DirectReferenceLink(reference)
    return None