Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_link.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7# * Redistributions of source code must retain the above copyright notice,
8# this list of conditions and the following disclaimer.
9# * Redistributions in binary form must reproduce the above copyright notice,
10# this list of conditions and the following disclaimer in the documentation
11# and/or other materials provided with the distribution.
12# * The name of the author may not be used to endorse or promote products
13# derived from this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25# POSSIBILITY OF SUCH DAMAGE.
# This module contains code used by _writer.py to track links in pages
# being added to the writer until the links can be resolved.
31from typing import TYPE_CHECKING, Optional, Union, cast
33from .._utils import logger_warning
34from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject, is_null_or_none
36if TYPE_CHECKING:
37 from .._page import PageObject
38 from .._reader import PdfReader
39 from .._writer import PdfWriter
class NamedReferenceLink:
    """Link that targets a named destination, kept until it can be resolved."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """Remember the destination name and the reader it was found in.

        reference: TextStringObject holding the destination name
        source_pdf: PdfReader whose named destinations are consulted later
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the page of the named destination in the source PDF.

        Returns None when the source PDF has no (truthy) destination
        registered under this name.
        """
        name = str(self._reference)
        target = self._source_pdf.named_destinations.get(name)
        if not target:
            return None
        return target.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Make the named destination available in the target PDF.

        target_pdf: PdfWriter which the new link went into
        new_page: page in the target PDF the destination should point to
        """
        name = str(self._reference)
        # An existing destination of the same name is left untouched.
        if name in target_pdf.named_destinations:
            return
        target_pdf.add_named_destination(name, new_page.page_number)
class DirectReferenceLink:
    """Link that targets a page directly, kept until the page can be remapped."""

    def __init__(self, reference: ArrayObject) -> None:
        """Remember the destination array.

        reference: an ArrayObject whose first element is the Page indirect object
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the destination array (the page)."""
        destination = self._reference
        return destination[0]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Point the destination array at ``new_page``.

        target_pdf: PdfWriter which the new link went into (not used here)
        new_page: replacement for the first element of the destination array
        """
        # The array handed to __init__ is modified in place.
        destination = self._reference
        destination[0] = new_page
# Either kind of tracked link; both expose find_referenced_page() and
# patch_reference().
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the link annotations of two pages that are assumed to match.

    The "/Annots" arrays of both pages are walked in parallel; a position
    contributes a (new link, old link) tuple only when both annotations at
    that position yield a link. If either "/Annots" value is not an array,
    a warning is logged and an empty list is returned. Arrays of different
    length only trigger a warning; pairing stops at the shorter one.
    """
    annots_new = new_page.get("/Annots", ArrayObject()).get_object()
    annots_old = old_page.get("/Annots", ArrayObject()).get_object()

    # A null/None "/Annots" value is handled like an empty array.
    annots_new = ArrayObject() if is_null_or_none(annots_new) else annots_new
    annots_old = ArrayObject() if is_null_or_none(annots_old) else annots_old

    if not (isinstance(annots_new, ArrayObject) and isinstance(annots_old, ArrayObject)):
        logger_warning(
            f"Expected annotation arrays: {annots_old} {annots_new}. Ignoring annotations.",
            __name__
        )
        return []

    if len(annots_new) != len(annots_old):
        logger_warning(f"Annotation sizes differ: {annots_old} vs. {annots_new}", __name__)

    candidates_new = [_build_link(annotation, new_page) for annotation in annots_new]
    candidates_old = [_build_link(annotation, old_page) for annotation in annots_old]

    paired = []
    for candidate_new, candidate_old in zip(candidates_new, candidates_old):
        # Drop positions where either side is not a link we track.
        if candidate_new and candidate_old:
            paired.append((candidate_new, candidate_old))
    return paired
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Turn one annotation into a tracked link, if it is an internal link.

    indirect_object: an entry of a page's "/Annots" array
    page: the page the annotation belongs to; its ``pdf`` attribute is passed
        on as the source document of the link

    Returns None unless the annotation is a dictionary with "/Subtype"
    "/Link" that carries either a "/GoTo" action with a "/D" entry or a
    "/Dest" entry, and that destination is of a kind _create_link accepts.
    """
    src = cast("PdfReader", page.pdf)
    link = indirect_object.get_object()
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        # Malformed files can store something other than a dictionary under
        # "/A" (e.g. null); such an entry has no ".get" and used to raise
        # AttributeError here. Treat it as "no usable link" instead.
        if not isinstance(action, DictionaryObject):
            return None
        if action.get("/S") != "/GoTo":
            return None
        if "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here
def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching link tracker.

    reference: the destination taken from a link annotation or its action
    source_pdf: reader the destination came from (needed for named links)

    Returns a NamedReferenceLink for a TextStringObject, a
    DirectReferenceLink for a non-empty ArrayObject, and None for anything
    else.
    """
    if isinstance(reference, TextStringObject):
        return NamedReferenceLink(reference, source_pdf)
    if isinstance(reference, ArrayObject):
        # DirectReferenceLink reads and overwrites element 0 of the array, so
        # an empty (malformed) destination array would raise IndexError later.
        if len(reference) == 0:
            return None
        return DirectReferenceLink(reference)
    return None