Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_link.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

62 statements  

1# All rights reserved. 

2# 

3# Redistribution and use in source and binary forms, with or without 

4# modification, are permitted provided that the following conditions are 

5# met: 

6# 

7# * Redistributions of source code must retain the above copyright notice, 

8# this list of conditions and the following disclaimer. 

9# * Redistributions in binary form must reproduce the above copyright notice, 

10# this list of conditions and the following disclaimer in the documentation 

11# and/or other materials provided with the distribution. 

12# * The name of the author may not be used to endorse or promote products 

13# derived from this software without specific prior written permission. 

14# 

15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

16# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

18# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

19# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

20# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

21# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

22# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

23# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

24# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

25# POSSIBILITY OF SUCH DAMAGE. 

26 

27 

28# This module contains code used by _writer.py to track links in pages 

29# being added to the writer until the links can be resolved. 

30 

31from typing import TYPE_CHECKING, Optional, Union, cast 

32 

33from .._utils import logger_warning 

34from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject, is_null_or_none 

35 

36if TYPE_CHECKING: 

37 from .._page import PageObject 

38 from .._reader import PdfReader 

39 from .._writer import PdfWriter 

40 

41 

class NamedReferenceLink:
    """Pending link that targets a named destination.

    The link is kept in this form until it can be resolved against the
    PDF it was read from and re-registered in the PDF being written.
    """

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """Store the destination name and the reader it came from.

        Args:
            reference: TextStringObject with the named reference.
            source_pdf: Reader whose ``named_destinations`` is consulted
                by :meth:`find_referenced_page`.
        """
        self._reference = reference
        self._source_pdf = source_pdf

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the ``page`` of the matching named destination.

        The name is looked up in the source PDF's ``named_destinations``;
        ``None`` is returned when the lookup yields nothing (or a falsy
        value).
        """
        name = str(self._reference)
        destination = self._source_pdf.named_destinations.get(name)
        if not destination:
            return None
        return destination.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Point the named destination in the new PDF to the new page.

        Args:
            target_pdf: PdfWriter which the new link went into.
            new_page: Page the destination should refer to; its
                ``page_number`` is what gets registered.
        """
        name = str(self._reference)
        # An existing destination with the same name is left untouched.
        if name in target_pdf.named_destinations:
            return
        target_pdf.add_named_destination(name, new_page.page_number)

59 

60 

class DirectReferenceLink:
    """Pending link whose destination array refers to a page directly.

    The link is kept in this form until the referenced page can be
    swapped for its counterpart in the PDF being written.
    """

    def __init__(self, reference: ArrayObject) -> None:
        """Store the destination array.

        Args:
            reference: An ArrayObject whose first element is the Page
                indirect object.
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the destination array."""
        first_entry = self._reference[0]
        return cast(IndirectObject, first_entry)

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Overwrite the first element of the destination array in place.

        Args:
            target_pdf: PdfWriter which the new link went into (not used
                by this implementation).
            new_page: Page reference stored as the array's first element.
        """
        destination = self._reference
        destination[0] = new_page

74 

75 

# Type alias covering both kinds of pending link defined in this module:
# links by destination name and links by destination array.
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]

77 

78 

def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be the same page.

    Each page's ``/Annots`` entry is read; a missing or null entry counts
    as an empty array. Annotations for which ``_build_link`` returns
    ``None`` are dropped *before* pairing, so an extra non-link annotation
    on one page does not shift or discard valid links.

    Returns:
        One list of ``(new link, old link)`` tuples. The list is empty
        (after a warning) when either ``/Annots`` value is not an array.
        When the two pages yield a different number of links a warning is
        logged and pairing stops at the shorter list.
    """
    new_annotations = new_page.get("/Annots", ArrayObject()).get_object()
    old_annotations = old_page.get("/Annots", ArrayObject()).get_object()

    # Treat a null /Annots entry the same way as an absent one.
    if is_null_or_none(new_annotations):
        new_annotations = ArrayObject()
    if is_null_or_none(old_annotations):
        old_annotations = ArrayObject()

    both_are_arrays = isinstance(new_annotations, ArrayObject) and isinstance(old_annotations, ArrayObject)
    if not both_are_arrays:
        logger_warning(
            f"Expected annotation arrays: {old_annotations} {new_annotations}. Ignoring annotations.",
            __name__
        )
        return []

    def _links_of(annotations, page):
        # Keep only the annotations that _build_link turns into a link.
        found = []
        for annotation in annotations:
            candidate = _build_link(annotation, page)
            if candidate is not None:
                found.append(candidate)
        return found

    new_links = _links_of(new_annotations, new_page)
    old_links = _links_of(old_annotations, old_page)

    if len(new_links) != len(old_links):
        logger_warning(
            f"Annotation sizes differ: {old_links} vs. {new_links}",
            __name__,
        )

    # zip() stops at the shorter of the two lists.
    return [(new_link, old_link) for new_link, old_link in zip(new_links, old_links)]

116 

117 

def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Turn one annotation into a pending link, if it is an internal link.

    Args:
        indirect_object: The annotation entry; it is resolved with
            ``get_object()`` before inspection.
        page: Page the annotation belongs to; its ``pdf`` attribute is
            passed on as the source PDF for named destinations.

    Returns:
        The link built by ``_create_link`` from the ``/D`` entry of a
        ``/GoTo`` action or from the annotation's ``/Dest`` entry.
        ``None`` when the annotation is not a ``/Link`` dictionary, when
        its ``/A`` entry is not a dictionary, is not a ``/GoTo`` action or
        has no ``/D``, or when neither ``/A`` nor ``/Dest`` is present.
    """
    src = cast("PdfReader", page.pdf)
    link = indirect_object.get_object()
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        # Guard against an /A value that is not a dictionary (presumably
        # only seen in malformed files): without this check the ``.get``
        # call below would fail instead of the annotation being skipped,
        # which is how a non-dictionary annotation is already handled above.
        if not isinstance(action, DictionaryObject):
            return None
        if action.get("/S") != "/GoTo":
            return None

        if "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here

137 

138 

def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching pending-link class.

    Args:
        reference: The destination value taken from an annotation.
        source_pdf: Reader handed to ``NamedReferenceLink`` for name lookup.

    Returns:
        A ``NamedReferenceLink`` for a ``TextStringObject``, a
        ``DirectReferenceLink`` for an ``ArrayObject``, otherwise ``None``.
    """
    result = None
    if isinstance(reference, TextStringObject):
        result = NamedReferenceLink(reference, source_pdf)
    elif isinstance(reference, ArrayObject):
        result = DirectReferenceLink(reference)
    return result