Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_page_labels.py: 15%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
Page labels are shown by PDF viewers as "the page number".

A page has a numeric index, starting at 0. Additionally, the page
has a label. In the simplest case:

    label = index + 1

However, the title page and the table of contents might have Roman numerals as
page labels. This makes things more complicated.

Example 1
---------

>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
 8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}

Example 2
---------
The following is a document with pages labeled
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...

1 0 obj
    << /Type /Catalog
       /PageLabels << /Nums [
                        0 << /S /r >>
                        4 << /S /D >>
                        7 << /S /D
                             /P ( A- )
                             /St 8
                        >>
                        % A number tree containing
                        % three page label dictionaries
                        ]
                   >>
    ...
    >>
endobj


§12.4.2 PDF Specification 1.7 and 2.0
=====================================

Entries in a page label dictionary
----------------------------------
The /S key:
D       Decimal Arabic numerals
R       Uppercase Roman numerals
r       Lowercase Roman numerals
A       Uppercase letters (A to Z for the first 26 pages,
                           AA to ZZ for the next 26, and so on)
a       Lowercase letters (a to z for the first 26 pages,
                           aa to zz for the next 26, and so on)
"""
61from collections.abc import Callable, Iterator
62from typing import Optional, cast
64from ._protocols import PdfCommonDocProtocol
65from ._utils import logger_warning
66from .generic import (
67 ArrayObject,
68 DictionaryObject,
69 NullObject,
70 NumberObject,
71 is_null_or_none,
72)
def number2uppercase_roman_numeral(num: int) -> str:
    """
    Convert a positive integer to its uppercase Roman numeral.

    Args:
        num: The number to convert.

    Returns:
        The Roman numeral, e.g. "XIV" for 14. Zero and negative numbers have
        no Roman representation and yield an empty string.

    """
    # Zero and negative numbers cannot be written as Roman numerals. Without
    # this guard, floor division of a negative number gives a negative
    # quotient and a positive remainder, which results in an unrelated
    # numeral (e.g. -5 would become "CMXCV").
    if num <= 0:
        return ""

    # Ordered from the largest to the smallest value, including the
    # subtractive forms, so that a greedy conversion is sufficient.
    roman = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    parts = []
    for decimal, roman_repr in roman:
        count, num = divmod(num, decimal)
        parts.append(roman_repr * count)
        if num == 0:
            # Nothing left to convert.
            break
    return "".join(parts)
def number2lowercase_roman_numeral(number: int) -> str:
    """
    Convert an integer to its lowercase Roman numeral.

    Args:
        number: The number to convert.

    Returns:
        The lowercased result of number2uppercase_roman_numeral.

    """
    uppercase_numeral = number2uppercase_roman_numeral(number)
    return uppercase_numeral.lower()
def number2uppercase_letter(number: int) -> str:
    """
    Convert a positive integer to a label made of uppercase letters.

    The letters act as digits of a bijective base-26 system:
    1 is "A", 26 is "Z", 27 is "AA", 28 is "AB" and so on.

    Args:
        number: The number to convert; must be greater than zero.

    Returns:
        The uppercase letter representation.

    Raises:
        ValueError: If the number is zero or negative.

    """
    if number <= 0:
        raise ValueError("Expecting a positive number")

    code_of_a = ord("A")
    letters = []  # collected from the least significant letter onwards
    remaining = number
    while remaining > 0:
        # Shifting by one maps the "digits" 1..26 onto the offsets 0..25.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(code_of_a + offset))
    return "".join(reversed(letters))
def number2lowercase_letter(number: int) -> str:
    """
    Convert a positive integer to a label made of lowercase letters.

    Args:
        number: The number to convert.

    Returns:
        The lowercased result of number2uppercase_letter.

    """
    uppercase_letters = number2uppercase_letter(number)
    return uppercase_letters.lower()
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Determine the label of a page from a number tree node holding /Nums.

    Args:
        dictionary_object: The number tree node; it must have a /Nums entry.
        index: The index of the page

    Returns:
        The label of the page. ``str(index + 1)`` is used as a fallback if
        /Nums is empty, if the matching value is not a dictionary or if the
        /S entry names an unknown numbering style.

    """
    # [Nums] shall be an array of the form
    #   [ key_1 value_1 key_2 value_2 ... key_n value_n ]
    # where each key_i is an integer and the corresponding
    # value_i shall be the object associated with that key.
    # The keys shall be sorted in numerical order,
    # analogously to the arrangement of keys in a name tree
    # as described in 7.9.6, "Name Trees."
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    # Only complete (key, value) pairs are considered. A dangling key at the
    # end of a malformed array is ignored instead of raising an IndexError.
    end = len(nums) - len(nums) % 2
    value = None
    start_index = 0
    for i in range(0, end, 2):
        start_index = nums[i]
        value = nums[i + 1].get_object()
        # Stop at the last pair or as soon as the next range starts after
        # the requested page.
        if i + 2 == end or nums[i + 2] > index:
            break
    m: dict[Optional[str], Callable[[int], str]] = {
        None: lambda _: "",
        "/D": str,
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(value, dict):
        return str(index + 1)  # Fallback
    mapping_function = m.get(value.get("/S"))
    if mapping_function is None:
        # Unknown numbering style: handle it like the other specification
        # violations instead of raising a KeyError.
        return str(index + 1)  # Fallback
    start = value.get("/St", 1)
    prefix = value.get("/P", "")
    return prefix + mapping_function(index - start_index + start)
def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Look up the label of a page in the /PageLabels number tree.

    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4".

    """
    root = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in root:
        return str(index + 1)  # Fallback

    number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        return get_label_from_nums(number_tree, index)

    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # Every pass of the `while` loop handles one level of the tree; the
        # number of levels is limited.
        max_depth = 100
        depth = 0
        while depth < max_depth:
            for kid in cast(list[DictionaryObject], number_tree["/Kids"]):
                # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
                limits = cast(list[int], kid["/Limits"])
                if limits[0] <= index <= limits[1]:
                    # `kid` is the first node whose limits cover the page.
                    break
            else:
                # No kid covers the page: leave the `while` loop and continue
                # with the fallback.
                break

            if is_null_or_none(kid.get("/Kids", None)):
                # Leaf node: take the label from the node itself.
                return get_label_from_nums(kid, index)

            # Intermediate node: descend one level.
            depth += 1
            if depth == max_depth:  # pragma: no cover
                raise NotImplementedError(
                    "Too deep nesting is not supported."
                )
            number_tree = kid

    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree
def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Put a key, value pair into a Nums array, keeping the keys ordered.

    If the key is already present, its value is replaced.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    Raises:
        ValueError: If the array has an odd number of elements.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    # Walk backwards over the keys until one smaller than the new key is found.
    position = len(nums)
    while position != 0 and key <= nums[position - 2]:
        position -= 2

    key_exists = position < len(nums) and key == nums[position]
    if key_exists:
        nums[position + 1] = value
        return

    nums.insert(position, key)
    nums.insert(position + 1, value)
def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Drop the entries following a given entry up to an upper page index.

    The entry of the key itself is kept; the entries after it are removed as
    long as their keys do not exceed the upper limit.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    Raises:
        ValueError: If the array has an odd number of elements, if the upper
            limit is smaller than the key or if the key is not in the array.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # The first candidate for removal is the pair right after the given entry.
    cursor = nums.index(key) + 2
    while cursor < len(nums) and nums[cursor] <= page_index_to:
        # Remove key and value; the next pair moves to the same position.
        del nums[cursor : cursor + 2]
def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Get the entry which follows the entry of the given key.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The (key, value) pair of the following entry or (None, None) if the
        given entry is the last one.

    Raises:
        ValueError: If the array has an odd number of elements or if the key
            is not in the array.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    following = nums.index(key) + 2
    if following >= len(nums):
        # The given entry is the last one.
        return (None, None)
    return (nums[following], nums[following + 1])