Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_page_labels.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1""" 

2Page labels are shown by PDF viewers as "the page number". 

3 

4A page has a numeric index, starting at 0. Additionally, the page 

5has a label. In the most simple case: 

6 

7 label = index + 1 

8 

9However, the title page and the table of contents might have Roman numerals as 

10page labels. This makes things more complicated. 

11 

12Example 1 

13--------- 

14 

15>>> reader.root_object["/PageLabels"]["/Nums"] 

16[0, IndirectObject(18, 0, 139929798197504), 

17 8, IndirectObject(19, 0, 139929798197504)] 

18>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1]) 

19{'/S': '/r'} 

20>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3]) 

21{'/S': '/D'} 

22 

23Example 2 

24--------- 

25The following is a document with pages labeled 

26i, ii, iii, iv, 1, 2, 3, A-8, A-9, ... 

27 

281 0 obj 

29 << /Type /Catalog 

30 /PageLabels << /Nums [ 

31 0 << /S /r >> 

32 4 << /S /D >> 

33 7 << /S /D 

34 /P ( A- ) 

35 /St 8 

36 >> 

37 % A number tree containing 

38 % three page label dictionaries 

39 ] 

40 >> 

41 ... 

42 >> 

43endobj 

44 

45 

46§12.4.2 PDF Specification 1.7 and 2.0 

47===================================== 

48 

49Entries in a page label dictionary 

50---------------------------------- 

51The /S key: 

52D Decimal Arabic numerals 

53R Uppercase Roman numerals 

54r Lowercase Roman numerals 

55A Uppercase letters (A to Z for the first 26 pages, 

56 AA to ZZ for the next 26, and so on) 

57a Lowercase letters (a to z for the first 26 pages, 

58 aa to zz for the next 26, and so on) 

59""" 

60 

61from collections.abc import Iterator 

62from typing import Optional, cast 

63 

64from ._protocols import PdfCommonDocProtocol 

65from ._utils import logger_warning 

66from .generic import ( 

67 ArrayObject, 

68 DictionaryObject, 

69 NullObject, 

70 NumberObject, 

71 is_null_or_none, 

72) 

73 

74 

75def number2uppercase_roman_numeral(num: int) -> str: 

76 roman = [ 

77 (1000, "M"), 

78 (900, "CM"), 

79 (500, "D"), 

80 (400, "CD"), 

81 (100, "C"), 

82 (90, "XC"), 

83 (50, "L"), 

84 (40, "XL"), 

85 (10, "X"), 

86 (9, "IX"), 

87 (5, "V"), 

88 (4, "IV"), 

89 (1, "I"), 

90 ] 

91 

92 def roman_num(num: int) -> Iterator[str]: 

93 for decimal, roman_repr in roman: 

94 x, _ = divmod(num, decimal) 

95 yield roman_repr * x 

96 num -= decimal * x 

97 if num <= 0: 

98 break 

99 

100 return "".join(list(roman_num(num))) 

101 

102 

103def number2lowercase_roman_numeral(number: int) -> str: 

104 return number2uppercase_roman_numeral(number).lower() 

105 

106 

107def number2uppercase_letter(number: int) -> str: 

108 if number <= 0: 

109 raise ValueError("Expecting a positive number") 

110 alphabet = [chr(i) for i in range(ord("A"), ord("Z") + 1)] 

111 rep = "" 

112 while number > 0: 

113 remainder = number % 26 

114 if remainder == 0: 

115 remainder = 26 

116 rep = alphabet[remainder - 1] + rep 

117 # update 

118 number -= remainder 

119 number = number // 26 

120 return rep 

121 

122 

123def number2lowercase_letter(number: int) -> str: 

124 return number2uppercase_letter(number).lower() 

125 

126 

127def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str: 

128 # [Nums] shall be an array of the form 

129 # [ key_1 value_1 key_2 value_2 ... key_n value_n ] 

130 # where each key_i is an integer and the corresponding 

131 # value_i shall be the object associated with that key. 

132 # The keys shall be sorted in numerical order, 

133 # analogously to the arrangement of keys in a name tree 

134 # as described in 7.9.6, "Name Trees." 

135 nums = cast(ArrayObject, dictionary_object["/Nums"]) 

136 i = 0 

137 value = None 

138 start_index = 0 

139 while i < len(nums): 

140 start_index = nums[i] 

141 value = nums[i + 1].get_object() 

142 if i + 2 == len(nums): 

143 break 

144 if nums[i + 2] > index: 

145 break 

146 i += 2 

147 m = { 

148 None: lambda _: "", 

149 "/D": lambda n: str(n), 

150 "/R": number2uppercase_roman_numeral, 

151 "/r": number2lowercase_roman_numeral, 

152 "/A": number2uppercase_letter, 

153 "/a": number2lowercase_letter, 

154 } 

155 # if /Nums array is not following the specification or if /Nums is empty 

156 if not isinstance(value, dict): 

157 return str(index + 1) # Fallback 

158 start = value.get("/St", 1) 

159 prefix = value.get("/P", "") 

160 return prefix + m[value.get("/S")](index - start_index + start) 

161 

162 

163def index2label(reader: PdfCommonDocProtocol, index: int) -> str: 

164 """ 

165 See 7.9.7 "Number Trees". 

166 

167 Args: 

168 reader: The PdfReader 

169 index: The index of the page 

170 

171 Returns: 

172 The label of the page, e.g. "iv" or "4". 

173 

174 """ 

175 root = cast(DictionaryObject, reader.root_object) 

176 if "/PageLabels" not in root: 

177 return str(index + 1) # Fallback 

178 number_tree = cast(DictionaryObject, root["/PageLabels"].get_object()) 

179 if "/Nums" in number_tree: 

180 return get_label_from_nums(number_tree, index) 

181 if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject): 

182 # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]} 

183 # Limit maximum depth. 

184 level = 0 

185 while level < 100: 

186 kids = cast(list[DictionaryObject], number_tree["/Kids"]) 

187 for kid in kids: 

188 # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]} 

189 limits = cast(list[int], kid["/Limits"]) 

190 if limits[0] <= index <= limits[1]: 

191 if not is_null_or_none(kid.get("/Kids", None)): 

192 # Recursive definition. 

193 level += 1 

194 if level == 100: # pragma: no cover 

195 raise NotImplementedError( 

196 "Too deep nesting is not supported." 

197 ) 

198 number_tree = kid 

199 # Exit the inner `for` loop and continue at the next level with the 

200 # next iteration of the `while` loop. 

201 break 

202 return get_label_from_nums(kid, index) 

203 else: 

204 # When there are no kids, make sure to exit the `while` loop directly 

205 # and continue with the fallback. 

206 break 

207 

208 logger_warning(f"Could not reliably determine page label for {index}.", __name__) 

209 return str(index + 1) # Fallback if neither /Nums nor /Kids is in the number_tree 

210 

211 

212def nums_insert( 

213 key: NumberObject, 

214 value: DictionaryObject, 

215 nums: ArrayObject, 

216) -> None: 

217 """ 

218 Insert a key, value pair in a Nums array. 

219 

220 See 7.9.7 "Number Trees". 

221 

222 Args: 

223 key: number key of the entry 

224 value: value of the entry 

225 nums: Nums array to modify 

226 

227 """ 

228 if len(nums) % 2 != 0: 

229 raise ValueError("A nums like array must have an even number of elements") 

230 

231 i = len(nums) 

232 while i != 0 and key <= nums[i - 2]: 

233 i = i - 2 

234 

235 if i < len(nums) and key == nums[i]: 

236 nums[i + 1] = value 

237 else: 

238 nums.insert(i, key) 

239 nums.insert(i + 1, value) 

240 

241 

242def nums_clear_range( 

243 key: NumberObject, 

244 page_index_to: int, 

245 nums: ArrayObject, 

246) -> None: 

247 """ 

248 Remove all entries in a number tree in a range after an entry. 

249 

250 See 7.9.7 "Number Trees". 

251 

252 Args: 

253 key: number key of the entry before the range 

254 page_index_to: The page index of the upper limit of the range 

255 nums: Nums array to modify 

256 

257 """ 

258 if len(nums) % 2 != 0: 

259 raise ValueError("A nums like array must have an even number of elements") 

260 if page_index_to < key: 

261 raise ValueError("page_index_to must be greater or equal than key") 

262 

263 i = nums.index(key) + 2 

264 while i < len(nums) and nums[i] <= page_index_to: 

265 nums.pop(i) 

266 nums.pop(i) 

267 

268 

269def nums_next( 

270 key: NumberObject, 

271 nums: ArrayObject, 

272) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]: 

273 """ 

274 Return the (key, value) pair of the entry after the given one. 

275 

276 See 7.9.7 "Number Trees". 

277 

278 Args: 

279 key: number key of the entry 

280 nums: Nums array 

281 

282 """ 

283 if len(nums) % 2 != 0: 

284 raise ValueError("A nums like array must have an even number of elements") 

285 

286 i = nums.index(key) + 2 

287 if i < len(nums): 

288 return (nums[i], nums[i + 1]) 

289 return (None, None)