Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_page_labels.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

101 statements  

1""" 

2Page labels are shown by PDF viewers as "the page number". 

3 

4A page has a numeric index, starting at 0. Additionally, the page 

5has a label. In the simplest case: 

6 

7 label = index + 1 

8 

9However, the title page and the table of contents might have Roman numerals as 

10page labels. This makes things more complicated. 

11 

12Example 1 

13--------- 

14 

15>>> reader.root_object["/PageLabels"]["/Nums"] 

16[0, IndirectObject(18, 0, 139929798197504), 

17 8, IndirectObject(19, 0, 139929798197504)] 

18>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1]) 

19{'/S': '/r'} 

20>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3]) 

21{'/S': '/D'} 

22 

23Example 2 

24--------- 

25The following is a document with pages labeled 

26i, ii, iii, iv, 1, 2, 3, A-8, A-9, ... 

27 

281 0 obj 

29 << /Type /Catalog 

30 /PageLabels << /Nums [ 

31 0 << /S /r >> 

32 4 << /S /D >> 

33 7 << /S /D 

34 /P ( A- ) 

35 /St 8 

36 >> 

37 % A number tree containing 

38 % three page label dictionaries 

39 ] 

40 >> 

41 ... 

42 >> 

43endobj 

44 

45 

46§12.4.2 PDF Specification 1.7 and 2.0 

47===================================== 

48 

49Entries in a page label dictionary 

50---------------------------------- 

51The /S key: 

52D Decimal Arabic numerals 

53R Uppercase Roman numerals 

54r Lowercase Roman numerals 

55A Uppercase letters (A to Z for the first 26 pages, 

56 AA to ZZ for the next 26, and so on) 

57a Lowercase letters (a to z for the first 26 pages, 

58 aa to zz for the next 26, and so on) 

59""" 

60 

61from collections.abc import Callable, Iterator 

62from typing import Optional, cast 

63 

64from ._protocols import PdfCommonDocProtocol 

65from ._utils import logger_warning 

66from .generic import ( 

67 ArrayObject, 

68 DictionaryObject, 

69 NullObject, 

70 NumberObject, 

71 is_null_or_none, 

72) 

73 

74 

def number2uppercase_roman_numeral(num: int) -> str:
    """
    Convert a positive integer to an uppercase Roman numeral.

    Subtractive notation is used (4 is "IV", 900 is "CM"). Thousands are
    written as repeated "M", so 4000 becomes "MMMM".

    Args:
        num: The number to convert.

    Returns:
        The Roman numeral, e.g. "XIV" for 14. An empty string is returned
        for zero and for negative numbers, which have no Roman representation.

    """
    if num <= 0:
        # Floor division would otherwise wrap a negative value around:
        # divmod(-5, 1000) is (-1, 995), so -5 would be rendered as 995.
        return ""

    roman = [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ]

    def numeral_parts(remaining: int) -> Iterator[str]:
        # Greedily consume the largest symbols first.
        for decimal, roman_repr in roman:
            count, remaining = divmod(remaining, decimal)
            yield roman_repr * count
            if remaining == 0:
                return

    return "".join(numeral_parts(num))

101 

102 

def number2lowercase_roman_numeral(number: int) -> str:
    """
    Convert a number to a lowercase Roman numeral.

    The uppercase numeral is produced first and then lower-cased.

    Args:
        number: The number to convert.

    Returns:
        The lowercase form of ``number2uppercase_roman_numeral(number)``.

    """
    uppercase_numeral = number2uppercase_roman_numeral(number)
    return uppercase_numeral.lower()

105 

106 

def number2uppercase_letter(number: int) -> str:
    """
    Convert a positive number to an uppercase letter page label.

    Follows the /A numbering style of §12.4.2 of the PDF specification:
    "A to Z for the first 26 pages, AA to ZZ for the next 26, and so on".
    Each block of 26 numbers repeats a single letter one more time than the
    previous block: 1 -> "A", 26 -> "Z", 27 -> "AA", 28 -> "BB", 52 -> "ZZ",
    53 -> "AAA".

    Args:
        number: The one-based number to convert.

    Returns:
        The letter label for ``number``.

    Raises:
        ValueError: If ``number`` is not positive.

    """
    if number <= 0:
        raise ValueError("Expecting a positive number")
    # Zero-based position: the quotient selects how often the letter is
    # repeated, the remainder selects the letter itself.
    completed_alphabets, letter_offset = divmod(number - 1, 26)
    return chr(ord("A") + letter_offset) * (completed_alphabets + 1)

121 

122 

def number2lowercase_letter(number: int) -> str:
    """
    Convert a positive number to a lowercase letter page label.

    The uppercase label is produced first and then lower-cased.

    Args:
        number: The number to convert.

    Returns:
        The lowercase form of ``number2uppercase_letter(number)``.

    """
    uppercase_label = number2uppercase_letter(number)
    return uppercase_label.lower()

125 

126 

def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Compute the label of a page from the ``/Nums`` array of a number tree node.

    ``/Nums`` shall be an array of the form
    ``[ key_1 value_1 key_2 value_2 ... key_n value_n ]`` where each key is an
    integer and the keys are sorted in numerical order (see 7.9.7
    "Number Trees"). The entry with the greatest key that does not exceed
    ``index`` is used; its key is treated as the page index at which that
    labelling range starts.

    Args:
        dictionary_object: The number tree node holding the ``/Nums`` array.
        index: The index of the page

    Returns:
        The label of the page: the ``/P`` prefix followed by the number
        rendered in the ``/S`` style. ``str(index + 1)`` is returned as a
        fallback when ``/Nums`` is empty, when the selected value is not a
        dictionary, or when ``index`` lies before the first key.

    Raises:
        KeyError: If ``/S`` of the selected entry is not one of the styles
            handled here.

    """
    nums = cast("ArrayObject", dictionary_object["/Nums"])
    value = None
    start_index = 0
    # Only complete (key, value) pairs are visited, so a dangling trailing key
    # in a malformed array cannot raise an IndexError.
    for pair in range(len(nums) // 2):
        key = nums[2 * pair]
        # The first entry is always taken; later entries only while they
        # start at or before the requested page.
        if pair > 0 and key > index:
            break
        start_index = key
        value = nums[2 * pair + 1].get_object()

    # If the /Nums array is not following the specification, if /Nums is
    # empty, or if the page precedes the first labelled range.
    if not isinstance(value, dict) or index < start_index:
        return str(index + 1)  # Fallback

    m: dict[Optional[str], Callable[[int], str]] = {
        None: lambda _: "",
        "/D": str,
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    start = value.get("/St", 1)
    prefix = value.get("/P", "")
    mapping_function = m[value.get("/S")]
    # Offset within the range, shifted by the range's first number (/St).
    return prefix + mapping_function(index - start_index + start)

162 

163 

def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Look up the label of a page in the ``/PageLabels`` number tree.

    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4". ``str(index + 1)`` is
        returned when the document has no ``/PageLabels`` entry or when no
        matching tree node is found.

    Raises:
        NotImplementedError: If the number tree is nested 100 levels deep.

    """
    catalog = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in catalog:
        return str(index + 1)  # Fallback

    node = cast(DictionaryObject, catalog["/PageLabels"].get_object())
    if "/Nums" in node:
        return get_label_from_nums(node, index)

    # node = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
    has_kids = "/Kids" in node and not isinstance(node["/Kids"], NullObject)
    depth = 0  # Limit maximum depth.
    while has_kids and depth < 100:
        # Find the first kid whose /Limits enclose the requested index.
        # candidate = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
        covering_kid = None
        for candidate in cast(list[DictionaryObject], node["/Kids"]):
            bounds = cast(list[int], candidate["/Limits"])
            if bounds[0] <= index <= bounds[1]:
                covering_kid = candidate
                break

        if covering_kid is None:
            # No kid covers the index: continue with the fallback below.
            break

        if is_null_or_none(covering_kid.get("/Kids", None)):
            # The covering kid has no kids of its own: read its /Nums.
            return get_label_from_nums(covering_kid, index)

        # Recursive definition: descend one level and search again.
        depth += 1
        if depth == 100:  # pragma: no cover
            raise NotImplementedError(
                "Too deep nesting is not supported."
            )
        node = covering_kid

    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree

211 

212 

def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Put a key, value pair into a Nums array, keeping the keys in order.

    If the key is already present, its value is overwritten in place;
    otherwise the new pair is inserted in front of the first greater key.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements.

    """
    if len(nums) % 2:
        raise ValueError("A nums like array must have an even number of elements")

    # Walk the keys from the back until the preceding key is smaller than
    # the one to insert (or the front of the array is reached).
    position = len(nums)
    while position and key <= nums[position - 2]:
        position -= 2

    key_already_present = position < len(nums) and key == nums[position]
    if key_already_present:
        nums[position + 1] = value
        return

    nums.insert(position, key)
    nums.insert(position + 1, value)

241 

242 

def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Drop the entries that follow a given entry up to an upper page index.

    The entry for ``key`` itself is kept. Every following pair whose key is
    not greater than ``page_index_to`` is removed from ``nums`` in place.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements, if
            ``page_index_to`` is smaller than ``key``, or if ``key`` is not
            contained in ``nums``.

    """
    if len(nums) % 2:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # Position of the key that directly follows the given entry. Removing a
    # pair shifts the next one into this same position.
    successor = nums.index(key) + 2
    while successor < len(nums) and nums[successor] <= page_index_to:
        for _ in range(2):
            nums.pop(successor)

268 

269 

def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Fetch the (key, value) pair that follows the entry with the given key.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The key and value of the following entry, or ``(None, None)`` if the
        given entry is the last one.

    Raises:
        ValueError: If ``nums`` has an odd number of elements or does not
            contain ``key``.

    """
    if len(nums) % 2:
        raise ValueError("A nums like array must have an even number of elements")

    following = nums.index(key) + 2
    if following >= len(nums):
        return (None, None)
    return (nums[following], nums[following + 1])

290 return (None, None)