Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_page_labels.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

99 statements  

1""" 

2Page labels are shown by PDF viewers as "the page number". 

3 

4A page has a numeric index, starting at 0. Additionally, the page 

5has a label. In the most simple case: 

6 

7 label = index + 1 

8 

9However, the title page and the table of contents might have Roman numerals as 

10page labels. This makes things more complicated. 

11 

12Example 1 

13--------- 

14 

15>>> reader.root_object["/PageLabels"]["/Nums"] 

16[0, IndirectObject(18, 0, 139929798197504), 

17 8, IndirectObject(19, 0, 139929798197504)] 

18>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1]) 

19{'/S': '/r'} 

20>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3]) 

21{'/S': '/D'} 

22 

23Example 2 

24--------- 

25The following is a document with pages labeled 

26i, ii, iii, iv, 1, 2, 3, A-8, A-9, ... 

27 

281 0 obj 

29 << /Type /Catalog 

30 /PageLabels << /Nums [ 

31 0 << /S /r >> 

32 4 << /S /D >> 

33 7 << /S /D 

34 /P ( A- ) 

35 /St 8 

36 >> 

37 % A number tree containing 

38 % three page label dictionaries 

39 ] 

40 >> 

41 ... 

42 >> 

43endobj 

44 

45 

46§12.4.2 PDF Specification 1.7 and 2.0 

47===================================== 

48 

49Entries in a page label dictionary 

50---------------------------------- 

51The /S key: 

52D Decimal Arabic numerals 

53R Uppercase Roman numerals 

54r Lowercase Roman numerals 

55A Uppercase letters (A to Z for the first 26 pages, 

56 AA to ZZ for the next 26, and so on) 

57a Lowercase letters (a to z for the first 26 pages, 

58 aa to zz for the next 26, and so on) 

59""" 

60 

61from typing import Iterator, List, Optional, Tuple, cast 

62 

63from ._protocols import PdfCommonDocProtocol 

64from ._utils import logger_warning 

65from .generic import ( 

66 ArrayObject, 

67 DictionaryObject, 

68 NullObject, 

69 NumberObject, 

70 is_null_or_none, 

71) 

72 

73 

74def number2uppercase_roman_numeral(num: int) -> str: 

75 roman = [ 

76 (1000, "M"), 

77 (900, "CM"), 

78 (500, "D"), 

79 (400, "CD"), 

80 (100, "C"), 

81 (90, "XC"), 

82 (50, "L"), 

83 (40, "XL"), 

84 (10, "X"), 

85 (9, "IX"), 

86 (5, "V"), 

87 (4, "IV"), 

88 (1, "I"), 

89 ] 

90 

91 def roman_num(num: int) -> Iterator[str]: 

92 for decimal, roman_repr in roman: 

93 x, _ = divmod(num, decimal) 

94 yield roman_repr * x 

95 num -= decimal * x 

96 if num <= 0: 

97 break 

98 

99 return "".join(list(roman_num(num))) 

100 

101 

102def number2lowercase_roman_numeral(number: int) -> str: 

103 return number2uppercase_roman_numeral(number).lower() 

104 

105 

106def number2uppercase_letter(number: int) -> str: 

107 if number <= 0: 

108 raise ValueError("Expecting a positive number") 

109 alphabet = [chr(i) for i in range(ord("A"), ord("Z") + 1)] 

110 rep = "" 

111 while number > 0: 

112 remainder = number % 26 

113 if remainder == 0: 

114 remainder = 26 

115 rep = alphabet[remainder - 1] + rep 

116 # update 

117 number -= remainder 

118 number = number // 26 

119 return rep 

120 

121 

122def number2lowercase_letter(number: int) -> str: 

123 return number2uppercase_letter(number).lower() 

124 

125 

126def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str: 

127 # [Nums] shall be an array of the form 

128 # [ key_1 value_1 key_2 value_2 ... key_n value_n ] 

129 # where each key_i is an integer and the corresponding 

130 # value_i shall be the object associated with that key. 

131 # The keys shall be sorted in numerical order, 

132 # analogously to the arrangement of keys in a name tree 

133 # as described in 7.9.6, "Name Trees." 

134 nums = cast(ArrayObject, dictionary_object["/Nums"]) 

135 i = 0 

136 value = None 

137 start_index = 0 

138 while i < len(nums): 

139 start_index = nums[i] 

140 value = nums[i + 1].get_object() 

141 if i + 2 == len(nums): 

142 break 

143 if nums[i + 2] > index: 

144 break 

145 i += 2 

146 m = { 

147 None: lambda _: "", 

148 "/D": lambda n: str(n), 

149 "/R": number2uppercase_roman_numeral, 

150 "/r": number2lowercase_roman_numeral, 

151 "/A": number2uppercase_letter, 

152 "/a": number2lowercase_letter, 

153 } 

154 # if /Nums array is not following the specification or if /Nums is empty 

155 if not isinstance(value, dict): 

156 return str(index + 1) # Fallback 

157 start = value.get("/St", 1) 

158 prefix = value.get("/P", "") 

159 return prefix + m[value.get("/S")](index - start_index + start) 

160 

161 

162def index2label(reader: PdfCommonDocProtocol, index: int) -> str: 

163 """ 

164 See 7.9.7 "Number Trees". 

165 

166 Args: 

167 reader: The PdfReader 

168 index: The index of the page 

169 

170 Returns: 

171 The label of the page, e.g. "iv" or "4". 

172 

173 """ 

174 root = cast(DictionaryObject, reader.root_object) 

175 if "/PageLabels" not in root: 

176 return str(index + 1) # Fallback 

177 number_tree = cast(DictionaryObject, root["/PageLabels"].get_object()) 

178 if "/Nums" in number_tree: 

179 return get_label_from_nums(number_tree, index) 

180 if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject): 

181 # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]} 

182 # Limit maximum depth. 

183 level = 0 

184 while level < 100: 

185 kids = cast(List[DictionaryObject], number_tree["/Kids"]) 

186 for kid in kids: 

187 # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]} 

188 limits = cast(List[int], kid["/Limits"]) 

189 if limits[0] <= index <= limits[1]: 

190 if not is_null_or_none(kid.get("/Kids", None)): 

191 # Recursive definition. 

192 level += 1 

193 if level == 100: # pragma: no cover 

194 raise NotImplementedError( 

195 "Too deep nesting is not supported." 

196 ) 

197 number_tree = kid 

198 # Exit the inner `for` loop and continue at the next level with the 

199 # next iteration of the `while` loop. 

200 break 

201 return get_label_from_nums(kid, index) 

202 else: 

203 # When there are no kids, make sure to exit the `while` loop directly 

204 # and continue with the fallback. 

205 break 

206 

207 logger_warning(f"Could not reliably determine page label for {index}.", __name__) 

208 return str(index + 1) # Fallback if neither /Nums nor /Kids is in the number_tree 

209 

210 

211def nums_insert( 

212 key: NumberObject, 

213 value: DictionaryObject, 

214 nums: ArrayObject, 

215) -> None: 

216 """ 

217 Insert a key, value pair in a Nums array. 

218 

219 See 7.9.7 "Number Trees". 

220 

221 Args: 

222 key: number key of the entry 

223 value: value of the entry 

224 nums: Nums array to modify 

225 

226 """ 

227 if len(nums) % 2 != 0: 

228 raise ValueError("A nums like array must have an even number of elements") 

229 

230 i = len(nums) 

231 while i != 0 and key <= nums[i - 2]: 

232 i = i - 2 

233 

234 if i < len(nums) and key == nums[i]: 

235 nums[i + 1] = value 

236 else: 

237 nums.insert(i, key) 

238 nums.insert(i + 1, value) 

239 

240 

241def nums_clear_range( 

242 key: NumberObject, 

243 page_index_to: int, 

244 nums: ArrayObject, 

245) -> None: 

246 """ 

247 Remove all entries in a number tree in a range after an entry. 

248 

249 See 7.9.7 "Number Trees". 

250 

251 Args: 

252 key: number key of the entry before the range 

253 page_index_to: The page index of the upper limit of the range 

254 nums: Nums array to modify 

255 

256 """ 

257 if len(nums) % 2 != 0: 

258 raise ValueError("A nums like array must have an even number of elements") 

259 if page_index_to < key: 

260 raise ValueError("page_index_to must be greater or equal than key") 

261 

262 i = nums.index(key) + 2 

263 while i < len(nums) and nums[i] <= page_index_to: 

264 nums.pop(i) 

265 nums.pop(i) 

266 

267 

268def nums_next( 

269 key: NumberObject, 

270 nums: ArrayObject, 

271) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]: 

272 """ 

273 Return the (key, value) pair of the entry after the given one. 

274 

275 See 7.9.7 "Number Trees". 

276 

277 Args: 

278 key: number key of the entry 

279 nums: Nums array 

280 

281 """ 

282 if len(nums) % 2 != 0: 

283 raise ValueError("A nums like array must have an even number of elements") 

284 

285 i = nums.index(key) + 2 

286 if i < len(nums): 

287 return (nums[i], nums[i + 1]) 

288 return (None, None)