Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/outlines.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

227 statements  

1# SPDX-FileCopyrightText: 2022 James R. Barlow, 2020 Matthias Erll 

2 

3# SPDX-License-Identifier: MPL-2.0 

4 

5"""Support for document outlines (e.g. table of contents).""" 

6 

7from __future__ import annotations 

8 

9from collections.abc import Iterable 

10from enum import Enum 

11from itertools import chain 

12from typing import cast 

13 

14from pikepdf._core import Page, Pdf 

15from pikepdf.objects import Array, Dictionary, Name, Object, String 

16 

17 

class PageLocation(Enum):
    """Enumerate the fit types that a page destination may use.

    Each member's *name* is what gets written into a destination array
    (e.g. ``/XYZ``) and is also what string lookups such as
    ``PageLocation['FitH']`` resolve against. The integer values are plain
    identifiers; nothing in this module's visible code depends on them.
    """

    # Fit types with coordinate arguments (see PAGE_LOCATION_ARGS) are noted.
    XYZ = 1  # left, top, zoom
    Fit = 2
    FitH = 3  # top
    FitV = 4  # left
    FitR = 5  # left, bottom, right, top
    FitB = 6
    FitBH = 7  # top
    FitBV = 8  # left

29 

30 

# Names of the numeric arguments that follow each fit type inside a
# destination array, in the order they are written. Fit types missing from
# this table (Fit, FitB) are written without any arguments.
PAGE_LOCATION_ARGS = {
    PageLocation.XYZ: ('left', 'top', 'zoom'),
    PageLocation.FitH: ('top',),
    PageLocation.FitV: ('left',),
    PageLocation.FitR: ('left', 'bottom', 'right', 'top'),
    PageLocation.FitBH: ('top',),
    PageLocation.FitBV: ('left',),
}
# Union of every argument name used by at least one fit type.
ALL_PAGE_LOCATION_KWARGS = set(chain(*PAGE_LOCATION_ARGS.values()))

40 

41 

def make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: PageLocation | str | None = None,
    *,
    left: float | None = None,
    top: float | None = None,
    right: float | None = None,
    bottom: float | None = None,
    zoom: float | None = None,
) -> Array:
    """Build a destination ``Array`` that points at one page of *pdf*.

    Arguments:
        pdf: PDF document object that owns the target page.
        page_num: Page number (zero-based).
        page_location: Optional fit type, given either as a
            :class:`PageLocation` member or as its name (e.g. ``'FitH'``).
        left: Left coordinate of the viewport, for fit types that take one.
        top: Top coordinate of the viewport, for fit types that take one.
        right: Right coordinate of the viewport, for fit types that take one.
        bottom: Bottom coordinate of the viewport, for fit types that take
            one.
        zoom: Zoom level, for fit types that take one.

    Only the coordinates that belong to the chosen *page_location* end up in
    the result; the rest are ignored.

    Returns:
        The destination array.
    """
    viewport = dict(left=left, top=top, right=right, bottom=bottom, zoom=zoom)
    return _make_page_destination(
        pdf, page_num, page_location=page_location, **viewport
    )

78 

79 

def _make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: PageLocation | str | None = None,
    **kwargs,
) -> Array:
    """Assemble a destination array for page *page_num* of *pdf*.

    Arguments:
        pdf: PDF document object that owns the target page.
        page_num: Index into ``pdf.pages``.
        page_location: Fit type as a :class:`PageLocation` member or its
            name. When falsy, the destination uses ``/Fit``.
        **kwargs: Viewport coordinates keyed by name; entries whose value is
            ``None`` are treated as not supplied.

    Returns:
        ``[page, /FitType, args...]`` as an ``Array``.

    Raises:
        ValueError: If *page_location* is a name that ``PageLocation`` does
            not define.
    """
    supplied = {name: value for name, value in kwargs.items() if value is not None}
    dest: list = [pdf.pages[page_num].obj]

    if not page_location:
        dest.append(Name.Fit)
        return Array(dest)

    if isinstance(page_location, PageLocation):
        fit_type = page_location
        fit_name = fit_type.name
    else:
        fit_name = page_location
        try:
            fit_type = PageLocation[fit_name]
        except KeyError:
            raise ValueError(
                f"Invalid or unsupported page location type {fit_name}"
            ) from None

    dest.append(Name(f'/{fit_name}'))
    arg_names = PAGE_LOCATION_ARGS.get(fit_type)
    if arg_names:
        # Coordinates the caller did not supply are written as 0.
        dest.extend(supplied.get(name, 0) for name in arg_names)
    return Array(dest)

108 

109 

class OutlineStructureError(Exception):
    """Raised when the outline data structure of a document is malformed."""

112 

113 

class OutlineItem:
    """Manage a single item in a PDF document outlines structure.

    Includes nested items.

    Arguments:
        title: Title of the outlines item.
        destination: Page number, destination name, or any other PDF object
            to be used as a reference when clicking on the outlines entry. Note
            this should be ``None`` if an action is used instead. If set to a
            page number, it will be resolved to a reference at the time of
            writing the outlines back to the document.
        page_location: Supplemental page location for a page number
            in ``destination``, e.g. ``PageLocation.Fit``. May also be
            a simple string such as ``'FitH'``.
        action: Action to perform when clicking on this item. May not be
            passed together with ``destination``. If both attributes are set
            later on, the action is ignored during writing.
        obj: ``Dictionary`` object representing this outlines item in a ``Pdf``.
            May be ``None`` for creating a new object. If present, an existing
            object is modified in-place during writing and original attributes
            are retained.
        left, top, bottom, right, zoom: Describes the viewport position associated
            with a destination.

    This object does not contain any information about higher-level or
    neighboring elements.

    Valid destination arrays:
        [page /XYZ left top zoom]
    generally
        [page, PageLocationEntry, 0 to 4 ints]
    """

    def __init__(
        self,
        title: str,
        destination: Array | String | Name | int | None = None,
        page_location: PageLocation | str | None = None,
        action: Dictionary | None = None,
        obj: Dictionary | None = None,
        *,
        left: float | None = None,
        top: float | None = None,
        right: float | None = None,
        bottom: float | None = None,
        zoom: float | None = None,
    ):
        """Initialize OutlineItem.

        Raises:
            ValueError: If both *destination* and *action* are given.
        """
        if destination is not None and action is not None:
            raise ValueError("Only one of destination and action may be set")
        self.title = title
        self.destination = destination
        self.page_location = page_location
        self.action = action
        self.obj = obj
        # Keep only the viewport arguments the caller actually supplied.
        viewport = dict(left=left, top=top, right=right, bottom=bottom, zoom=zoom)
        self.page_location_kwargs = {
            k: v for k, v in viewport.items() if v is not None
        }
        self.is_closed = False
        self.children: list[OutlineItem] = []

    def __str__(self):
        """Return ``'<indicator> <title> -> <destination summary>'``.

        The indicator is ``[ ]`` for an item without children, ``[+]`` for a
        closed item with children and ``[-]`` for an open one.
        """
        if not self.children:
            oc_indicator = '[ ]'
        elif self.is_closed:
            oc_indicator = '[+]'
        else:
            oc_indicator = '[-]'

        destination = self.destination
        if destination is None:
            dest = '<Action>'
        elif isinstance(destination, int):
            # Page number
            dest = f'<Page {destination}>'
        elif isinstance(destination, Array):
            # 12.3.2.2 Explicit destination
            # [raw_page, /PageLocation.SomeThing, integer parameters for viewport]
            raw_page = destination[0]
            page = Page(raw_page)
            dest = page.label
        elif isinstance(destination, String):
            # 12.3.2.2 Named destination, byte string reference to Names
            dest = (
                f"<Named Destination in document .Root.Names dictionary: "
                f"{destination}>"
            )
        elif isinstance(destination, Name):
            # 12.3.2.2 Named destination, name object (PDF 1.1)
            dest = (
                f"<Named Destination in document .Root.Dests dictionary: "
                f"{destination}>"
            )
        else:
            # Any other object type: previously ``dest`` was left unbound
            # here and formatting the item raised UnboundLocalError.
            dest = f'<Unrecognized destination: {destination!r}>'
        return f'{oc_indicator} {self.title} -> {dest}'

    def __repr__(self):
        """Return a short representation showing the class name and title."""
        return f'<pikepdf.{self.__class__.__name__}: "{self.title}">'

    @classmethod
    def from_dictionary_object(cls, obj: Dictionary):
        """Create a ``OutlineItem`` from a ``Dictionary``.

        Does not process nested items.

        Arguments:
            obj: ``Dictionary`` object representing a single outline node.

        Raises:
            OutlineStructureError: If ``/Dest`` is present but is not an
                array, string or name, or if ``/A`` is present but is not a
                dictionary.
        """
        title = str(obj.Title)
        destination = obj.get(Name.Dest)
        if destination is not None and not isinstance(
            destination, (Array, String, Name)
        ):
            # 12.3.3: /Dest may be a name, byte string or array
            raise OutlineStructureError(
                f"Unexpected object type in Outline's /Dest: {destination!r}"
            )
        action = obj.get(Name.A)
        if action is not None and not isinstance(action, Dictionary):
            raise OutlineStructureError(
                f"Unexpected object type in Outline's /A: {action!r}"
            )
        # NOTE(review): a node carrying both /Dest and /A makes the
        # constructor raise ValueError -- confirm that is the intended
        # outcome for such input.
        return cls(title, destination=destination, action=action, obj=obj)

    def to_dictionary_object(self, pdf: Pdf, create_new: bool = False) -> Dictionary:
        """Create/update a ``Dictionary`` object from this outline node.

        Page numbers are resolved to a page reference on the input
        ``Pdf`` object. The resolved destination array replaces the page
        number in ``self.destination``.

        Arguments:
            pdf: PDF document object.
            create_new: If set to ``True``, creates a new object instead of
                modifying an existing one in-place.

        Returns:
            The dictionary that was written; it is also stored in
            ``self.obj``.
        """
        if create_new or self.obj is None:
            self.obj = obj = pdf.make_indirect(Dictionary())
        else:
            obj = self.obj
        obj.Title = self.title
        if self.destination is not None:
            if isinstance(self.destination, int):
                self.destination = make_page_destination(
                    pdf,
                    self.destination,
                    self.page_location,
                    **self.page_location_kwargs,
                )
            obj.Dest = self.destination
            # A destination takes precedence: drop any action entry.
            if Name.A in obj:
                del obj.A
        elif self.action is not None:
            obj.A = self.action
            if Name.Dest in obj:
                del obj.Dest
        return obj

269 

270 

class Outline:
    """Maintains an intuitive interface for creating and editing PDF document outlines.

    See {{ pdfrm }} section 12.3.

    Arguments:
        pdf: PDF document object.
        max_depth: Maximum recursion depth to consider when reading the outline.
        strict: If set to ``False`` (default) silently ignores structural errors.
            Setting it to ``True`` raises a
            :class:`pikepdf.OutlineStructureError`
            if any object references re-occur while the outline is being read or
            written.

    Used as a context manager, changes are written back to the document when
    the ``with`` block exits without an exception.

    See Also:
        :meth:`pikepdf.Pdf.open_outline`
    """

    def __init__(self, pdf: Pdf, max_depth: int = 15, strict: bool = False):
        """Initialize Outline."""
        # Loaded lazily on first access to ``root``.
        self._root: list[OutlineItem] | None = None
        self._pdf = pdf
        self._max_depth = max_depth
        self._strict = strict
        # True while inside a ``with`` block; defers saving until exit.
        self._updating = False

    def __str__(self):
        """Return the string form of the list of top-level items."""
        return str(self.root)

    def __repr__(self):
        """Return a short representation with the number of top-level items."""
        return f'<pikepdf.{self.__class__.__name__}: {len(self.root)} items>'

    def _repr_pretty_(self, p, cycle):
        """Pretty-print the top-level items through the printer *p*."""
        if cycle:
            p.text("...")
        else:
            with p.group(2, "pikepdf.models.outlines.Outline<\n", "\n>"):
                for item in self.root:
                    p.breakable()
                    p.pretty(str(item))

    def __enter__(self):
        """Start a batch of edits; saving is deferred until ``__exit__``."""
        self._updating = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Save the outline unless the block raised; always end the batch."""
        try:
            if exc_type is not None:
                return
            self._save()
        finally:
            self._updating = False

    def _save_level_outline(
        self,
        parent: Dictionary,
        outline_items: Iterable[OutlineItem],
        level: int,
        visited_objs: set[tuple[int, int]],
    ):
        """Write *outline_items* as the children of *parent*, recursively.

        Sets ``/Parent``, ``/Prev`` and ``/Next`` on each written item and
        ``/First``, ``/Last`` and ``/Count`` on *parent*.

        Arguments:
            parent: Dictionary that receives the items as children.
            outline_items: Items to write at this level.
            level: Depth of this level; children of items at
                ``max_depth`` or deeper are not written.
            visited_objs: ``objgen`` values already written during this save.

        Raises:
            OutlineStructureError: In strict mode, if an item's object was
                already written elsewhere in the structure.
        """
        count = 0
        prev: Dictionary | None = None
        first: Dictionary | None = None
        for item in outline_items:
            out_obj = item.to_dictionary_object(self._pdf)
            objgen = out_obj.objgen
            if objgen in visited_objs:
                if self._strict:
                    raise OutlineStructureError(
                        f"Outline object {objgen} reoccurred in structure"
                    )
                # Non-strict: write the repeated item to a fresh object.
                out_obj = item.to_dictionary_object(self._pdf, create_new=True)
            else:
                visited_objs.add(objgen)

            out_obj.Parent = parent
            count += 1
            if prev is not None:
                prev.Next = out_obj
                out_obj.Prev = prev
            else:
                first = out_obj
                # The first sibling must not keep a stale /Prev link.
                if Name.Prev in out_obj:
                    del out_obj.Prev
            prev = out_obj
            if level < self._max_depth:
                sub_items: Iterable[OutlineItem] = item.children
            else:
                sub_items = ()
            # The recursive call sets out_obj.Count for this item's subtree.
            self._save_level_outline(out_obj, sub_items, level + 1, visited_objs)
            if item.is_closed:
                # A closed item stores a negated count and its descendants
                # are not added to the parent's total.
                out_obj.Count = -cast(int, out_obj.Count)
            else:
                count += cast(int, out_obj.Count)
        if count:
            assert prev is not None and first is not None
            # The last sibling must not keep a stale /Next link.
            if Name.Next in prev:
                del prev.Next
            parent.First = first
            parent.Last = prev
        else:
            if Name.First in parent:
                del parent.First
            if Name.Last in parent:
                del parent.Last
        parent.Count = count

    def _load_level_outline(
        self,
        first_obj: Dictionary,
        outline_items: list[OutlineItem],
        level: int,
        visited_objs: set[tuple[int, int]],
    ):
        """Read a chain of sibling outline nodes starting at *first_obj*.

        Appends one :class:`OutlineItem` per node to *outline_items*,
        following ``/Next`` links and descending into ``/First`` children
        while *level* is below ``max_depth``.

        Raises:
            OutlineStructureError: In strict mode, if a node is reached a
                second time; in any mode, if ``/Next`` is not a dictionary.
        """
        current_obj: Dictionary | None = first_obj
        while current_obj:
            objgen = current_obj.objgen
            if objgen in visited_objs:
                if self._strict:
                    raise OutlineStructureError(
                        f"Outline object {objgen} reoccurred in structure"
                    )
                # Non-strict: stop reading this chain at the repeated node.
                return
            visited_objs.add(objgen)

            item = OutlineItem.from_dictionary_object(current_obj)
            first_child = current_obj.get(Name.First)
            if isinstance(first_child, Dictionary) and level < self._max_depth:
                self._load_level_outline(
                    first_child, item.children, level + 1, visited_objs
                )
            count = current_obj.get(Name.Count)
            if isinstance(count, int) and count < 0:
                item.is_closed = True
            outline_items.append(item)
            next_obj = current_obj.get(Name.Next)
            if next_obj is None or isinstance(next_obj, Dictionary):
                current_obj = next_obj
            else:
                raise OutlineStructureError(
                    f"Outline object {objgen} points to non-dictionary"
                )

    def _save(self):
        """Write the loaded outline back to the document, if it was loaded."""
        if self._root is None:
            return
        if Name.Outlines in self._pdf.Root:
            outlines = self._pdf.Root.Outlines
        else:
            self._pdf.Root.Outlines = outlines = self._pdf.make_indirect(
                Dictionary(Type=Name.Outlines)
            )
        self._save_level_outline(outlines, self._root, 0, set())

    def _load(self):
        """Read the document's outline into ``self._root``."""
        self._root = root = []
        if Name.Outlines not in self._pdf.Root:
            return
        outlines = self._pdf.Root.Outlines or {}
        first_obj = outlines.get(Name.First)
        # Apply the same check used for nested /First entries so that a
        # malformed root /First is ignored instead of crashing the loader.
        if isinstance(first_obj, Dictionary):
            self._load_level_outline(first_obj, root, 0, set())

    def add(
        self, title: str, destination: Array | String | Name | int | None
    ) -> OutlineItem:
        """Add an item to the outline.

        The outline is saved immediately unless this object is being used as
        a context manager, in which case saving happens on exit.

        Arguments:
            title: Title of the outline item.
            destination: Destination to jump to when the item is selected.

        Returns:
            The newly created :class:`OutlineItem`.
        """
        root = self.root  # loads the existing outline on first use
        item = OutlineItem(title, destination)
        root.append(item)
        if not self._updating:
            self._save()
        return item

    @property
    def root(self) -> list[OutlineItem]:
        """Return the root node of the outline."""
        if self._root is None:
            self._load()
        # String form avoids rebuilding the generic alias on every access.
        return cast('list[OutlineItem]', self._root)

    @root.setter
    def root(self, new_root: list[OutlineItem]):
        """Set the root node of the outline.

        Raises:
            ValueError: If *new_root* is not a list, or contains anything
                other than :class:`OutlineItem` objects.
        """
        if not isinstance(new_root, list):
            raise ValueError("Root must be a list of OutlineItem objects.")
        for item in new_root:
            if not isinstance(item, OutlineItem):
                raise ValueError("Each item in root must be an OutlineItem.")

        self._root = new_root