1# SPDX-FileCopyrightText: 2022 James R. Barlow, 2020 Matthias Erll
2
3# SPDX-License-Identifier: MPL-2.0
4
5"""Support for document outlines (e.g. table of contents)."""
6
7from __future__ import annotations
8
9from collections.abc import Iterable
10from enum import Enum
11from itertools import chain
12from typing import cast
13
14from pikepdf._core import Page, Pdf
15from pikepdf.objects import Array, Dictionary, Name, Object, String
16
17
class PageLocation(Enum):
    """Enumerate the page view (fit) modes a destination can request.

    Each member's name is written verbatim as the location name of a
    destination array (e.g. ``PageLocation.FitH`` becomes ``/FitH``); the
    integer values are only identifiers for the members.
    """

    # Takes left/top/zoom operands.
    XYZ = 1
    # Takes no operands.
    Fit = 2
    # Takes a single 'top' operand.
    FitH = 3
    # Takes a single 'left' operand.
    FitV = 4
    # Takes left/bottom/right/top operands.
    FitR = 5
    # Takes no operands.
    FitB = 6
    # Takes a single 'top' operand.
    FitBH = 7
    # Takes a single 'left' operand.
    FitBV = 8
29
30
# Names of the operands that follow each location name in a destination
# array, in the order they are emitted. Locations without an entry here
# (Fit, FitB) take no operands.
PAGE_LOCATION_ARGS = {
    PageLocation.XYZ: ('left', 'top', 'zoom'),
    PageLocation.FitH: ('top',),
    PageLocation.FitV: ('left',),
    PageLocation.FitR: ('left', 'bottom', 'right', 'top'),
    PageLocation.FitBH: ('top',),
    PageLocation.FitBV: ('left',),
}
# Every keyword that is an operand of at least one page location.
ALL_PAGE_LOCATION_KWARGS = set(chain(*PAGE_LOCATION_ARGS.values()))
40
41
def make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: PageLocation | str | None = None,
    *,
    left: float | None = None,
    top: float | None = None,
    right: float | None = None,
    bottom: float | None = None,
    zoom: float | None = None,
) -> Array:
    """Build a destination ``Array`` that points at a page of *pdf*.

    Arguments:
        pdf: PDF document object that owns the target page.
        page_num: Index of the target page (zero-based).
        page_location: Optional page fit style, given either as a
            :class:`PageLocation` member or as its name (e.g. ``'FitH'``).
        left: Viewport coordinate, used when the fit style takes it.
        top: Viewport coordinate, used when the fit style takes it.
        right: Viewport coordinate, used when the fit style takes it.
        bottom: Viewport coordinate, used when the fit style takes it.
        zoom: Viewport zoom level, used when the fit style takes it.

    Which of left, top, right, bottom and zoom are actually used depends on
    the fit style selected by *page_location*.
    """
    # Gather the keyword-only viewport settings and hand them over unchanged;
    # the private helper decides which ones apply to the chosen fit style.
    viewport = {
        'left': left,
        'top': top,
        'right': right,
        'bottom': bottom,
        'zoom': zoom,
    }
    return _make_page_destination(pdf, page_num, page_location, **viewport)
78
79
def _make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: PageLocation | str | None = None,
    **kwargs,
) -> Array:
    """Assemble the destination array for ``make_page_destination``.

    Keyword arguments whose value is ``None`` are treated as not supplied.
    Operands required by the chosen location but not supplied are written
    as ``0``; keywords the location does not use are ignored.

    Raises:
        ValueError: If *page_location* is not a :class:`PageLocation` member
            and is not the name of one.
    """
    # Resolve the page first so a bad page number fails before anything else.
    page_ref = pdf.pages[page_num].obj

    # No location requested: fall back to a plain /Fit destination.
    if not page_location:
        return Array([page_ref, Name.Fit])

    if isinstance(page_location, PageLocation):
        location = page_location
        location_name = location.name
    else:
        location_name = page_location
        try:
            location = PageLocation[location_name]
        except KeyError:
            raise ValueError(
                f"Invalid or unsupported page location type {location_name}"
            ) from None

    supplied = {key: value for key, value in kwargs.items() if value is not None}
    dest = [page_ref, Name(f'/{location_name}')]
    # Emit the operands in the order this location expects them.
    for arg_name in PAGE_LOCATION_ARGS.get(location) or ():
        dest.append(supplied.get(arg_name, 0))
    return Array(dest)
108
109
class OutlineStructureError(Exception):
    """Raised when a document's outline data structure is malformed."""
112
113
class OutlineItem:
    """Manage a single item in a PDF document outlines structure.

    Includes nested items.

    Arguments:
        title: Title of the outlines item.
        destination: Page number, destination name, or any other PDF object
            to be used as a reference when clicking on the outlines entry. Note
            this should be ``None`` if an action is used instead. If set to a
            page number, it will be resolved to a reference at the time of
            writing the outlines back to the document.
        page_location: Supplemental page location for a page number
            in ``destination``, e.g. ``PageLocation.Fit``. May also be
            a simple string such as ``'FitH'``.
        action: Action to perform when clicking on this item. Passing both
            ``destination`` and ``action`` to the constructor raises
            ``ValueError``. If both end up set through later attribute
            assignment, ``destination`` takes precedence during writing.
        obj: ``Dictionary`` object representing this outlines item in a ``Pdf``.
            May be ``None`` for creating a new object. If present, an existing
            object is modified in-place during writing and original attributes
            are retained.
        left, top, bottom, right, zoom: Describes the viewport position associated
            with a destination.

    This object does not contain any information about higher-level or
    neighboring elements.

    Valid destination arrays:
        [page /XYZ left top zoom]
    generally
        [page, PageLocationEntry, 0 to 4 ints]
    """

    def __init__(
        self,
        title: str,
        destination: Array | String | Name | int | None = None,
        page_location: PageLocation | str | None = None,
        action: Dictionary | None = None,
        obj: Dictionary | None = None,
        *,
        left: float | None = None,
        top: float | None = None,
        right: float | None = None,
        bottom: float | None = None,
        zoom: float | None = None,
    ):
        """Initialize OutlineItem.

        Raises:
            ValueError: If both ``destination`` and ``action`` are given.
        """
        if destination is not None and action is not None:
            raise ValueError("Only one of destination and action may be set")
        self.title = title
        self.destination = destination
        self.page_location = page_location
        self.action = action
        self.obj = obj
        # Keep only the viewport settings the caller actually supplied.
        viewport = dict(left=left, top=top, right=right, bottom=bottom, zoom=zoom)
        self.page_location_kwargs = {
            k: v for k, v in viewport.items() if v is not None
        }
        self.is_closed = False
        self.children: list[OutlineItem] = []

    def __str__(self):
        # Open/closed marker: '[ ]' no children, '[+]' closed, '[-]' open.
        if not self.children:
            oc_indicator = '[ ]'
        elif self.is_closed:
            oc_indicator = '[+]'
        else:
            oc_indicator = '[-]'

        destination = self.destination
        if destination is None:
            # Any item without a destination is rendered as an action.
            dest = '<Action>'
        elif isinstance(destination, Array):
            # 12.3.2.2 Explicit destination
            # [raw_page, /PageLocation.SomeThing, integer parameters for viewport]
            raw_page = destination[0]
            dest = Page(raw_page).label
        elif isinstance(destination, String):
            # 12.3.2.2 Named destination, byte string reference to Names
            dest = (
                f"<Named Destination in document .Root.Names dictionary: "
                f"{destination}>"
            )
        elif isinstance(destination, Name):
            # 12.3.2.2 Named destination, name object (PDF 1.1)
            dest = (
                f"<Named Destination in document .Root.Dests dictionary: "
                f"{destination}>"
            )
        elif isinstance(destination, int):
            # Page number
            dest = f'<Page {destination}>'
        else:
            # Any other object (e.g. a plain Python str or a Dictionary) used
            # to leave ``dest`` unbound and crash; describe it generically.
            dest = f'<Unrecognized destination: {destination!r}>'
        return f'{oc_indicator} {self.title} -> {dest}'

    def __repr__(self):
        return f'<pikepdf.{self.__class__.__name__}: "{self.title}">'

    @classmethod
    def from_dictionary_object(cls, obj: Dictionary):
        """Create a ``OutlineItem`` from a ``Dictionary``.

        Does not process nested items.

        Arguments:
            obj: ``Dictionary`` object representing a single outline node.

        Raises:
            OutlineStructureError: If ``/Dest`` is present but is not an array,
                string or name, or if ``/A`` is present but is not a dictionary.
            ValueError: If the node carries both ``/Dest`` and ``/A`` (raised
                by the constructor).
        """
        title = str(obj.Title)
        destination = obj.get(Name.Dest)
        if destination is not None and not isinstance(
            destination, (Array, String, Name)
        ):
            # 12.3.3: /Dest may be a name, byte string or array
            raise OutlineStructureError(
                f"Unexpected object type in Outline's /Dest: {destination!r}"
            )
        action = obj.get(Name.A)
        if action is not None and not isinstance(action, Dictionary):
            raise OutlineStructureError(
                f"Unexpected object type in Outline's /A: {action!r}"
            )
        return cls(title, destination=destination, action=action, obj=obj)

    def to_dictionary_object(self, pdf: Pdf, create_new: bool = False) -> Dictionary:
        """Create/update a ``Dictionary`` object from this outline node.

        Page numbers are resolved to a page reference on the input
        ``Pdf`` object. As a side effect, an integer ``destination`` on this
        item is replaced by the resolved destination array.

        When neither ``destination`` nor ``action`` is set, any ``/Dest`` or
        ``/A`` already present on the object is left untouched.

        Arguments:
            pdf: PDF document object.
            create_new: If set to ``True``, creates a new object instead of
                modifying an existing one in-place.

        Returns:
            The dictionary object that now represents this item; it is also
            stored as ``self.obj``.
        """
        if create_new or self.obj is None:
            self.obj = obj = pdf.make_indirect(Dictionary())
        else:
            obj = self.obj
        obj.Title = self.title
        if self.destination is not None:
            if isinstance(self.destination, int):
                self.destination = make_page_destination(
                    pdf,
                    self.destination,
                    self.page_location,
                    **self.page_location_kwargs,
                )
            obj.Dest = self.destination
            # A destination and an action are mutually exclusive on a node.
            if Name.A in obj:
                del obj.A
        elif self.action is not None:
            obj.A = self.action
            if Name.Dest in obj:
                del obj.Dest
        return obj
269
270
class Outline:
    """Maintains an intuitive interface for creating and editing PDF document outlines.

    See {{ pdfrm }} section 12.3.

    The outline is read from the document on first access to :attr:`root`.
    When used as a context manager, changes are written back once on exit
    (skipped if the block raised); outside of one, :meth:`add` writes back
    immediately.

    Arguments:
        pdf: PDF document object.
        max_depth: Maximum recursion depth to consider when reading the outline.
        strict: If set to ``False`` (default) silently ignores structural errors.
            Setting it to ``True`` raises a
            :class:`pikepdf.OutlineStructureError`
            if any object references re-occur while the outline is being read or
            written. A ``/Next`` entry that is not a dictionary raises this
            error regardless of ``strict``.

    See Also:
        :meth:`pikepdf.Pdf.open_outline`
    """

    def __init__(self, pdf: Pdf, max_depth: int = 15, strict: bool = False):
        """Initialize Outline."""
        # ``None`` means "not loaded from the document yet".
        self._root: list[OutlineItem] | None = None
        self._pdf = pdf
        self._max_depth = max_depth
        self._strict = strict
        # True while inside a ``with`` block; defers saving until exit.
        self._updating = False

    def __str__(self):
        return str(self.root)

    def __repr__(self):
        return f'<pikepdf.{self.__class__.__name__}: {len(self.root)} items>'

    def _repr_pretty_(self, p, cycle):
        if cycle:
            p.text("...")
            return
        with p.group(2, "pikepdf.models.outlines.Outline<\n", "\n>"):
            for item in self.root:
                p.breakable()
                p.pretty(str(item))

    def __enter__(self):
        self._updating = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            # Only write back when the block finished without an exception.
            if exc_type is None:
                self._save()
        finally:
            self._updating = False

    def _save_level_outline(
        self,
        parent: Dictionary,
        outline_items: Iterable[OutlineItem],
        level: int,
        visited_objs: set[tuple[int, int]],
    ):
        """Write one level of items under *parent*, recursing into children.

        Links the items through ``/Prev``/``/Next``/``/Parent``, sets
        ``/First``/``/Last`` on *parent* (or removes them when there are no
        items) and stores the resulting ``/Count`` on *parent*.

        Raises:
            OutlineStructureError: In strict mode, if an item's object was
                already written elsewhere in the structure.
        """
        count = 0
        prev: Dictionary | None = None
        first: Dictionary | None = None
        for item in outline_items:
            out_obj = item.to_dictionary_object(self._pdf)
            objgen = out_obj.objgen
            if objgen in visited_objs:
                if self._strict:
                    raise OutlineStructureError(
                        f"Outline object {objgen} reoccurred in structure"
                    )
                # Non-strict: give the repeated item a fresh object instead.
                out_obj = item.to_dictionary_object(self._pdf, create_new=True)
            else:
                visited_objs.add(objgen)

            out_obj.Parent = parent
            count += 1
            if prev is not None:
                prev.Next = out_obj
                out_obj.Prev = prev
            else:
                first = out_obj
                # The first sibling must not keep a stale /Prev.
                if Name.Prev in out_obj:
                    del out_obj.Prev
            prev = out_obj
            # Children beyond max_depth are not written.
            if level < self._max_depth:
                sub_items: Iterable[OutlineItem] = item.children
            else:
                sub_items = ()
            self._save_level_outline(out_obj, sub_items, level + 1, visited_objs)
            if item.is_closed:
                # A closed item stores its descendant count negated and does
                # not contribute descendants to the parent's count.
                out_obj.Count = -cast(int, out_obj.Count)
            else:
                count += cast(int, out_obj.Count)
        if count:
            assert prev is not None and first is not None
            # The last sibling must not keep a stale /Next.
            if Name.Next in prev:
                del prev.Next
            parent.First = first
            parent.Last = prev
        else:
            if Name.First in parent:
                del parent.First
            if Name.Last in parent:
                del parent.Last
        parent.Count = count

    def _load_level_outline(
        self,
        first_obj: Dictionary,
        outline_items: list[OutlineItem],
        level: int,
        visited_objs: set[tuple[int, int]],
    ):
        """Read one level of siblings starting at *first_obj*.

        Appends an :class:`OutlineItem` per node to *outline_items*, following
        ``/Next`` links and recursing into ``/First`` while *level* is below
        the configured maximum depth.

        Raises:
            OutlineStructureError: In strict mode if an object re-occurs, and
                in any mode if ``/Next`` is not a dictionary.
        """
        current_obj: Dictionary | None = first_obj
        while current_obj:
            objgen = current_obj.objgen
            if objgen in visited_objs:
                if self._strict:
                    raise OutlineStructureError(
                        f"Outline object {objgen} reoccurred in structure"
                    )
                # Non-strict: stop reading this level at the repeated object.
                return
            visited_objs.add(objgen)

            item = OutlineItem.from_dictionary_object(current_obj)
            first_child = current_obj.get(Name.First)
            if isinstance(first_child, Dictionary) and level < self._max_depth:
                self._load_level_outline(
                    first_child, item.children, level + 1, visited_objs
                )
            # A negative /Count marks the item as closed.
            count = current_obj.get(Name.Count)
            if isinstance(count, int) and count < 0:
                item.is_closed = True
            outline_items.append(item)
            next_obj = current_obj.get(Name.Next)
            if next_obj is None or isinstance(next_obj, Dictionary):
                current_obj = next_obj
            else:
                raise OutlineStructureError(
                    f"Outline object {objgen} points to non-dictionary"
                )

    def _save(self):
        """Write the in-memory outline back to the document, if it was loaded."""
        if self._root is None:
            return
        if Name.Outlines in self._pdf.Root:
            outlines = self._pdf.Root.Outlines
        else:
            self._pdf.Root.Outlines = outlines = self._pdf.make_indirect(
                Dictionary(Type=Name.Outlines)
            )
        self._save_level_outline(outlines, self._root, 0, set())

    def _load(self) -> list[OutlineItem]:
        """Read the outline from the document and cache it as the root list.

        The cached root is only replaced once reading succeeded, so an error
        raised part-way through never leaves a truncated outline behind that
        a later save could write back.
        """
        root: list[OutlineItem] = []
        if Name.Outlines in self._pdf.Root:
            outlines = self._pdf.Root.Outlines or {}
            first_obj = outlines.get(Name.First)
            if first_obj:
                self._load_level_outline(first_obj, root, 0, set())
        self._root = root
        return root

    def add(
        self, title: str, destination: Array | String | Name | int | None
    ) -> OutlineItem:
        """Add an item to the outline.

        The item is appended at the top level. Unless this outline is being
        used as a context manager, the outline is saved immediately.

        Arguments:
            title: Title of the outline item.
            destination: Destination to jump to when the item is selected.

        Returns:
            The newly created :class:`OutlineItem`.
        """
        root = self.root  # loads the existing outline on first use
        item = OutlineItem(title, destination)
        root.append(item)
        if not self._updating:
            self._save()
        return item

    @property
    def root(self) -> list[OutlineItem]:
        """Return the root node of the outline."""
        root = self._root
        if root is None:
            root = self._load()
        return root

    @root.setter
    def root(self, new_root: list[OutlineItem]):
        """Set the root node of the outline.

        Raises:
            ValueError: If *new_root* is not a list, or contains anything
                other than :class:`OutlineItem` objects.
        """
        if not isinstance(new_root, list):
            raise ValueError("Root must be a list of OutlineItem objects.")
        for item in new_root:
            if not isinstance(item, OutlineItem):
                raise ValueError("Each item in root must be an OutlineItem.")

        self._root = new_root