1import itertools
2from collections.abc import Iterable
3from typing import Any
4
5from pdfminer import settings
6from pdfminer.pdfparser import PDFSyntaxError
7from pdfminer.pdftypes import dict_value, int_value, list_value
8from pdfminer.utils import choplist
9
10
class NumberTree:
    """Reader for a PDF number tree: integer keys mapped to arbitrary values.

    A node is given as a dictionary-like object and may carry a "Nums"
    array (key/value entries stored directly on the node), a "Kids" array
    (child nodes that are parsed recursively), and a "Limits" array.

    See Section 3.8.6 of the PDF Reference.
    """

    def __init__(self, obj: Any):
        """Wrap *obj* and pick up whichever of Nums/Kids/Limits it has.

        Each attribute stays ``None`` when the matching key is absent.
        """
        node = dict_value(obj)
        self._obj = node

        self.nums: Iterable[Any] | None = (
            list_value(node["Nums"]) if "Nums" in node else None
        )
        self.kids: Iterable[Any] | None = (
            list_value(node["Kids"]) if "Kids" in node else None
        )
        # Stored for completeness; nothing in this class reads it back.
        self.limits: Iterable[Any] | None = (
            list_value(node["Limits"]) if "Limits" in node else None
        )

    def _parse(self) -> list[tuple[int, Any]]:
        """Collect the ``(key, value)`` pairs of this node and its subtree.

        Pairs found directly on this node come first, followed by the pairs
        of each kid in the order the kids are listed. No sorting or order
        validation happens here.
        """
        pairs: list[tuple[int, Any]] = []

        # Leaf node: entries are consumed two at a time as (key, value).
        if self.nums:
            pairs.extend(
                (int_value(key), val) for key, val in choplist(2, self.nums)
            )

        # Root or intermediate node: descend into every child.
        # NOTE(review): there is no guard against a kid that leads back to
        # one of its ancestors; such input would recurse without bound.
        if self.kids:
            for kid in self.kids:
                pairs.extend(NumberTree(kid)._parse())

        return pairs

    values: list[tuple[int, Any]]  # workaround decorators unsupported by mypy

    @property  # type: ignore[no-redef,misc]
    def values(self) -> list[tuple[int, Any]]:
        """All ``(key, value)`` pairs of the tree, ordered by key.

        The tree is re-parsed on every access. When ``settings.STRICT`` is
        falsy the pairs are sorted by key before being returned; otherwise
        the pairs are returned as parsed, provided they are already in
        non-decreasing key order.

        Raises:
            PDFSyntaxError: if ``settings.STRICT`` is set and some key is
                followed by a smaller one.
        """
        entries = self._parse()

        if not settings.STRICT:
            # Lenient mode: repair the ordering instead of rejecting it.
            entries.sort(key=lambda pair: pair[0])
            return entries

        for earlier, later in itertools.pairwise(entries):
            if not earlier[0] <= later[0]:
                raise PDFSyntaxError("Number tree elements are out of order")

        return entries