Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/pagerange.py: 34%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/py-pdf/pypdf/blob/main/LICENSE
"""
9import re
10from typing import Any, List, Tuple, Union
12from .errors import ParseError
# Regular-expression building blocks for the textual page-range syntax.
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
# Matches either a single int, or "[int]:[int]" optionally followed by ":[int]".
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
# groups:         12     34     5 6     7 8
# Group 2 is the bare-int form; groups 4, 6 and 8 are the start, stop and step
# of the colon-separated form.
class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

      - PageRange(str) parses a string representing a page range.
      - PageRange(slice) directly "imports" a slice.
      - to_slice() gives the equivalent slice.
      - str() and repr() allow printing.
      - indices(n) is like slice.indices(n).
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
        where the brackets indicate optional ints.
        Remember, page indices start with zero.

        Page range expression examples:

            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.

        The third, "stride" or "step" number is also recognized.

            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.

        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises:
            ParseError: If ``arg`` is neither a slice, a PageRange, nor a
                string matching the page-range syntax.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        # Anything that is not a string is rejected the same way as a string
        # that does not match the syntax.
        match = re.match(PAGE_RANGE_RE, arg) if isinstance(arg, str) else None
        if not match:
            raise ParseError(arg)
        if match.group(2):
            # Special case: just an int means a range of one page.
            start = int(match.group(2))
            # "-1" must stay open-ended: slice(-1, 0) would select nothing.
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 are start, stop and step; empty means omitted.
            self._slice = slice(
                *[int(g) if g else None for g in match.group(4, 6, 8)]
            )

    @staticmethod
    def valid(input: Any) -> bool:
        """
        True if input is a valid initializer for a PageRange.

        Args:
            input: A possible PageRange string or a PageRange object.

        Returns:
            True, if the ``input`` is a valid PageRange.
        """
        return isinstance(input, (slice, PageRange)) or (
            isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))
        )

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        s = self._slice
        indices: Union[Tuple[int, int], Tuple[int, int, int]]
        if s.step is None:
            # A one-page range is printed as the bare page index.
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ":".join("" if i is None else str(i) for i in indices)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n: int) -> Tuple[int, int, int]:
        """
        Assuming a sequence of length n, calculate the start and stop indices,
        and the stride length of the PageRange.

        See help(slice.indices).

        Args:
            n: the length of the list of pages to choose from.

        Returns:
            Arguments for range().
        """
        return self._slice.indices(n)

    def __eq__(self, other: object) -> bool:
        """Two PageRanges are equal when their underlying slices are equal."""
        if not isinstance(other, PageRange):
            return False
        return self._slice == other._slice

    def __hash__(self) -> int:
        """Hash on the class and the slice's (start, stop, step) triple."""
        return hash(
            (self.__class__, (self._slice.start, self._slice.stop, self._slice.step))
        )

    def __add__(self, other: "PageRange") -> "PageRange":
        """
        Merge two overlapping or adjacent stride-less page ranges into one.

        An omitted start is treated as 0 and an omitted stop as unbounded.
        Bounds are compared as plain numbers; negative (end-relative) indices
        are not resolved against a page count.

        Args:
            other: The PageRange to merge with this one.

        Returns:
            A new PageRange covering both operands.

        Raises:
            TypeError: If ``other`` is not a PageRange.
            ValueError: If either range has a step, or if there is a gap
                between the two ranges.
        """
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")
        first = self._slice.start, self._slice.stop
        second = other._slice.start, other._slice.stop

        def effective_start(bounds: Tuple[Any, Any]) -> int:
            # An omitted start means "from the first page", i.e. index 0.
            return 0 if bounds[0] is None else bounds[0]

        if effective_start(first) > effective_start(second):
            first, second = second, first

        # Now ``first`` starts no later than ``second``.
        # An open-ended ``first`` reaches every later page, so it cannot
        # leave a gap.
        if first[1] is not None and effective_start(second) > first[1]:
            # There is a gap between the two ranges.
            raise ValueError("Can't add PageRanges with gap")

        if first[1] is None or second[1] is None:
            # Either operand running to the end makes the union open-ended.
            stop = None
        else:
            stop = max(first[1], second[1])
        return PageRange(slice(first[0], stop))
# The range of all pages; parse_filename_page_ranges pairs it with any filename
# that is not followed by an explicit page range.
PAGE_RANGE_ALL = PageRange(":")
def parse_filename_page_ranges(
    args: List[Union[str, PageRange, None]]
) -> List[Tuple[str, PageRange]]:
    """
    Pair each filename in ``args`` with the page ranges that follow it.

    Args:
        args: A list whose first element is a filename. Every later element is
            either another filename or something PageRange accepts (a
            page-range expression, a slice object, or a PageRange object).
            A filename with no page range after it stands for all its pages.

    Returns:
        A list of (filename, page_range) pairs, in input order.

    Raises:
        ValueError: If a page range appears before any filename.
    """
    result: List[Tuple[str, PageRange]] = []
    current_file: Union[str, None] = None
    saw_range = False
    # The trailing None is a sentinel that flushes the final filename.
    for item in [*args, None]:
        if not PageRange.valid(item):
            # A new filename (or the sentinel): if the previous file had no
            # explicit range, it contributes all of its pages.
            if current_file and not saw_range:
                result.append((current_file, PAGE_RANGE_ALL))

            assert not isinstance(item, PageRange), item
            current_file = item
            saw_range = False
            continue

        if not current_file:
            raise ValueError(
                "The first argument must be a filename, not a page range."
            )

        assert item is not None
        result.append((current_file, PageRange(item)))
        saw_range = True
    return result
# Type alias: a str, a PageRange, a 2- or 3-int tuple, or a list of ints.
# NOTE(review): not used in this file; presumably the accepted forms of a
# page-range argument elsewhere in the package -- confirm against callers.
PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]]