Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/pagerange.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

76 statements  

1""" 

2Representation and utils for ranges of PDF file pages. 

3 

4Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>. 

5All rights reserved. This software is available under a BSD license; 

6see https://github.com/py-pdf/pypdf/blob/main/LICENSE 

7""" 

8 

9import re 

10from typing import Any, List, Tuple, Union 

11 

12from .errors import ParseError 

13 

# Pattern for one decimal integer: either "0", or an optionally negated
# number without leading zeros (so "-0" is rejected).
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".

# Pattern for a whole page-range expression: a lone int, or a slice-like
# "[start]:[stop]" / "[start]:[stop]:[step]" form with every int optional.
# Capture groups: 1 = whole expression, 2 = lone int, 3 = slice form,
# 4 = start, 5 = ":stop[:step]", 6 = stop, 7 = ":step", 8 = step.
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"

17 

18 

class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

      -  PageRange(str) parses a string representing a page range.
      -  PageRange(slice) directly "imports" a slice.
      -  to_slice() gives the equivalent slice.
      -  str() and repr() allow printing.
      -  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:

            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises:
            ParseError: If ``arg`` is not a slice, a PageRange, or a string
                matching ``PAGE_RANGE_RE``.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        # Non-string arguments short-circuit to False and are rejected below.
        m = isinstance(arg, str) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        if m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # For -1, "start + 1" would be 0 and yield an empty slice, so
            # the last page is expressed with an open stop instead.
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 hold start, stop and step; absent -> None.
            self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)])

    @staticmethod
    def valid(input: Any) -> bool:
        """
        True if input is a valid initializer for a PageRange.

        Args:
            input: A possible PageRange string or a PageRange object.

        Returns:
            True, if the ``input`` is a valid PageRange.

        """
        return isinstance(input, (slice, PageRange)) or (
            isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))
        )

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        s = self._slice
        indices: Union[Tuple[int, int], Tuple[int, int, int]]
        if s.step is None:
            # A single page is printed as a bare int rather than "n:n+1".
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ":".join("" if i is None else str(i) for i in indices)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n: int) -> Tuple[int, int, int]:
        """
        Assuming a sequence of length n, calculate the start and stop indices,
        and the stride length of the PageRange.

        See help(slice.indices).

        Args:
            n:  the length of the list of pages to choose from.

        Returns:
            Arguments for range().

        """
        return self._slice.indices(n)

    def __eq__(self, other: object) -> bool:
        """Two PageRanges are equal when their underlying slices are equal."""
        if not isinstance(other, PageRange):
            return False
        return self._slice == other._slice

    def __hash__(self) -> int:
        """Hash on the class and the slice's (start, stop, step) triple."""
        return hash((self.__class__, (self._slice.start, self._slice.stop, self._slice.step)))

    def __add__(self, other: "PageRange") -> "PageRange":
        """
        Merge two overlapping or adjacent stride-less page ranges into one.

        Bounds are compared as plain integers: an open start counts as 0 and
        an open stop as unbounded. Negative indices are not resolved against
        a page count, since none is known here.

        Args:
            other: The PageRange to merge with this one.

        Returns:
            A PageRange from the smaller start to the larger stop.

        Raises:
            TypeError: If ``other`` is not a PageRange.
            ValueError: If either range has a step, or if there is a gap
                between the two ranges.
        """
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")

        first, second = self._slice, other._slice
        first_start = 0 if first.start is None else first.start
        second_start = 0 if second.start is None else second.start
        if first_start > second_start:
            first, second = second, first
            second_start = first_start

        # Now ``first`` begins no later than ``second``. An open stop on
        # ``first`` reaches the end, so a gap is impossible in that case.
        if first.stop is not None and second_start > first.stop:
            # There is a gap between the two ranges.
            raise ValueError("Can't add PageRanges with gap")

        # An open stop on either side keeps the merged range open-ended.
        if first.stop is None or second.stop is None:
            stop = None
        else:
            stop = max(first.stop, second.stop)
        return PageRange(slice(first.start, stop))

156 

157 

# Shared PageRange covering every page of a document (the ":" expression).
PAGE_RANGE_ALL = PageRange(":")

159 

160 

def parse_filename_page_ranges(
    args: List[Union[str, PageRange, None]]
) -> List[Tuple[str, PageRange]]:
    """
    Pair each filename in ``args`` with the page ranges that follow it.

    Args:
        args: A list whose first element is a filename. Each later element
            is either another filename or something ``PageRange.valid``
            accepts (a page-range expression, a slice, or a PageRange).
            A filename with no page range after it selects all its pages.

    Returns:
        A list of (filename, page_range) pairs, one per page range, plus one
        ``PAGE_RANGE_ALL`` pair for each filename that had no range.

    Raises:
        ValueError: If a page range appears before any filename.

    """
    result: List[Tuple[str, PageRange]] = []
    current_file: Union[str, None] = None
    has_range = False
    # The trailing None acts as an end marker so the last filename is
    # flushed by the same branch that handles "a new filename starts".
    for item in (*args, None):
        if not PageRange.valid(item):
            # A new filename (or the end marker): if the previous file got
            # no explicit range, it contributes all of its pages.
            if current_file and not has_range:
                result.append((current_file, PAGE_RANGE_ALL))

            assert not isinstance(item, PageRange), item
            current_file, has_range = item, False
            continue

        if not current_file:
            raise ValueError(
                "The first argument must be a filename, not a page range."
            )

        assert item is not None
        result.append((current_file, PageRange(item)))
        has_range = True
    return result

198 

199 

# Type alias for the forms in which a page range may be specified: an
# expression string, a PageRange, a 2- or 3-tuple of ints, or a list of ints.
PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]]