Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/objects.py: 50%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

86 statements  

1# SPDX-FileCopyrightText: 2022 James R. Barlow 

2# SPDX-License-Identifier: MPL-2.0 

3 

4"""Provide classes to stand in for PDF objects. 

5 

6The purpose of these is to provide nice-looking classes to allow explicit 

7construction of PDF objects and more pythonic idioms and facilitate discovery 

8by documentation generators and linters. 

9 

10It's also a place to narrow the scope of input types to those more easily 

11converted to C++. 

12 

13There is some deliberate "smoke and mirrors" here: all of the objects are truly 

14instances of ``pikepdf.Object``, which is a variant container object. The 

15``__new__`` constructs a ``pikepdf.Object`` in each case, and the rest of the 

16class definition is present as an aid for code introspection.

17""" 

18 

19from __future__ import annotations 

20 

21from collections.abc import Iterable, Mapping 

22 

23# pylint: disable=unused-import, abstract-method 

24from secrets import token_urlsafe 

25from typing import TYPE_CHECKING, Any, cast 

26 

27from pikepdf import _core 

28from pikepdf._core import Matrix, Object, ObjectType, Rectangle 

29 

30if TYPE_CHECKING: # pragma: no cover 

31 from pikepdf import Pdf 

32 

# By default pikepdf.Object will identify itself as pikepdf._core.Object
# Here we change the module to discourage people from using that internal name
# Instead it will become pikepdf.objects.Object
# (__name__ is this module's import name, so both types below are relabelled
# as belonging to this module rather than to the compiled _core extension.)
Object.__module__ = __name__
# ObjectType gets the same treatment as Object.
ObjectType.__module__ = __name__

38 

39 

40# type(Object) is the metaclass that pybind11 defines; we wish to extend that 

41# pylint cannot see the C++ metaclass definition and is thoroughly confused. 

42# pylint: disable=invalid-metaclass 

43 

44 

class _ObjectMeta(type(Object)):  # type: ignore
    """Metaclass that makes ``isinstance`` work for the stand-in classes.

    Classes using this metaclass declare an ``object_type`` attribute, which
    is compared against the ``_type_code`` of the instance being tested.
    """

    def __instancecheck__(self, instance: Any) -> bool:
        """Report whether *instance* is an ``Object`` of this class's type.

        Anything whose exact type is not ``Object`` is rejected outright.
        """
        # This is a metaclass method, so ``self`` is the class named on the
        # right-hand side of the isinstance() call.
        is_plain_object = type(instance) is Object
        return is_plain_object and self.object_type == instance._type_code

53 

54 

class _NameObjectMeta(_ObjectMeta):
    """Support usage pikepdf.Name.Whatever -> Name('/Whatever').

    Attribute reads on the class that ordinary lookup cannot resolve are
    turned into PDF names, attribute writes are rejected, and subscripting
    raises a ``TypeError`` that points at the supported spellings.
    """

    def __getattr__(self, attr: str) -> Name:
        """Return ``Name('/' + attr)`` for ordinary attribute names.

        Names that start with an underscore, and ``object_type``, are looked
        up on ``_ObjectMeta`` instead, so they are never turned into PDF names.
        """
        if attr.startswith('_') or attr == 'object_type':
            return getattr(_ObjectMeta, attr)
        return Name('/' + attr)

    def __setattr__(self, attr: str, value: Any) -> None:
        """Reject every attribute assignment on the class.

        Raises:
            AttributeError: Always.
        """
        # No need for a symmetric .startswith('_'). To prevent user error, we
        # simply don't allow mucking with the pikepdf.Name class's attributes.
        # There is no reason to ever assign to them.
        raise AttributeError(
            "Attributes may not be set on pikepdf.Name. Perhaps you meant to "
            "modify a Dictionary rather than a Name?"
        )

    def __getitem__(self, item: str) -> None:
        """Reject subscripting of the class with a helpful message.

        Args:
            item: Whatever was placed between the square brackets.

        Raises:
            TypeError: Always. For string subscripts the message suggests the
                two supported ways of spelling the name.
        """
        if not isinstance(item, str):
            # A non-string subscript (an int or a slice, say) has no
            # .startswith(); without this guard the caller would get an
            # AttributeError instead of the intended TypeError.
            raise TypeError("pikepdf.Name is not subscriptable.")
        if item.startswith('/'):
            # Drop the leading slash so the suggestions below read correctly.
            item = item[1:]
        raise TypeError(
            "pikepdf.Name is not subscriptable. You probably meant:\n"
            f"    pikepdf.Name.{item}\n"
            "or\n"
            f"    pikepdf.Name('/{item}')\n"
        )

81 

82 

class Name(Object, metaclass=_NameObjectMeta):
    """Construct a PDF Name object.

    There are two equivalent ways to spell a name:

      1. ``Name.Resources``

      2. ``Name('/Resources')``

    Prefer the attribute form for names that are normally expected to appear
    in a PDF, and the call form for names that are computed at run time.
    """

    object_type = ObjectType.name_

    def __new__(cls, name: str | Name) -> Name:
        """Construct a PDF Name.

        Args:
            name: The name as a string, or an existing ``Name``, which is
                returned unchanged.

        Raises:
            TypeError: If *name* is ``bytes``.
        """
        if isinstance(name, Name):
            # Names are immutable, so handing back the same object is safe.
            return name
        # QPDF_Name::unparse ensures that names are always saved in a UTF-8
        # compatible way, so only the input needs guarding.
        if isinstance(name, bytes):
            raise TypeError("Name should be str")
        return _core._new_name(name)

    @classmethod
    def random(cls, len_: int = 16, prefix: str = '') -> Name:
        """Generate a cryptographically strong, random, valid PDF Name.

        Use this when inserting something new into a PDF (for example, the
        name of a new image) and you need a name that is almost certainly not
        already in the PDF and will not collide with other existing names.

        The random part comes from Python's :func:`secrets.token_urlsafe`,
        which returns URL-safe text (ultimately produced with
        :func:`base64.urlsafe_b64encode`). Serendipitously, URL-safe is also
        PDF-safe. An optional *prefix* may be placed before the random part.

        With the default of 16 random bytes (128 bits), the result is probably
        globally unique and can be treated as never colliding with other
        names.

        Because the bytes are encoded, the length of the returned text may
        vary, but it always carries ``8 * len_`` random bits.

        Args:
            len_: Number of random bytes to draw.
            prefix: Text placed between the leading ``/`` and the random part.
        """
        suffix = token_urlsafe(len_)
        return _core._new_name(f"/{prefix}{suffix}")

137 

138 

class Operator(Object, metaclass=_ObjectMeta):
    """Construct an operator for use in a content stream.

    Content streams are roughly the mini-language that draws objects, lines
    and text on a virtual PDF canvas, and an Operator is one of the limited
    set of commands that may appear in them. :func:`parse_content_stream`
    creates Operators and :func:`unparse_content_stream` expects them, in both
    cases together with their operands.

    pikepdf uses the special Operator "INLINE IMAGE" to denote an inline image
    in a content stream.
    """

    object_type = ObjectType.operator

    def __new__(cls, name: str) -> Operator:
        """Construct an operator.

        Args:
            name: The operator's name.
        """
        new_operator = _core._new_operator(name)
        # cast() only informs the type checker; it does nothing at run time.
        return cast('Operator', new_operator)

157 

158 

class String(Object, metaclass=_ObjectMeta):
    """Construct a PDF String object."""

    object_type = ObjectType.string

    def __new__(cls, s: str | bytes) -> String:
        """Construct a PDF String.

        Args:
            s: The content of the string. A ``str`` is encoded for PDF;
                ``bytes`` are used as given, without encoding.
        """
        # Choose the constructor by input type, then call it once.
        build = _core._new_string if isinstance(s, bytes) else _core._new_string_utf8
        return build(s)

174 

175 

class Array(Object, metaclass=_ObjectMeta):
    """Construct a PDF Array object."""

    object_type = ObjectType.array

    def __new__(cls, a: Iterable | Rectangle | Matrix | None = None) -> Array:
        """Construct a PDF Array.

        Args:
            a: An iterable of objects, each of which must be a
                `pikepdf.Object` or convertible to one. A ``Rectangle`` or
                ``Matrix`` is converted with its ``as_array()`` method, an
                existing ``Array`` is copied, and ``None`` gives an empty
                array.

        Raises:
            TypeError: If *a* is a ``str`` or ``bytes``.
        """
        if isinstance(a, (str, bytes)):
            raise TypeError('Strings cannot be converted to arrays of chars')
        if a is None:
            return _core._new_array([])
        if isinstance(a, (Rectangle, Matrix)):
            return a.as_array()
        if isinstance(a, Array):
            return cast(Array, a.__copy__())
        return _core._new_array(a)

198 

199 

class Dictionary(Object, metaclass=_ObjectMeta):
    """Construct a PDF Dictionary object."""

    object_type = ObjectType.dictionary

    def __new__(cls, d: Mapping | None = None, **kwargs) -> Dictionary:
        """Construct a PDF Dictionary.

        Accepts either a mapping (such as a Python ``dict``) or keyword
        arguments, but not both.

        These two examples are equivalent:

        .. code-block:: python

            pikepdf.Dictionary({'/NameOne': 1, '/NameTwo': 'Two'})

            pikepdf.Dictionary(NameOne=1, NameTwo='Two')

        Either way the keys must be strings, which become the Names in the
        PDF Dictionary, and every value must be convertible to
        `pikepdf.Object`. Mapping keys must include the leading ``/``; for
        keyword arguments it is added automatically.

        Raises:
            ValueError: If both a mapping and keyword arguments are given.
            KeyError: If a mapping key is ``'/'`` or lacks the leading ``/``.
        """
        if kwargs:
            if d is not None:
                raise ValueError('Cannot use both a mapping object and keyword args')
            # Add the leading slash, which allows
            # Dictionary(MediaBox=(0,0,1,1), Type=Name('/Page')...
            slashed = {('/' + key): value for key, value in kwargs.items()}
            return _core._new_dictionary(slashed)
        if isinstance(d, Dictionary):
            # Already a dictionary, so hand back a copy.
            return d.__copy__()
        if not d:
            # None or an empty mapping: there are no keys to validate.
            return _core._new_dictionary({})
        for key in d.keys():
            if key == '/' or not key.startswith('/'):
                raise KeyError("Dictionary created from strings must begin with '/'")
        return _core._new_dictionary(d)

236 

237 

class Stream(Object, metaclass=_ObjectMeta):
    """Construct a PDF Stream object."""

    object_type = ObjectType.stream

    def __new__(cls, owner: Pdf, data: bytes | None = None, d=None, **kwargs) -> Stream:
        """Create a new stream object.

        A stream holds arbitrary binary data, which may or may not be
        compressed, and which may or may not be the content stream of a page
        or Form XObject.

        Its stream dictionary behaves like a pikepdf.Dictionary or Python
        dict with a binary payload attached, and describes how that payload
        is compressed or encoded. The dictionary may be initialized just like
        pikepdf.Dictionary, from a mapping object or from keyword arguments.

        Args:
            owner: The Pdf to which this stream shall be attached.
            data: The data bytes for the stream.
            d: An optional mapping object that will be used to construct the stream's
                dictionary.
            kwargs: Keyword arguments that will define the stream dictionary. Do not set
                /Length here as pikepdf will manage this value. Set /Filter
                if the data is already encoded in some format.

        Raises:
            TypeError: If *data* is ``None``.

        Examples:
            Using kwargs:
                >>> pdf = pikepdf.Pdf.new()
                >>> s1 = pikepdf.Stream(
                ...     pdf,
                ...     b"uncompressed image data",
                ...     BitsPerComponent=8,
                ...     ColorSpace=pikepdf.Name.DeviceRGB,
                ... )

            Using dict:
                >>> pdf = pikepdf.Pdf.new()
                >>> d = pikepdf.Dictionary(Key1=1, Key2=2)
                >>> s2 = pikepdf.Stream(
                ...     pdf,
                ...     b"data",
                ...     d
                ... )

        .. versionchanged:: 2.2
            Support creation of ``pikepdf.Stream`` from existing dictionary.

        .. versionchanged:: 3.0
            ``obj`` argument was removed; use ``data``.
        """
        if data is None:
            raise TypeError("Must make Stream from binary data")

        # Build the dictionary before creating the stream, so that an invalid
        # dictionary specification is reported before any stream exists.
        stream_dict = Dictionary(d, **kwargs) if (d or kwargs) else None

        new_stream = _core._new_stream(owner, data)
        if stream_dict:
            new_stream.stream_dict = stream_dict
        return new_stream