Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/objects.py: 50%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-FileCopyrightText: 2022 James R. Barlow
2# SPDX-License-Identifier: MPL-2.0
4"""Provide classes to stand in for PDF objects.
6The purpose of these is to provide nice-looking classes to allow explicit
7construction of PDF objects and more pythonic idioms and facilitate discovery
8by documentation generators and linters.
10It's also a place to narrow the scope of input types to those more easily
11converted to C++.
13There is some deliberate "smoke and mirrors" here: all of the objects are truly
14instances of ``pikepdf.Object``, which is a variant container object. The
15``__new__`` constructs a ``pikepdf.Object`` in each case, and the rest of the
16class definition is present as an aid for code introspection.
17"""
19from __future__ import annotations
21from collections.abc import Iterable, Mapping
23# pylint: disable=unused-import, abstract-method
24from secrets import token_urlsafe
25from typing import TYPE_CHECKING, Any, cast
27from pikepdf import _core
28from pikepdf._core import Matrix, Object, ObjectType, Rectangle
30if TYPE_CHECKING: # pragma: no cover
31 from pikepdf import Pdf
# pikepdf.Object and pikepdf.ObjectType would otherwise report the internal
# pikepdf._core module as their home. Rebinding ``__module__`` makes them
# present themselves under this module's name instead, which discourages
# callers from depending on the internal name.
for _exported in (Object, ObjectType):
    _exported.__module__ = __name__
del _exported  # do not leave the loop variable behind in the module namespace
40# type(Object) is the metaclass that pybind11 defines; we wish to extend that
41# pylint cannot see the C++ metaclass definition and is thoroughly confused.
42# pylint: disable=invalid-metaclass
class _ObjectMeta(type(Object)):  # type: ignore
    """Metaclass that implements ``isinstance`` for the stand-in classes."""

    def __instancecheck__(self, instance: Any) -> bool:
        """Report whether *instance* is an ``Object`` of this class's type code.

        Because this is a metaclass, ``self`` is the class on the right-hand
        side of the ``isinstance`` call. Only exact ``Object`` instances can
        match; the class's ``object_type`` is then compared with the
        instance's ``_type_code``.
        """
        return type(instance) is Object and self.object_type == instance._type_code
class _NameObjectMeta(_ObjectMeta):
    """Support usage pikepdf.Name.Whatever -> Name('/Whatever')."""

    def __getattr__(self, attr: str) -> Name:
        """Turn a failed class-attribute lookup into a Name.

        Python calls this only when normal attribute lookup on the class has
        failed. Private/dunder names and ``object_type`` are looked up on
        ``_ObjectMeta`` (which raises ``AttributeError`` if they are missing
        there too); every other attribute name becomes ``Name('/' + attr)``.
        """
        if attr.startswith('_') or attr == 'object_type':
            return getattr(_ObjectMeta, attr)
        return Name('/' + attr)

    def __setattr__(self, attr: str, value: Any) -> None:
        """Reject every attempt to assign an attribute on the class.

        Raises:
            AttributeError: Always.
        """
        # No need for a symmetric .startswith('_'). To prevent user error, we
        # simply don't allow mucking with the pikepdf.Name class's attributes.
        # There is no reason to ever assign to them.
        raise AttributeError(
            "Attributes may not be set on pikepdf.Name. Perhaps you meant to "
            "modify a Dictionary rather than a Name?"
        )

    def __getitem__(self, item: str) -> None:
        """Explain that the class is not subscriptable and suggest alternatives.

        Raises:
            TypeError: Always, with a message showing the two supported
                spellings for the requested name.
        """
        # Subscripts are not guaranteed to be str (e.g. ``Name[0]``). Coerce
        # first so the caller always gets the explanatory TypeError rather
        # than an AttributeError from calling .startswith on a non-string.
        label = item if isinstance(item, str) else str(item)
        if label.startswith('/'):
            label = label[1:]
        raise TypeError(
            "pikepdf.Name is not subscriptable. You probably meant:\n"
            f" pikepdf.Name.{label}\n"
            "or\n"
            f" pikepdf.Name('/{label}')\n"
        )
class Name(Object, metaclass=_NameObjectMeta):
    """Construct a PDF Name object.

    There are two equivalent spellings:

    1. ``Name.Resources``

    2. ``Name('/Resources')``

    Prefer the attribute form for names that are normally expected to be in
    a PDF, and the call form for dynamic names and attributes.
    """

    object_type = ObjectType.name_

    def __new__(cls, name: str | Name) -> Name:
        """Construct a PDF Name.

        An existing Name is returned unchanged; ``bytes`` input is rejected
        with ``TypeError``; anything else is handed to the core constructor.
        """
        # Names are immutable, so handing back the same reference is safe.
        if isinstance(name, Name):
            return name
        # QPDF_Name::unparse ensures that names are always saved in a UTF-8
        # compatible way, so only the input needs guarding.
        if isinstance(name, bytes):
            raise TypeError("Name should be str")
        return _core._new_name(name)

    @classmethod
    def random(cls, len_: int = 16, prefix: str = '') -> Name:
        """Generate a cryptographically strong, random, valid PDF Name.

        Use this when inserting a new name into a PDF (for example, the name
        for a new image) and you need one that is almost certainly not
        already in the PDF and will not collide with other existing names.

        The random part comes from Python's :func:`secrets.token_urlsafe`,
        which returns URL-safe text encoding the requested number of random
        bytes. URL-safe text is also PDF-safe. An optional *prefix* is placed
        between the leading slash and the random part.

        With the default of 16 random bytes (128 bits), the result can be
        treated as never colliding with other names. Because the bytes are
        encoded, the length of the returned string may vary, but it always
        carries ``8 * len_`` random bits.

        Args:
            len_: Number of random bytes passed to ``token_urlsafe``.
            prefix: Text to prepend to the random string.
        """
        token = token_urlsafe(len_)
        return _core._new_name(f"/{prefix}{token}")
class Operator(Object, metaclass=_ObjectMeta):
    """Construct an operator for use in a content stream.

    Operators are the limited set of commands that can appear in PDF content
    streams (roughly the mini-language that draws objects, lines and text on
    a virtual PDF canvas). :func:`parse_content_stream` creates Operators and
    :func:`unparse_content_stream` expects them, together with their
    operands.

    pikepdf uses the special Operator "INLINE IMAGE" to denote an inline
    image in a content stream.
    """

    object_type = ObjectType.operator

    def __new__(cls, name: str) -> Operator:
        """Construct an operator from its name."""
        new_operator = _core._new_operator(name)
        # The core returns a generic Object; the cast only informs type checkers.
        return cast('Operator', new_operator)
class String(Object, metaclass=_ObjectMeta):
    """Construct a PDF String object."""

    object_type = ObjectType.string

    def __new__(cls, s: str | bytes) -> String:
        """Construct a PDF String.

        Args:
            s: The value to store. ``bytes`` go to the raw string
                constructor without encoding; anything else goes to the
                UTF-8 string constructor, which encodes it for PDF.
        """
        make_string = (
            _core._new_string if isinstance(s, bytes) else _core._new_string_utf8
        )
        return make_string(s)
class Array(Object, metaclass=_ObjectMeta):
    """Construct a PDF Array object."""

    object_type = ObjectType.array

    def __new__(cls, a: Iterable | Rectangle | Matrix | None = None) -> Array:
        """Construct a PDF Array.

        Args:
            a: An iterable of objects. All objects must be either
                `pikepdf.Object` or convertible to `pikepdf.Object`.
                ``None`` yields an empty array; a ``Rectangle`` or ``Matrix``
                is converted with its ``as_array()``; an existing ``Array``
                is copied.

        Raises:
            TypeError: If *a* is a ``str`` or ``bytes``.
        """
        if a is None:
            return _core._new_array([])
        if isinstance(a, (str, bytes)):
            raise TypeError('Strings cannot be converted to arrays of chars')
        if isinstance(a, (Rectangle, Matrix)):
            return a.as_array()
        if isinstance(a, Array):
            return cast(Array, a.__copy__())
        return _core._new_array(a)
class Dictionary(Object, metaclass=_ObjectMeta):
    """Construct a PDF Dictionary object."""

    object_type = ObjectType.dictionary

    def __new__(cls, d: Mapping | None = None, **kwargs) -> Dictionary:
        """Construct a PDF Dictionary.

        Accepts either a mapping or keyword arguments, but not both.

        These two examples are equivalent:

        .. code-block:: python

            pikepdf.Dictionary({'/NameOne': 1, '/NameTwo': 'Two'})

            pikepdf.Dictionary(NameOne=1, NameTwo='Two')

        Keys must be strings and correspond to the desired Names in the PDF
        Dictionary. Mapping keys must already start with ``/``; keyword
        names get the slash added. All values must be convertible to
        `pikepdf.Object`.

        Raises:
            ValueError: If both a mapping and keyword arguments are given.
            KeyError: If a mapping key is ``'/'`` or lacks the leading ``/``.
        """
        if d is not None and kwargs:
            raise ValueError('Cannot use both a mapping object and keyword args')

        if kwargs:
            # Prefix each keyword with a slash, which allows
            # Dictionary(MediaBox=(0,0,1,1), Type=Name('/Page')...
            slashed = {f'/{key}': value for key, value in kwargs.items()}
            return _core._new_dictionary(slashed)

        if isinstance(d, Dictionary):
            # Already a dictionary: hand back a copy
            return d.__copy__()

        if not d:
            # None or an empty mapping
            return _core._new_dictionary({})

        for key in d.keys():
            if key == '/' or not key.startswith('/'):
                raise KeyError("Dictionary created from strings must begin with '/'")
        return _core._new_dictionary(d)
class Stream(Object, metaclass=_ObjectMeta):
    """Construct a PDF Stream object."""

    object_type = ObjectType.stream

    def __new__(cls, owner: Pdf, data: bytes | None = None, d=None, **kwargs) -> Stream:
        """Create a new stream object.

        A stream stores arbitrary binary data, which may or may not be
        compressed, and may or may not be the content stream of a page or
        Form XObject.

        A stream dictionary is like a pikepdf.Dictionary or Python dict,
        except that it has a binary payload of data attached. The dictionary
        describes how the data is compressed or encoded.

        The dictionary may be initialized just like pikepdf.Dictionary is
        initialized, using a mapping object or keyword arguments.

        Args:
            owner: The Pdf to which this stream shall be attached.
            data: The data bytes for the stream.
            d: An optional mapping object that will be used to construct the
                stream's dictionary.
            kwargs: Keyword arguments that will define the stream dictionary.
                Do not set /Length here as pikepdf will manage this value.
                Set /Filter if the data is already encoded in some format.

        Raises:
            TypeError: If *data* is ``None``.

        Examples:
            Using kwargs:
                >>> pdf = pikepdf.Pdf.new()
                >>> s1 = pikepdf.Stream(
                ...     pdf,
                ...     b"uncompressed image data",
                ...     BitsPerComponent=8,
                ...     ColorSpace=pikepdf.Name.DeviceRGB,
                ... )

            Using dict:
                >>> pdf = pikepdf.Pdf.new()
                >>> d = pikepdf.Dictionary(Key1=1, Key2=2)
                >>> s2 = pikepdf.Stream(
                ...     pdf,
                ...     b"data",
                ...     d
                ... )

        .. versionchanged:: 2.2
            Support creation of ``pikepdf.Stream`` from existing dictionary.

        .. versionchanged:: 3.0
            ``obj`` argument was removed; use ``data``.
        """
        if data is None:
            raise TypeError("Must make Stream from binary data")

        # Build the dictionary before creating the stream, so a bad
        # dictionary specification fails before anything is created.
        stream_dict = Dictionary(d, **kwargs) if (d or kwargs) else None

        new_stream = _core._new_stream(owner, data)
        if stream_dict:
            new_stream.stream_dict = stream_dict
        return new_stream