Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/objects.py: 47%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-FileCopyrightText: 2022 James R. Barlow
2# SPDX-License-Identifier: MPL-2.0
4"""Provide classes to stand in for PDF objects.
6The purpose of these is to provide nice-looking classes to allow explicit
7construction of PDF objects and more pythonic idioms and facilitate discovery
8by documentation generators and linters.
10It's also a place to narrow the scope of input types to those more easily
11converted to C++.
13There is some deliberate "smoke and mirrors" here: all of the objects are truly
14instances of ``pikepdf.Object``, which is a variant container object. The
15``__new__`` constructs a ``pikepdf.Object`` in each case, and the rest of the
16class definition is present as an aid for code introspection.
17"""
19from __future__ import annotations
21from collections.abc import Iterable, Mapping
22from decimal import Decimal
24# pylint: disable=unused-import, abstract-method
25from secrets import token_urlsafe
26from typing import TYPE_CHECKING, Any, cast
28from pikepdf import _core
29from pikepdf._core import Matrix, Object, ObjectType, Rectangle
31if TYPE_CHECKING: # pragma: no cover
32 from pikepdf import Pdf
# Left alone, pikepdf.Object would identify itself as pikepdf._core.Object.
# Point both Object and ObjectType at this module instead, to discourage use
# of that internal name; Object then presents itself as pikepdf.objects.Object.
Object.__module__ = ObjectType.__module__ = __name__
41# type(Object) is the metaclass that pybind11 defines; we wish to extend that
42# pylint cannot see the C++ metaclass definition and is thoroughly confused.
43# pylint: disable=invalid-metaclass
class _ObjectMeta(type(Object)):  # type: ignore
    """Metaclass that makes ``isinstance`` work for the stand-in classes.

    Each class using this metaclass declares an ``object_type``. An object
    passes the instance check only when its type is exactly ``Object`` (not a
    subclass) and its ``_type_code`` equals the class's ``object_type``.
    """

    object_type: ObjectType

    def __instancecheck__(self, instance: Any) -> bool:
        """Return whether *instance* is an ``Object`` of this class's type."""
        # This method lives on the metaclass, so ``self`` is the class that
        # ``isinstance`` is testing against, not an instance of it.
        return type(instance) is Object and self.object_type == instance._type_code
class _NameObjectMeta(_ObjectMeta):
    """Support usage pikepdf.Name.Whatever -> Name('/Whatever')."""

    def __getattr__(self, attr: str) -> Name:
        """Convert attribute access on the class into a Name.

        Args:
            attr: The attribute that normal lookup failed to find.

        Returns:
            ``Name('/' + attr)`` for ordinary attributes. Attributes starting
            with an underscore, and ``object_type``, are looked up on
            ``_ObjectMeta`` instead and are never converted to Names.
        """
        if attr.startswith('_') or attr == 'object_type':
            return getattr(_ObjectMeta, attr)
        return Name('/' + attr)

    def __setattr__(self, attr: str, value: Any) -> None:
        """Refuse every attribute assignment on the class.

        Raises:
            AttributeError: Always.
        """
        # No need for a symmetric .startswith('_'). To prevent user error, we
        # simply don't allow mucking with the pikepdf.Name class's attributes.
        # There is no reason to ever assign to them.
        raise AttributeError(
            "Attributes may not be set on pikepdf.Name. Perhaps you meant to "
            "modify a Dictionary rather than a Name?"
        )

    def __getitem__(self, item: str) -> None:
        """Explain that the class cannot be subscripted.

        Args:
            item: The key the caller tried to subscript with. A leading
                ``/`` is removed before it is shown in the suggestions.

        Raises:
            TypeError: Always, with a message suggesting the attribute and
                call spellings.
        """
        # Nothing stops a caller from writing ``Name[0]`` or ``Name[Name.A]``.
        # Coerce to str first so the helpful TypeError below is raised in
        # that case too, rather than an AttributeError from ``startswith``.
        label = item if isinstance(item, str) else str(item)
        if label.startswith('/'):
            label = label[1:]
        raise TypeError(
            "pikepdf.Name is not subscriptable. You probably meant:\n"
            f"    pikepdf.Name.{label}\n"
            "or\n"
            f"    pikepdf.Name('/{label}')\n"
        )
class Name(Object, metaclass=_NameObjectMeta):
    """Construct a PDF Name object.

    There are two notations for constructing a Name:

        1. ``Name.Resources``

        2. ``Name('/Resources')``

    Both mean the same thing. Prefer the first for names that are normally
    expected to be in a PDF, and the second for dynamic names and attributes.
    """

    object_type = ObjectType.name_

    def __new__(cls, name: str | Name) -> Name:
        """Construct a PDF Name.

        Args:
            name: The name as a string, or an existing Name, which is
                returned unchanged.

        Raises:
            TypeError: If *name* is ``bytes``.
        """
        # Names are immutable so we can return a reference
        if isinstance(name, Name):
            return name
        # QPDF_Name::unparse ensures that names are always saved in a UTF-8
        # compatible way, so we only need to guard the input.
        if isinstance(name, bytes):
            raise TypeError("Name should be str")
        return _core._new_name(name)

    @classmethod
    def random(cls, len_: int = 16, prefix: str = '') -> Name:
        """Generate a cryptographically strong, random, valid PDF Name.

        When inserting a new name into a PDF (for example, a name for a new
        image), use this function to obtain a cryptographically strong random
        name that is almost certainly not already in the PDF and does not
        collide with other existing names.

        The random part comes from Python's secrets.token_urlsafe, which
        returns a URL-safe encoded random number of the desired length. An
        optional *prefix* may be prepended. (The encoding is ultimately done
        with :func:`base64.urlsafe_b64encode`.) Serendipitously, URL-safe is
        also PDF-safe.

        With a length of 16 (16 random bytes or 128 bits), the result is
        probably globally unique and can be treated as never colliding with
        other names.

        Because the value is encoded, the length of the returned string may
        vary, but it always carries ``8 * len_`` random bits.

        Args:
            len_: The length of the random string.
            prefix: A prefix to prepend to the random string.
        """
        token = token_urlsafe(len_)
        return _core._new_name(f"/{prefix}{token}")
class Operator(Object, metaclass=_ObjectMeta):
    """Construct an operator for use in a content stream.

    Operators are a limited set of commands that can appear in PDF content
    streams (roughly the mini-language that draws objects, lines and text on
    a virtual PDF canvas). :func:`parse_content_stream` creates Operators and
    :func:`unparse_content_stream` expects them, in both cases along with
    their operands.

    pikepdf uses the special Operator "INLINE IMAGE" to denote an inline image
    in a content stream.
    """

    object_type = ObjectType.operator

    def __new__(cls, name: str) -> Operator:
        """Construct an operator.

        Args:
            name: The operator to construct.
        """
        new_operator = _core._new_operator(name)
        return cast('Operator', new_operator)
class String(Object, metaclass=_ObjectMeta):
    """Construct a PDF String object."""

    object_type = ObjectType.string

    def __new__(cls, s: str | bytes) -> String:
        """Construct a PDF String.

        Args:
            s: The value to store. A ``str`` will be encoded for PDF, while
                ``bytes`` (and likewise ``bytearray`` or ``memoryview``) are
                used to construct the string without encoding.
        """
        is_binary = isinstance(s, (bytes, bytearray, memoryview))
        return _core._new_string(s) if is_binary else _core._new_string_utf8(s)
class Array(Object, metaclass=_ObjectMeta):
    """Construct a PDF Array object."""

    object_type = ObjectType.array

    def __new__(cls, a: Iterable | Rectangle | Matrix | None = None) -> Array:
        """Construct a PDF Array.

        Args:
            a: An iterable of objects. All objects must be either
                `pikepdf.Object` or convertible to `pikepdf.Object`.
                ``None`` produces an empty array, a ``Rectangle`` or
                ``Matrix`` is converted with its ``as_array()`` method, and
                an existing ``Array`` is copied.

        Raises:
            TypeError: If *a* is a ``str`` or ``bytes``.
        """
        if isinstance(a, (str, bytes)):
            raise TypeError('Strings cannot be converted to arrays of chars')

        if a is None:
            return _core._new_array([])
        if isinstance(a, (Rectangle, Matrix)):
            return a.as_array()
        if isinstance(a, Array):
            return cast(Array, a.__copy__())
        return _core._new_array(a)
class Dictionary(Object, metaclass=_ObjectMeta):
    """Construct a PDF Dictionary object."""

    object_type = ObjectType.dictionary

    def __new__(cls, d: Mapping | None = None, **kwargs) -> Dictionary:
        """Construct a PDF Dictionary.

        Accepts either a Python ``dict`` or keyword arguments, but not both.

        These two examples are equivalent:

        .. code-block:: python

            pikepdf.Dictionary({'/NameOne': 1, '/NameTwo': 'Two'})

            pikepdf.Dictionary(NameOne=1, NameTwo='Two')

        In either case, the keys must be strings, and the strings
        correspond to the desired Names in the PDF Dictionary. The values
        must all be convertible to `pikepdf.Object`.

        Raises:
            ValueError: If both a mapping and keyword arguments are given.
            KeyError: If a key of the mapping is ``'/'`` or does not start
                with ``'/'``.
        """
        if kwargs:
            if d is not None:
                raise ValueError('Cannot use both a mapping object and keyword args')
            # Add leading slash
            # Allows Dictionary(MediaBox=(0,0,1,1), Type=Name('/Page')...
            return _core._new_dictionary({f'/{k}': v for k, v in kwargs.items()})

        if isinstance(d, Dictionary):
            # Already a dictionary
            return cast(Dictionary, d.__copy__())

        if not d:
            return _core._new_dictionary({})

        for key in d.keys():
            if key == '/' or not key.startswith('/'):
                raise KeyError("Dictionary created from strings must begin with '/'")
        return _core._new_dictionary(d)
class Stream(Object, metaclass=_ObjectMeta):
    """Construct a PDF Stream object."""

    object_type = ObjectType.stream

    def __new__(cls, owner: Pdf, data: bytes | None = None, d=None, **kwargs) -> Stream:
        """Create a new stream object.

        A stream stores arbitrary binary data and may or may not be
        compressed. It also may or may not be a page or Form XObject's
        content stream.

        A stream dictionary is like a pikepdf.Dictionary or Python dict, except
        it has a binary payload of data attached. The dictionary describes
        how the data is compressed or encoded.

        The dictionary may be initialized just like pikepdf.Dictionary is
        initialized, using a mapping object or keyword arguments.

        Args:
            owner: The Pdf to which this stream shall be attached.
            data: The data bytes for the stream.
            d: An optional mapping object that will be used to construct the
                stream's dictionary.
            kwargs: Keyword arguments that will define the stream dictionary.
                Do not set /Length here as pikepdf will manage this value.
                Set /Filter if the data is already encoded in some format.

        Raises:
            TypeError: If *data* is ``None``.

        Examples:
            Using kwargs:
                >>> pdf = pikepdf.Pdf.new()
                >>> s1 = pikepdf.Stream(
                ...     pdf,
                ...     b"uncompressed image data",
                ...     BitsPerComponent=8,
                ...     ColorSpace=pikepdf.Name.DeviceRGB,
                ... )

            Using dict:
                >>> pdf = pikepdf.Pdf.new()
                >>> d = pikepdf.Dictionary(Key1=1, Key2=2)
                >>> s2 = pikepdf.Stream(
                ...     pdf,
                ...     b"data",
                ...     d
                ... )

        .. versionchanged:: 2.2
            Support creation of ``pikepdf.Stream`` from existing dictionary.

        .. versionchanged:: 3.0
            ``obj`` argument was removed; use ``data``.
        """
        if data is None:
            raise TypeError("Must make Stream from binary data")

        # The dictionary is built before the stream itself, so a rejected
        # mapping or keyword set raises before any stream is created.
        header = Dictionary(d, **kwargs) if (d or kwargs) else None

        new_stream = _core._new_stream(owner, data)
        if header:
            new_stream.stream_dict = header
        return new_stream
class Integer(Object, metaclass=_ObjectMeta):
    """A PDF integer object.

    In explicit conversion mode, PDF integers are returned as this type
    rather than being converted automatically to Python ``int``.

    Supports ``int()`` conversion, indexing operations (via ``__index__``),
    and arithmetic operations. Arithmetic operations return native Python ``int``.

    .. versionadded:: 10.1
    """

    object_type = ObjectType.integer

    def __new__(cls, val: int | Integer) -> Integer:
        """Construct a PDF Integer.

        Args:
            val: The integer value. An existing ``Integer`` is returned
                unchanged.
        """
        if not isinstance(val, Integer):
            return _core._new_integer(val)  # type: ignore[return-value]
        return val
class Boolean(Object, metaclass=_ObjectMeta):
    """A PDF boolean object.

    In explicit conversion mode, PDF booleans are returned as this type
    rather than being converted automatically to Python ``bool``.

    Supports ``bool()`` conversion via ``__bool__``.

    .. versionadded:: 10.1
    """

    object_type = ObjectType.boolean

    def __new__(cls, val: bool | Boolean) -> Boolean:
        """Construct a PDF Boolean.

        Args:
            val: The boolean value. An existing ``Boolean`` is returned
                unchanged.
        """
        if not isinstance(val, Boolean):
            return _core._new_boolean(val)  # type: ignore[return-value]
        return val
class Real(Object, metaclass=_ObjectMeta):
    """A PDF real (floating-point) object.

    In explicit conversion mode, PDF reals are returned as this type
    rather than being converted automatically to Python ``Decimal``.

    Supports ``float()`` conversion. Use ``as_decimal()`` for lossless conversion.

    .. versionadded:: 10.1
    """

    object_type = ObjectType.real

    def __new__(cls, val: float | Decimal | Real, places: int = 6) -> Real:
        """Construct a PDF Real.

        Args:
            val: The real value. Converted to string representation internally.
                An existing ``Real`` is returned unchanged.
            places: Number of decimal places (used when val is float).
        """
        if isinstance(val, Real):
            return val
        if not isinstance(val, float):
            # Anything that is not a float is passed along as its str() form;
            # ``places`` plays no part on this path.
            return _core._new_real(str(val))  # type: ignore[return-value]
        return _core._new_real(val, places)  # type: ignore[return-value]
381# Note on numbers ABC registration:
382# numbers.Integral.register(Integer) and numbers.Real.register(Real) don't work
383# as expected because of the "smoke and mirrors" design - at runtime all Objects
384# are actually pikepdf.Object instances, not Integer/Real instances.
385# The isinstance(obj, Integer) check uses metaclass magic (_ObjectMeta) that
386# checks the object's _type_code attribute. This doesn't satisfy the numbers ABC
387# registration mechanism which checks the actual type hierarchy.
390class _NamePathMeta(type):
391 """Metaclass for NamePath to support NamePath.A.B syntax."""
393 def __getattr__(cls, name: str) -> _core._NamePath:
394 if name.startswith('_'):
395 raise AttributeError(name)
396 return _core._NamePath()._append_name(name)
398 def __getitem__(cls, key: str | int | Name) -> _core._NamePath:
399 # NamePath['/A'] or NamePath[0]
400 if isinstance(key, str):
401 return _core._NamePath()._append_name(key)
402 elif isinstance(key, int):
403 return _core._NamePath()._append_index(key)
404 elif isinstance(key, Name):
405 return _core._NamePath()._append_name(str(key))
406 raise TypeError(f"NamePath key must be str, int, or Name, not {type(key)}")
408 def __call__(cls, *args) -> _core._NamePath:
409 # NamePath() or NamePath('/A', '/B')
410 if not args:
411 return _core._NamePath()
412 return _core._NamePath(*args)
class NamePath(metaclass=_NamePathMeta):
    """Path for accessing nested Dictionary/Stream values.

    A NamePath gives ergonomic access to deeply nested PDF structures with a
    single access operation, and helpful error messages when keys are not
    found.

    Usage examples::

        # Shorthand syntax - most common
        obj[NamePath.Resources.Font.F1]

        # With array indices
        obj[NamePath.Pages.Kids[0].MediaBox]

        # Chained access - supports non Python-identifier names
        NamePath['/A']['/B'].C[0]  # equivalent to NamePath.A.B.C[0]

        # Alternate syntax to support lists
        obj[NamePath(Name.Resources, Name.Font)]

        # Using string objects
        obj[NamePath('/Resources', '/Weird-Name')]

        # Empty path returns the object itself
        obj[NamePath()]

        # Setting nested values (all parents must exist)
        obj[NamePath.Root.Info.Title] = pikepdf.String("Test")

        # With default value
        obj.get(NamePath.Root.Metadata, None)

    When a key is not found, the KeyError message identifies the exact failure
    point, e.g.: "Key /C not found; traversed NamePath.A.B"

    .. versionadded:: 10.1
    """

    # This class is never instantiated - the metaclass intercepts construction
    # and returns _core._NamePath instances instead