Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/_methods.py: 37%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-FileCopyrightText: 2022 James R. Barlow
2# SPDX-License-Identifier: MPL-2.0
4"""Implement some features in Python and monkey-patch them onto C++ classes.
6In several cases the implementation of some higher levels features might as
7well be in Python. Fortunately we can attach Python methods to C++ class
8bindings after the fact.
10We can also move the implementation to C++ if desired.
11"""
13from __future__ import annotations
15import datetime
16import mimetypes
17import shutil
18from collections.abc import (
19 Callable,
20 ItemsView,
21 Iterator,
22 KeysView,
23 MutableMapping,
24 ValuesView,
25)
26from contextlib import ExitStack, suppress
27from decimal import Decimal
28from io import BytesIO, RawIOBase
29from pathlib import Path
30from subprocess import run
31from tempfile import TemporaryDirectory
32from typing import BinaryIO, Literal, TypeVar
33from warnings import warn
35from pikepdf._augments import augment_override_cpp, augments
36from pikepdf._core import (
37 AccessMode,
38 AttachedFile,
39 AttachedFileSpec,
40 Attachments,
41 NameTree,
42 NumberTree,
43 ObjectStreamMode,
44 ObjectType,
45 Page,
46 Pdf,
47 Rectangle,
48 StreamDecodeLevel,
49 StreamParser,
50 Token,
51 _ObjectMapping,
52)
53from pikepdf._io import atomic_overwrite, check_different_files, check_stream_is_usable
54from pikepdf.models import Encryption, EncryptionInfo, Outline, Permissions
55from pikepdf.models.metadata import PdfMetadata, decode_pdf_date, encode_pdf_date
56from pikepdf.objects import Array, Dictionary, Name, Object, Stream
58# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
59# mypy: ignore-errors
# Nothing is exported by ``from ... import *``; this module works by attaching
# methods to classes that are defined elsewhere.
__all__ = []

# Type variable restricted to the numeric types used for page dimensions.
Numeric = TypeVar('Numeric', int, float, Decimal)
# Unconstrained type variable used to type caller-supplied default values.
T = TypeVar('T')

# Sentinel for distinguishing "no default provided" from "default=None"
_MISSING = object()
def _single_page_pdf(page: Page) -> bytes:
    """Return the bytes of a new in-memory PDF whose only page is *page*."""
    single = Pdf.new()
    single.pages.append(page)
    buffer = BytesIO()
    single.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
def _run_mudraw(in_path: Path, out_pattern: Path) -> Path:
    """Run ``mutool draw`` on a PDF and return the first page's output path.

    Args:
        in_path: Path of the PDF file to render.
        out_pattern: Output path handed to ``mutool draw -o``. Its file name
            is expected to contain a ``%d`` placeholder for the page number.

    Returns:
        ``out_pattern`` with the ``%d`` in its file name replaced by ``1``.

    Raises:
        FileNotFoundError: If the expected output file does not exist once
            the command has finished.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (it is run with ``check=True``).
    """
    run(
        ['mutool', 'draw', '-o', str(out_pattern), str(in_path)],
        check=True,
    )
    # The placeholder is printf-style, which str.format() leaves untouched,
    # so substitute the page number for page 1 explicitly.
    out_path = out_pattern.with_name(out_pattern.name.replace('%d', '1'))
    if not out_path.exists():
        raise FileNotFoundError(out_path)
    return out_path
def _mudraw(buffer: bytes | memoryview, fmt: Literal["svg"]) -> bytes:
    """Use mupdf draw to render the PDF in the memory buffer.

    Args:
        buffer: The complete PDF file contents.
        fmt: Output format; it is used as the output file's extension.

    Returns:
        The bytes of the output file produced for the first page.

    Raises:
        FileNotFoundError: Propagated from ``_run_mudraw`` when the expected
            output file is not produced.
    """
    # mudraw cannot read from stdin so a temporary file is required
    # '-o -' does not work on macos-14
    # '-o <path>' can accidentally prepend numbers to dots, so use explicit %d
    # instead; see https://bugs.ghostscript.com/show_bug.cgi?id=708653
    with TemporaryDirectory() as tmp_dir:
        work_dir = Path(tmp_dir)
        in_path = work_dir / 'input.pdf'
        out_pattern = work_dir / f'output%d.{fmt}'
        in_path.write_bytes(buffer)
        # Read the result before the temporary directory is cleaned up.
        return _run_mudraw(in_path, out_pattern).read_bytes()
@augments(Object)
class Extend_Object:
    def _ipython_key_completions_(self):
        """Offer this object's keys for IPython completion when it has keys."""
        return self.keys() if isinstance(self, Dictionary | Stream) else None

    def emplace(self, other: Object, retain=(Name.Parent,)):
        """Copy the keys of *other* into this object, keeping retained keys.

        Keys listed in *retain* are neither overwritten nor deleted. Every
        other key of this object that *other* lacks is removed.

        Raises:
            TypeError: If the two objects do not have the same owner.
        """
        if not self.same_owner_as(other):
            raise TypeError("Objects must have the same owner for emplace()")

        # .keys() returns strings, so compare everything as strings
        retained = {str(key) for key in retain}
        own_keys = set(self.keys())
        incoming_keys = set(other.keys())

        assert all(
            isinstance(key, str) for key in (retained | own_keys | incoming_keys)
        )

        stale_keys = own_keys - incoming_keys - retained
        for key in incoming_keys:
            if key in retained:
                continue
            self[key] = other[key]  # pylint: disable=unsupported-assignment-operation
        for key in stale_keys:
            del self[key]  # pylint: disable=unsupported-delete-operation

    def _type_check_write(self, filter_, decode_parms):
        """Validate and normalise the filter/decode_parms pair for write().

        Both values are coerced to arrays for checking. On return a
        single-element array is unwrapped to its only item, and an empty
        decode_parms becomes None.

        Raises:
            TypeError: If an element has the wrong type.
            ValueError: If decode_parms is non-empty and its length differs
                from the number of filters.
        """
        filters = Array(filter_) if isinstance(filter_, list) else filter_
        filters = filters.wrap_in_array()

        if isinstance(decode_parms, list):
            parms = Array(decode_parms)
        elif decode_parms is None:
            parms = Array([])
        else:
            parms = decode_parms.wrap_in_array()

        if any(not isinstance(item, Name) for item in filters):
            raise TypeError(
                "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
            )
        if any(
            item is not None and not isinstance(item, Dictionary) for item in parms
        ):
            raise TypeError(
                "decode_parms must be: pikepdf.Dictionary or "
                "pikepdf.Array([pikepdf.Dictionary])"
            )
        if len(parms) != 0 and len(filters) != len(parms):
            raise ValueError(
                f"filter ({filters!r}) and decode_parms "
                f"({parms!r}) must be arrays of same length"
            )

        # Collapse the arrays back to their simplest equivalent form.
        if len(filters) == 1:
            filters = filters[0]
        if len(parms) == 0:
            parms = None
        elif len(parms) == 1:
            parms = parms[0]
        return filters, parms

    def write(
        self,
        data: bytes,
        *,
        filter: Name | Array | None = None,
        decode_parms: Dictionary | Array | None = None,
        type_check: bool = True,
    ):  # pylint: disable=redefined-builtin
        """Write *data* to this object with the given filter settings.

        When *type_check* is true and a filter is given, the filter and
        decode_parms are validated and normalised before being passed on.
        """
        if filter is not None and type_check:
            filter, decode_parms = self._type_check_write(filter, decode_parms)

        self._write(data, filter=filter, decode_parms=decode_parms)

    def as_int(self, default: T = _MISSING) -> int | T:
        """Return this object as an int, or *default* if it is not an integer.

        In explicit conversion mode, this is a safe, well-typed way to turn a
        pikepdf.Integer into a Python int.

        Args:
            default: Returned when this object is not an integer. When it is
                omitted, a non-integer object raises TypeError instead.

        Returns:
            The integer value, or *default* when one was supplied and this
            object is not an integer.

        Raises:
            TypeError: If this object is not an integer and no default was
                supplied.

        .. versionadded:: 10.1
        """
        if self._type_code == ObjectType.integer:
            return int(self)
        if default is _MISSING:
            raise TypeError(f"Expected integer, got {self._type_name}")
        return default

    def as_bool(self, default: T = _MISSING) -> bool | T:
        """Return this object as a bool, or *default* if it is not a boolean.

        In explicit conversion mode, this is a safe, well-typed way to turn a
        pikepdf.Boolean into a Python bool.

        Args:
            default: Returned when this object is not a boolean. When it is
                omitted, a non-boolean object raises TypeError instead.

        Returns:
            The boolean value, or *default* when one was supplied and this
            object is not a boolean.

        Raises:
            TypeError: If this object is not a boolean and no default was
                supplied.

        .. versionadded:: 10.1
        """
        if self._type_code == ObjectType.boolean:
            return bool(self)
        if default is _MISSING:
            raise TypeError(f"Expected boolean, got {self._type_name}")
        return default

    def as_float(self, default: T = _MISSING) -> float | T:
        """Return this object as a float, or *default* if it is not numeric.

        Both Integer and Real objects are accepted.

        Args:
            default: Returned when this object is not numeric. When it is
                omitted, a non-numeric object raises TypeError instead.

        Returns:
            The float value, or *default* when one was supplied and this
            object is not numeric.

        Raises:
            TypeError: If this object is not numeric and no default was
                supplied.

        .. versionadded:: 10.1
        """
        if self._type_code in (ObjectType.integer, ObjectType.real):
            return float(self)
        if default is _MISSING:
            raise TypeError(f"Expected numeric, got {self._type_name}")
        return default

    def as_decimal(self, default: T = _MISSING) -> Decimal | T:
        """Return this object as a Decimal, or *default* if it is not a Real.

        Preferred over as_float() for PDF reals because it preserves
        precision. Only Real objects are accepted; Integer objects are not.

        Args:
            default: Returned when this object is not a Real. When it is
                omitted, a non-Real object raises TypeError instead.

        Returns:
            The Decimal value, or *default* when one was supplied and this
            object is not a Real.

        Raises:
            TypeError: If this object is not a Real and no default was
                supplied.

        .. versionadded:: 10.1
        """
        if self._type_code == ObjectType.real:
            return Decimal(self._get_real_value())
        if default is _MISSING:
            raise TypeError(f"Expected real, got {self._type_name}")
        return default
@augments(Pdf)
class Extend_Pdf:
    def _quick_save(self):
        """Save this PDF to a new in-memory stream, rewound to the start."""
        bio = BytesIO()
        self.save(bio)
        bio.seek(0)
        return bio

    def _repr_mimebundle_(self, include=None, exclude=None):  # pylint: disable=unused-argument
        """Build the rich-display bundle: the PDF bytes and, if possible, an SVG."""
        pdf_data = self._quick_save().read()
        data = {
            'application/pdf': pdf_data,
        }
        # Rendering is best effort: on these errors no SVG entry is added.
        with suppress(FileNotFoundError, RuntimeError):
            data['image/svg+xml'] = _mudraw(pdf_data, 'svg').decode('utf-8')
        return data

    @property
    def docinfo(self) -> Dictionary:
        """Return the trailer's /Info dictionary, creating it if necessary.

        A missing or non-dictionary /Info is replaced by a new empty indirect
        dictionary, and a direct dictionary is made indirect.
        """
        if Name.Info not in self.trailer or not isinstance(
            self.trailer.Info, Dictionary
        ):
            self.trailer.Info = self.make_indirect(Dictionary())
        if not self.trailer.Info.is_indirect:
            self.trailer.Info = self.make_indirect(self.trailer.Info)
        return self.trailer.Info

    @docinfo.setter
    def docinfo(self, new_docinfo: Dictionary):
        """Replace the trailer's /Info; the new value must be indirect."""
        if not new_docinfo.is_indirect:
            raise ValueError(
                "docinfo must be an indirect object - use Pdf.make_indirect"
            )
        self.trailer.Info = new_docinfo

    @docinfo.deleter
    def docinfo(self):
        """Remove /Info from the trailer if it is present."""
        if Name.Info in self.trailer:
            del self.trailer.Info

    def open_metadata(
        self,
        set_pikepdf_as_editor: bool = True,
        update_docinfo: bool = True,
        strict: bool = False,
    ) -> PdfMetadata:
        """Return a PdfMetadata object for this PDF.

        The arguments are forwarded as ``pikepdf_mark``, ``sync_docinfo`` and
        (negated) ``overwrite_invalid_xml`` respectively.
        """
        return PdfMetadata(
            self,
            pikepdf_mark=set_pikepdf_as_editor,
            sync_docinfo=update_docinfo,
            overwrite_invalid_xml=not strict,
        )

    def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline:
        """Return an Outline object for this PDF."""
        return Outline(self, max_depth=max_depth, strict=strict)

    def make_stream(self, data: bytes, d=None, **kwargs) -> Stream:
        """Create a Stream owned by this PDF from *data*."""
        return Stream(self, data, d, **kwargs)

    def add_blank_page(
        self, *, page_size: tuple[Numeric, Numeric] = (612.0, 792.0)
    ) -> Page:
        """Add a blank page of the given size and return it.

        Args:
            page_size: Width and height in PDF units.

        Raises:
            ValueError: If either dimension is outside the range 3 to 14400.
        """
        for dim in page_size:
            if not (3 <= dim <= 14400):
                raise ValueError('Page size must be between 3 and 14400 PDF units')

        page_dict = Dictionary(
            Type=Name.Page,
            MediaBox=Array([0, 0, page_size[0], page_size[1]]),
            Contents=self.make_stream(b''),
            Resources=Dictionary(),
        )
        page_obj = self.make_indirect(page_dict)
        self._add_page(page_obj, first=False)
        return Page(page_obj)

    def close(self) -> None:
        """Close the PDF and, if present, the in-memory copy of its input."""
        self._close()
        if getattr(self, '_tmp_stream', None):
            self._tmp_stream.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @property
    def allow(self) -> Permissions:
        """Return a Permissions tuple built from the ``_allow_*`` attributes."""
        results = {}
        for field in Permissions._fields:
            results[field] = getattr(self, '_allow_' + field)
        return Permissions(**results)

    @property
    def encryption(self) -> EncryptionInfo:
        """Return an EncryptionInfo wrapping this PDF's encryption data."""
        return EncryptionInfo(self._encryption_data)

    def check_pdf_syntax(
        self, progress: Callable[[int], None] | None = None
    ) -> list[str]:
        """Decode all streams, parse every page, and return collected warnings.

        Args:
            progress: Optional callback passed to the stream decoding step.

        Returns:
            The warnings reported by get_warnings(), each prefixed with
            ``"WARNING: "``.
        """

        class DiscardingParser(StreamParser):
            def __init__(self):  # pylint: disable=useless-super-delegation
                super().__init__()  # required for C++

            def handle_object(self, *_args):
                pass

            def handle_eof(self):
                pass

        problems: list[str] = []

        self._decode_all_streams_and_discard(progress)

        discarding_parser = DiscardingParser()
        for page in self.pages:
            page.parse_contents(discarding_parser)

        for warning in self.get_warnings():
            problems.append("WARNING: " + warning)

        return problems

    def save(
        self,
        filename_or_stream: Path | str | BinaryIO | None = None,
        *,
        static_id: bool = False,
        preserve_pdfa: bool = True,
        min_version: str | tuple[str, int] = "",
        force_version: str | tuple[str, int] = "",
        fix_metadata_version: bool = True,
        compress_streams: bool = True,
        stream_decode_level: StreamDecodeLevel | None = None,
        object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
        normalize_content: bool = False,
        linearize: bool = False,
        qdf: bool = False,
        progress: Callable[[int], None] | None = None,
        encryption: Encryption | bool | None = None,
        recompress_flate: bool = False,
        deterministic_id: bool = False,
    ) -> None:
        """Save the PDF to a file path or a seekable stream.

        When no destination is given, the original filename recorded by
        open() is used if there is one. A path destination is written through
        atomic_overwrite(); a destination with a ``seek`` attribute is
        treated as a stream. The remaining keyword arguments are forwarded
        unchanged.

        Raises:
            ValueError: If no destination is given and none was recorded.
            TypeError: If the destination is neither a stream nor a
                str/bytes/Path.
        """
        if not filename_or_stream and getattr(self, '_original_filename', None):
            filename_or_stream = self._original_filename
        if not filename_or_stream:
            raise ValueError(
                "Cannot save to original filename because the original file was "
                "not opening using Pdf.open(..., allow_overwriting_input=True). "
                "Either specify a new destination filename/file stream or open "
                "with allow_overwriting_input=True. If this Pdf was created using "
                "Pdf.new(), you must specify a destination object since there is "
                "no original filename to save to."
            )
        with ExitStack() as stack:
            if hasattr(filename_or_stream, 'seek'):
                stream = filename_or_stream
                check_stream_is_usable(filename_or_stream)
            else:
                if not isinstance(filename_or_stream, str | bytes | Path):
                    raise TypeError("expected str, bytes or os.PathLike object")
                filename = Path(filename_or_stream)
                # Without an in-memory copy of the input, the destination
                # must not be the file this PDF was opened from.
                if (
                    not getattr(self, '_tmp_stream', None)
                    and getattr(self, '_original_filename', None) is not None
                ):
                    check_different_files(self._original_filename, filename)
                stream = stack.enter_context(atomic_overwrite(filename))
            self._save(
                stream,
                static_id=static_id,
                preserve_pdfa=preserve_pdfa,
                min_version=min_version,
                force_version=force_version,
                fix_metadata_version=fix_metadata_version,
                compress_streams=compress_streams,
                stream_decode_level=stream_decode_level,
                object_stream_mode=object_stream_mode,
                normalize_content=normalize_content,
                linearize=linearize,
                qdf=qdf,
                progress=progress,
                encryption=encryption,
                samefile_check=getattr(self, '_tmp_stream', None) is None,
                recompress_flate=recompress_flate,
                deterministic_id=deterministic_id,
            )

    @staticmethod
    def open(
        filename_or_stream: Path | str | BinaryIO,
        *,
        password: str | bytes = "",
        hex_password: bool = False,
        ignore_xref_streams: bool = False,
        suppress_warnings: bool = True,
        attempt_recovery: bool = True,
        inherit_page_attributes: bool = True,
        access_mode: AccessMode = AccessMode.default,
        allow_overwriting_input: bool = False,
    ) -> Pdf:
        """Open a PDF from a file path or a readable, seekable stream.

        With ``allow_overwriting_input=True`` the file is first copied into
        memory and the copy is opened, so the original path can be saved to
        later. Otherwise an object with ``read`` and ``seek`` is used as the
        stream directly, and anything else is opened as a file path.

        Raises:
            TypeError: If the first argument is an int or float, or cannot be
                converted to a path.
            ValueError: If ``allow_overwriting_input=True`` is combined with
                a first argument that is not a file path.
        """
        if isinstance(filename_or_stream, bytes) and filename_or_stream.startswith(
            b'%PDF-'
        ):
            warn(
                "It looks like you called with Pdf.open(data) with a bytes-like object "
                "containing a PDF. This will probably fail because this function "
                "expects a filename or opened file-like object. Instead, please use "
                "Pdf.open(BytesIO(data))."
            )
        if isinstance(filename_or_stream, int | float):
            # Attempted to open with integer file descriptor?
            # TODO improve error
            raise TypeError("expected str, bytes or os.PathLike object")

        stream: RawIOBase | None = None
        closing_stream: bool = False
        original_filename: Path | None = None

        if allow_overwriting_input:
            try:
                original_filename = Path(filename_or_stream)
            except TypeError as error:
                raise ValueError(
                    '"allow_overwriting_input=True" requires "open" first argument '
                    'to be a file path'
                ) from error
            with open(original_filename, 'rb') as pdf_file:
                stream = BytesIO()
                shutil.copyfileobj(pdf_file, stream)
                stream.seek(0)
            # description = f"memory copy of {original_filename}"
            description = str(original_filename)
        elif hasattr(filename_or_stream, 'read') and hasattr(
            filename_or_stream, 'seek'
        ):
            stream = filename_or_stream
            description = f"stream {stream}"
        else:
            stream = open(filename_or_stream, 'rb')
            # From here on the stream is ours to close, including on failure.
            closing_stream = True
            try:
                # open() accepts some arguments that Path() rejects (for
                # example a bytes path); do not leak the handle in that case.
                original_filename = Path(filename_or_stream)
            except Exception:
                stream.close()
                raise
            description = str(filename_or_stream)

        try:
            check_stream_is_usable(stream)
            pdf = Pdf._open(
                stream,
                password=password,
                hex_password=hex_password,
                ignore_xref_streams=ignore_xref_streams,
                suppress_warnings=suppress_warnings,
                attempt_recovery=attempt_recovery,
                inherit_page_attributes=inherit_page_attributes,
                access_mode=access_mode,
                description=description,
                closing_stream=closing_stream,
            )
        except Exception:
            # Only close streams this function opened itself.
            if stream is not None and closing_stream:
                stream.close()
            raise
        pdf._tmp_stream = stream if allow_overwriting_input else None
        pdf._original_filename = original_filename
        return pdf
@augments(_ObjectMapping)
class Extend_ObjectMapping:
    def get(self, key, default=None) -> Object:
        """Return the item stored under *key*, or *default* if it is missing."""
        with suppress(KeyError):
            return self[key]
        return default

    @augment_override_cpp
    def __contains__(self, key: Name | str) -> bool:
        # The C++ implementation is called with a string, so convert Names.
        lookup = str(key) if isinstance(key, Name) else key
        return _ObjectMapping._cpp__contains__(self, lookup)

    @augment_override_cpp
    def __getitem__(self, key: Name | str) -> Object:
        # The C++ implementation is called with a string, so convert Names.
        lookup = str(key) if isinstance(key, Name) else key
        return _ObjectMapping._cpp__getitem__(self, lookup)
def check_is_box(obj) -> None:
    """Raise ValueError unless *obj* is, or converts to, a rectangle.

    The object's own ``is_rectangle`` attribute is consulted first. If that
    attribute is missing or false, the object is converted to an Array and
    the Array's ``is_rectangle`` is checked instead.

    Raises:
        ValueError: If neither check succeeds, including when the conversion
            itself fails (the original error is chained).
    """
    message = "object is not a rectangle"
    with suppress(AttributeError):
        if obj.is_rectangle:
            return
    try:
        converted_is_box = bool(Array(obj).is_rectangle)
    except Exception as e:
        raise ValueError(message) from e
    if not converted_is_box:
        raise ValueError(message)
@augments(Page)
class Extend_Page:
    @property
    def mediabox(self):
        """Return the page's media box."""
        return self._get_mediabox(True)

    @mediabox.setter
    def mediabox(self, value):
        check_is_box(value)
        self.obj['/MediaBox'] = value

    @property
    def artbox(self):
        """Return the page's art box."""
        return self._get_artbox(True, False)

    @artbox.setter
    def artbox(self, value):
        check_is_box(value)
        self.obj['/ArtBox'] = value

    @property
    def bleedbox(self):
        """Return the page's bleed box."""
        return self._get_bleedbox(True, False)

    @bleedbox.setter
    def bleedbox(self, value):
        check_is_box(value)
        self.obj['/BleedBox'] = value

    @property
    def cropbox(self):
        """Return the page's crop box."""
        return self._get_cropbox(True, False)

    @cropbox.setter
    def cropbox(self, value):
        check_is_box(value)
        self.obj['/CropBox'] = value

    @property
    def trimbox(self):
        """Return the page's trim box."""
        return self._get_trimbox(True, False)

    @trimbox.setter
    def trimbox(self, value):
        check_is_box(value)
        self.obj['/TrimBox'] = value

    @property
    def images(self) -> _ObjectMapping:
        """Return the mapping of images on this page."""
        return self._images

    @property
    def form_xobjects(self) -> _ObjectMapping:
        """Return the mapping of form XObjects on this page."""
        return self._form_xobjects

    @property
    def resources(self) -> Dictionary:
        """Return the page's /Resources dictionary, creating it if absent.

        Raises:
            TypeError: If /Resources exists but is not a dictionary.
        """
        page_obj = self.obj
        if Name.Resources not in page_obj:
            page_obj.Resources = Dictionary()
        elif not isinstance(page_obj.Resources, Dictionary):
            raise TypeError("Page /Resources exists but is not a dictionary")
        return page_obj.Resources

    def add_resource(
        self,
        res: Object,
        res_type: Name,
        name: Name | None = None,
        *,
        prefix: str = '',
        replace_existing: bool = True,
    ) -> Name:
        """Add *res* to the page's /Resources under *res_type*; return its name.

        Args:
            res: The object to add.
            res_type: The resource category, created if it does not exist.
            name: The name to store the resource under. When omitted, a
                random name is generated using *prefix*.
            prefix: Prefix for a generated name; cannot be combined with
                *name*.
            replace_existing: Whether an existing entry with the same name,
                in any resource category, is removed instead of being an
                error.

        Raises:
            ValueError: If both *name* and *prefix* are given, or the name
                already exists and *replace_existing* is false.
        """
        resources = self.resources
        if res_type not in resources:
            resources[res_type] = Dictionary()

        if name is not None and prefix:
            raise ValueError("Must specify one of name= or prefix=")
        if name is None:
            name = Name.random(prefix=prefix)

        # The name is checked against every dictionary-valued category.
        for group in resources.as_dict().values():
            if isinstance(group, Dictionary) and name in group:
                if not replace_existing:
                    raise ValueError(f"Name {name} already exists in page /Resources")
                del group[name]

        resources[res_type][name] = res.with_same_owner_as(self.obj)
        return name

    def _over_underlay(
        self,
        other,
        rect: Rectangle | None,
        under: bool,
        push_stack: bool,
        shrink: bool,
        expand: bool,
    ) -> Name:
        """Place *other* on this page as a form XObject; return its name.

        *other* may be a Page, a page Dictionary, or a form XObject Stream.
        When *rect* is None the page's trim box is used. The placement
        commands are prepended when *under* is true and appended otherwise.

        Raises:
            TypeError: If *other* cannot be converted to a form XObject.
        """
        form = None
        if isinstance(other, Page):
            form = other.as_form_xobject()
        elif isinstance(other, Dictionary) and other.get(Name.Type) == Name.Page:
            form = Page(other).as_form_xobject()
        elif (
            isinstance(other, Stream)
            and other.get(Name.Type) == Name.XObject
            and other.get(Name.Subtype) == Name.Form
        ):
            form = other

        if form is None:
            raise TypeError(
                "other object is not something we can convert to Form XObject"
            )

        target = Rectangle(self.trimbox) if rect is None else rect

        placed_name = self.add_resource(form, Name.XObject)
        commands = self.calc_form_xobject_placement(
            form, placed_name, target, allow_shrink=shrink, allow_expand=expand
        )

        if push_stack:
            # Bracket the existing content with q ... Q
            self.contents_add(b'q\n', prepend=True)  # prepend q
            self.contents_add(b'Q\n', prepend=False)  # i.e. append Q

        self.contents_add(commands, prepend=under)
        self.contents_coalesce()
        return placed_name

    def add_overlay(
        self,
        other: Object | Page,
        rect: Rectangle | None = None,
        *,
        push_stack: bool = True,
        shrink: bool = True,
        expand: bool = True,
    ) -> Name:
        """Add *other* after the page's existing content; return its name."""
        return self._over_underlay(
            other,
            rect,
            under=False,
            push_stack=push_stack,
            shrink=shrink,
            expand=expand,
        )

    def add_underlay(
        self,
        other: Object | Page,
        rect: Rectangle | None = None,
        *,
        shrink: bool = True,
        expand: bool = True,
    ) -> Name:
        """Add *other* before the page's existing content; return its name."""
        return self._over_underlay(
            other, rect, under=True, push_stack=False, shrink=shrink, expand=expand
        )

    def contents_add(self, contents: Stream | bytes, *, prepend: bool = False):
        """Add *contents* to the page, before or after the existing content."""
        return self._contents_add(contents, prepend=prepend)

    def __getattr__(self, name):
        # Attributes not found on the Page are looked up on the page object.
        return getattr(self.obj, name)

    @augment_override_cpp
    def __setattr__(self, name, value):
        # Names defined on the class are set on the Page itself; everything
        # else is set on the underlying page object.
        if not hasattr(self.__class__, name):
            setattr(self.obj, name, value)
            return
        object.__setattr__(self, name, value)

    @augment_override_cpp
    def __delattr__(self, name):
        # Mirrors __setattr__: class-level names are deleted from the Page,
        # all others from the underlying page object.
        if not hasattr(self.__class__, name):
            delattr(self.obj, name)
            return
        object.__delattr__(self, name)

    def __getitem__(self, key):
        return self.obj[key]

    def __setitem__(self, key, value):
        self.obj[key] = value

    def __delitem__(self, key):
        del self.obj[key]

    def __contains__(self, key):
        return key in self.obj

    def get(self, key, default=None):
        """Return the page object's value for *key*, or *default* if missing."""
        with suppress(KeyError):
            return self[key]
        return default

    def emplace(self, other: Page, retain=(Name.Parent,)):
        """Delegate to emplace() on the underlying page objects."""
        return self.obj.emplace(other.obj, retain=retain)

    def __repr__(self):
        # Reuse the dictionary repr, relabelled as a Page without its /Type.
        text = repr(self.obj)
        text = text.replace('Dictionary', 'Page', 1)
        return text.replace('(Type="/Page")', '', 1)

    def _repr_mimebundle_(self, include=None, exclude=None):
        """Build the rich-display bundle for this page.

        The candidate MIME types are the PDF and SVG representations,
        narrowed by *include* and *exclude* when those are given.
        """
        wanted = {'application/pdf', 'image/svg+xml'}
        if include:
            wanted = {mime for mime in wanted if mime in include}
        if exclude:
            wanted = {mime for mime in wanted if mime not in exclude}
        page_bytes = _single_page_pdf(self)
        data = {}
        if 'application/pdf' in wanted:
            data['application/pdf'] = page_bytes
        if 'image/svg+xml' in wanted:
            # Rendering is best effort: on these errors no SVG entry is added.
            with suppress(FileNotFoundError, RuntimeError):
                data['image/svg+xml'] = _mudraw(page_bytes, 'svg').decode('utf-8')
        return data
@augments(Token)
class Extend_Token:
    def __repr__(self):
        """Return a representation showing the token's type and raw value."""
        kind, raw = self.type_, self.raw_value
        return f'pikepdf.Token({kind}, {raw})'
@augments(Rectangle)
class Extend_Rectangle:
    def __repr__(self):
        """Return a representation listing the four corner coordinates."""
        corners = (self.llx, self.lly, self.urx, self.ury)
        return 'pikepdf.Rectangle({}, {}, {}, {})'.format(*corners)

    def __hash__(self):
        # Hash on the four corner coordinates.
        corners = (self.llx, self.lly, self.urx, self.ury)
        return hash(corners)

    def to_bbox(self) -> Rectangle:
        """Return a rectangle of the same size anchored at the origin.

        The new rectangle has this rectangle's width and height, with its
        lower-left corner at (0, 0).

        Bounding boxes represent independent coordinate systems, such as for
        Form XObjects.
        """
        width, height = self.width, self.height
        return Rectangle(0, 0, width, height)
@augments(Attachments)
class Extend_Attachments(MutableMapping):
    def __getitem__(self, k: str) -> AttachedFileSpec:
        """Return the file specification stored under *k*.

        Raises:
            KeyError: If no file specification is found for *k*.
        """
        found = self._get_filespec(k)
        if found is not None:
            return found
        raise KeyError(k)

    def __setitem__(self, k: str, v: AttachedFileSpec | bytes) -> None:
        """Attach raw bytes or a file specification under *k*."""
        if isinstance(v, bytes):
            return self._attach_data(k, v)
        # A file specification without a filename takes the key as its name.
        if not v.filename:
            v.filename = k
        return self._add_replace_filespec(k, v)

    def __delitem__(self, k: str) -> None:
        return self._remove_filespec(k)

    def __len__(self):
        all_specs = self._get_all_filespecs()
        return len(all_specs)

    def __iter__(self) -> Iterator[str]:
        for key in self._get_all_filespecs():
            yield key

    def __repr__(self):
        keys = list(self)
        return f"<pikepdf._core.Attachments: {keys}>"
@augments(AttachedFileSpec)
class Extend_AttachedFileSpec:
    @staticmethod
    def from_filepath(
        pdf: Pdf,
        path: Path | str,
        *,
        description: str = '',
        relationship: Name | None = Name.Unspecified,
    ):
        """Build an AttachedFileSpec from a file on disk.

        Args:
            pdf: The Pdf passed to the AttachedFileSpec constructor.
            path: Location of the file to attach. Its contents are read
                immediately and only its final component is used as the
                filename.
            description: Description passed on to the file specification.
            relationship: Relationship passed on to the file specification.

        Returns:
            The new AttachedFileSpec. Its MIME type is guessed from the path
            (an empty string if the guess fails) and its dates are taken
            from the file's ``stat()`` result.
        """
        mime, _ = mimetypes.guess_type(str(path))
        if mime is None:
            mime = ''
        if not isinstance(path, Path):
            path = Path(path)

        stat = path.stat()
        # NOTE: st_ctime is the creation time on Windows, but on most Unix
        # systems it is the time of the last metadata change.
        return AttachedFileSpec(
            pdf,
            path.read_bytes(),
            description=description,
            filename=str(path.name),
            mime_type=mime,
            creation_date=encode_pdf_date(
                datetime.datetime.fromtimestamp(stat.st_ctime)
            ),
            mod_date=encode_pdf_date(datetime.datetime.fromtimestamp(stat.st_mtime)),
            relationship=relationship,
        )

    @property
    def relationship(self) -> Name | None:
        """Return the /AFRelationship entry, or None if it is not set."""
        return self.obj.get(Name.AFRelationship)

    @relationship.setter
    def relationship(self, value: Name | None):
        """Set /AFRelationship, or remove it when *value* is None.

        Clearing a relationship that is already absent does nothing.
        """
        if value is None:
            # Only delete when present so that clearing is idempotent.
            if Name.AFRelationship in self.obj:
                del self.obj[Name.AFRelationship]
        else:
            self.obj[Name.AFRelationship] = value

    def __repr__(self):
        if self.filename:
            return (
                f"<pikepdf._core.AttachedFileSpec for {self.filename!r}, "
                f"description {self.description!r}>"
            )
        return f"<pikepdf._core.AttachedFileSpec description {self.description!r}>"
@augments(AttachedFile)
class Extend_AttachedFile:
    @property
    def creation_date(self) -> datetime.datetime | None:
        """Return the decoded creation date, or None when none is stored."""
        raw = self._creation_date
        return decode_pdf_date(raw) if raw else None

    @creation_date.setter
    def creation_date(self, value: datetime.datetime):
        encoded = encode_pdf_date(value)
        self._creation_date = encoded

    @property
    def mod_date(self) -> datetime.datetime | None:
        """Return the decoded modification date, or None when none is stored."""
        raw = self._mod_date
        return decode_pdf_date(raw) if raw else None

    @mod_date.setter
    def mod_date(self, value: datetime.datetime):
        encoded = encode_pdf_date(value)
        self._mod_date = encoded

    def read_bytes(self) -> bytes:
        """Return the bytes read from the underlying object."""
        return self.obj.read_bytes()

    def __repr__(self):
        # Attributes are read in the order in which they appear in the text.
        head = f'<pikepdf._core.AttachedFile objid={self.obj.objgen} size={self.size} '
        middle = f'mime_type={self.mime_type} creation_date={self.creation_date} '
        tail = f'mod_date={self.mod_date}>'
        return head + middle + tail
@augments(NameTree)
class Extend_NameTree:
    def keys(self):
        """Return a keys view over the tree's map form."""
        as_map = self._as_map()
        return KeysView(as_map)

    def values(self):
        """Return a values view over the tree's map form."""
        as_map = self._as_map()
        return ValuesView(as_map)

    def items(self):
        """Return an items view over the tree's map form."""
        as_map = self._as_map()
        return ItemsView(as_map)

    # Reuse the generic MutableMapping implementations of these methods.
    get = MutableMapping.get
    pop = MutableMapping.pop
    popitem = MutableMapping.popitem
    clear = MutableMapping.clear
    update = MutableMapping.update
    setdefault = MutableMapping.setdefault


# Register NameTree as a virtual subclass of MutableMapping.
MutableMapping.register(NameTree)
@augments(NumberTree)
class Extend_NumberTree:
    def keys(self):
        """Return a keys view over the tree's map form."""
        as_map = self._as_map()
        return KeysView(as_map)

    def values(self):
        """Return a values view over the tree's map form."""
        as_map = self._as_map()
        return ValuesView(as_map)

    def items(self):
        """Return an items view over the tree's map form."""
        as_map = self._as_map()
        return ItemsView(as_map)

    # Reuse the generic MutableMapping implementations of these methods.
    get = MutableMapping.get
    pop = MutableMapping.pop
    popitem = MutableMapping.popitem
    clear = MutableMapping.clear
    update = MutableMapping.update
    setdefault = MutableMapping.setdefault


# Register NumberTree as a virtual subclass of MutableMapping.
MutableMapping.register(NumberTree)