1#
2# The Python Imaging Library.
3# $Id$
4#
5# PDF (Acrobat) file handling
6#
7# History:
8# 1996-07-16 fl Created
9# 1997-01-18 fl Fixed header
10# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
11# 2004-02-24 fl Fixes for 1 and P images.
12#
13# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
14# Copyright (c) 1996-1997 by Fredrik Lundh.
15#
16# See the README file for information on usage and redistribution.
17#
18
19##
20# Image plugin for PDF images (output only).
21##
22from __future__ import annotations
23
24import io
25import math
26import os
27import time
28from typing import IO, Any
29
30from . import Image, ImageFile, ImageSequence, PdfParser, __version__, features
31
32#
33# --------------------------------------------------------------------
34
35# object ids:
36# 1. catalogue
37# 2. pages
38# 3. image
39# 4. page
40# 5. page contents
41
42
def _save_all(im: Image.Image, fp: IO[bytes], filename: str | bytes) -> None:
    """Save handler registered for ``save_all`` requests.

    Forwards its arguments unchanged to ``_save`` with the ``save_all``
    flag switched on.
    """
    _save(im, fp, filename, True)
45
46
47##
48# (Internal) Image save plugin for the PDF format.
49
50
def _write_image(
    im: Image.Image,
    filename: str | bytes,
    existing_pdf: PdfParser.PdfParser,
    image_refs: list[PdfParser.IndirectReference],
) -> tuple[PdfParser.IndirectReference, str]:
    """Encode ``im`` and write it to ``existing_pdf`` as an image XObject.

    The PDF stream filter is chosen from the image mode:

    * ``1``: ``CCITTFaxDecode`` when the ``libtiff`` feature is available,
      otherwise ``DCTDecode``
    * ``L``, ``RGB``, ``CMYK``: ``DCTDecode`` (data produced by the JPEG saver)
    * ``LA``, ``RGBA``: ``JPXDecode`` (data produced by the JPEG2000 saver)
    * ``P``: ``ASCIIHexDecode``; if ``im.info`` has a ``transparency`` entry,
      the alpha channel is first written as a separate image through a
      recursive call and attached as ``SMask``

    :param im: Image to write.
    :param filename: Passed through to the JPEG / JPEG2000 savers.
    :param existing_pdf: Parser that receives the image object.
    :param image_refs: Pre-allocated object references. One is removed from
        the front for this image (one more, earlier, for a ``P`` soft mask).
    :returns: The reference the image was written under and the ``ProcSet``
        name matching the image type.
    :raises ValueError: If the image mode cannot be saved.
    """
    # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
    # (packbits) or LZWDecode (tiff/lzw compression).  Note that
    # PDF 1.2 also supports Flatedecode (zip compression).

    decode_parms = None
    decode_array = None

    #
    # Get image characteristics

    width, height = im.size
    mode = im.mode

    # Extra keys for the image dictionary; insertion order is the order in
    # which they are handed to write_obj().
    extra: dict[str, Any] = {"BitsPerComponent": 8}

    # mode -> (colour space, procset) for the modes stored as plain JPEG data
    jpeg_modes = {
        "L": ("DeviceGray", "ImageB"),  # grayscale
        "RGB": ("DeviceRGB", "ImageC"),  # color images
        "CMYK": ("DeviceCMYK", "ImageC"),  # color images
    }

    if mode in jpeg_modes:
        decode_filter = "DCTDecode"
        color_space, procset = jpeg_modes[mode]
        extra["ColorSpace"] = PdfParser.PdfName(color_space)
        if mode == "CMYK":
            decode_array = [1, 0, 1, 0, 1, 0, 1, 0]
    elif mode in ("LA", "RGBA"):
        decode_filter = "JPXDecode"
        procset = "ImageB" if mode == "LA" else "ImageC"
        extra["SMaskInData"] = 1
    elif mode == "1":
        procset = "ImageB"  # grayscale
        if features.check("libtiff"):
            decode_filter = "CCITTFaxDecode"
            extra["BitsPerComponent"] = 1
            fax_parms = PdfParser.PdfDict(
                {
                    "K": -1,
                    "BlackIs1": True,
                    "Columns": width,
                    "Rows": height,
                }
            )
            decode_parms = PdfParser.PdfArray([fax_parms])
        else:
            decode_filter = "DCTDecode"
        extra["ColorSpace"] = PdfParser.PdfName("DeviceGray")
    elif mode == "P":
        decode_filter = "ASCIIHexDecode"
        procset = "ImageI"  # indexed color
        palette = im.getpalette()
        assert palette is not None
        extra["ColorSpace"] = [
            PdfParser.PdfName("Indexed"),
            PdfParser.PdfName("DeviceRGB"),
            len(palette) // 3 - 1,
            PdfParser.PdfBinary(palette),
        ]

        if "transparency" in im.info:
            # The mask is written first, so it takes the reference at the
            # front of image_refs before this image takes its own.
            alpha = im.convert("LA").getchannel("A")
            alpha.encoderinfo = {}
            extra["SMask"] = _write_image(alpha, filename, existing_pdf, image_refs)[0]
    else:
        msg = f"cannot save mode {im.mode}"
        raise ValueError(msg)

    #
    # image

    buffer = io.BytesIO()

    if decode_filter == "DCTDecode":
        Image.SAVE["JPEG"](im, buffer, filename)
    elif decode_filter == "JPXDecode":
        # No BitsPerComponent entry is written for JPXDecode images.
        del extra["BitsPerComponent"]
        Image.SAVE["JPEG2000"](im, buffer, filename)
    elif decode_filter == "CCITTFaxDecode":
        # use a single strip
        single_strip = math.ceil(width / 8) * height
        im.save(buffer, "TIFF", compression="group4", strip_size=single_strip)
    elif decode_filter == "ASCIIHexDecode":
        hex_tile = ImageFile._Tile("hex", (0, 0) + im.size, 0, im.mode)
        ImageFile._save(im, buffer, [hex_tile])
    else:
        msg = f"unsupported PDF filter ({decode_filter})"
        raise ValueError(msg)

    payload = buffer.getvalue()
    filter_name = PdfParser.PdfName(decode_filter)
    pdf_filter: PdfParser.PdfArray | PdfParser.PdfName
    if decode_filter == "CCITTFaxDecode":
        # Skip the first 8 bytes of the TIFF output
        # (presumably the TIFF file header - TODO confirm).
        payload = payload[8:]
        pdf_filter = PdfParser.PdfArray([filter_name])
    else:
        pdf_filter = filter_name

    image_ref = image_refs.pop(0)
    existing_pdf.write_obj(
        image_ref,
        stream=payload,
        Type=PdfParser.PdfName("XObject"),
        Subtype=PdfParser.PdfName("Image"),
        Width=width,  # * 72.0 / x_resolution,
        Height=height,  # * 72.0 / y_resolution,
        Filter=pdf_filter,
        Decode=decode_array,
        DecodeParms=decode_parms,
        **extra,
    )

    return image_ref, procset
182
183
def _save(
    im: Image.Image, fp: IO[bytes], filename: str | bytes, save_all: bool = False
) -> None:
    """Write ``im`` to ``fp`` as a PDF document, one page per image/frame.

    Options are read from ``im.encoderinfo``:

    * ``append``: open the existing PDF in ``r+b`` mode instead of ``w+b``.
    * ``dpi`` (pair) or ``resolution`` (single value, default ``72.0``):
      used to scale pixel sizes to page units (``pixels * 72.0 / resolution``).
    * ``title``, ``author``, ``subject``, ``keywords``, ``creator``,
      ``producer``, ``creationDate``, ``modDate``: document information.
      Unless appending, the title defaults to the file name without
      extension and both dates default to the current ``time.gmtime()``.
    * ``append_images``: further images to add; only read when ``save_all``
      is true.

    :param im: Image to save; its ``encoderinfo`` supplies the options above.
    :param fp: Binary file object the PDF is written to.
    :param filename: Name of the target file, as ``str`` or ``bytes``.
    :param save_all: If true, write every frame of ``im`` and of each image
        in ``append_images``; otherwise write only ``im`` as a single page.
    """
    encoderinfo = im.encoderinfo
    is_appending = encoderinfo.get("append", False)
    filename_str = filename.decode() if isinstance(filename, bytes) else filename
    existing_pdf = PdfParser.PdfParser(
        f=fp, filename=filename_str, mode="r+b" if is_appending else "w+b"
    )

    # Make sure the parser is closed even if encoding or writing fails.
    try:
        dpi = encoderinfo.get("dpi")
        if dpi:
            x_resolution = dpi[0]
            y_resolution = dpi[1]
        else:
            x_resolution = y_resolution = encoderinfo.get("resolution", 72.0)

        # One timestamp for both dates, so they cannot differ by a second.
        now = None if is_appending else time.gmtime()
        info = {
            # Derive the title from the decoded name so that it is always
            # text, even when the caller supplied a bytes filename.
            "title": (
                None
                if is_appending
                else os.path.splitext(os.path.basename(filename_str))[0]
            ),
            "author": None,
            "subject": None,
            "keywords": None,
            "creator": None,
            "producer": None,
            "creationDate": now,
            "modDate": now,
        }
        for key, default in info.items():
            value = encoderinfo.get(key, default)
            if value:
                # PDF info keys are capitalised, e.g. "creationDate" -> "CreationDate"
                existing_pdf.info[key[0].upper() + key[1:]] = value

        #
        # make sure image data is available
        im.load()

        existing_pdf.start_writing()
        existing_pdf.write_header()
        existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")

        #
        # pages
        ims = [im]
        if save_all:
            for append_im in encoderinfo.get("append_images", []):
                append_im.encoderinfo = encoderinfo.copy()
                ims.append(append_im)

        # First pass: reserve object ids for every page before anything is
        # written. Per page the order is image, optional soft mask, page,
        # page contents.
        image_refs: list[PdfParser.IndirectReference] = []
        page_refs = []
        contents_refs = []
        for source_im in ims:
            frame_count = getattr(source_im, "n_frames", 1) if save_all else 1
            # NOTE(review): this is decided from the image's current frame and
            # applied to all of its pages; frames whose mode or transparency
            # differ from it are not accounted for here - confirm.
            needs_smask = source_im.mode == "P" and "transparency" in source_im.info
            for _ in range(frame_count):
                image_refs.append(existing_pdf.next_object_id(0))
                if needs_smask:
                    image_refs.append(existing_pdf.next_object_id(0))

                page_refs.append(existing_pdf.next_object_id(0))
                contents_refs.append(existing_pdf.next_object_id(0))
                existing_pdf.pages.append(page_refs[-1])

        #
        # catalog and list of pages
        existing_pdf.write_catalog()

        # Second pass: write the image, page and page contents of each page.
        page_number = 0
        for im_sequence in ims:
            im_pages: ImageSequence.Iterator | list[Image.Image] = (
                ImageSequence.Iterator(im_sequence) if save_all else [im_sequence]
            )
            for frame in im_pages:
                image_ref, procset = _write_image(
                    frame, filename, existing_pdf, image_refs
                )

                page_width = frame.width * 72.0 / x_resolution
                page_height = frame.height * 72.0 / y_resolution

                #
                # page

                existing_pdf.write_page(
                    page_refs[page_number],
                    Resources=PdfParser.PdfDict(
                        ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)],
                        XObject=PdfParser.PdfDict(image=image_ref),
                    ),
                    MediaBox=[0, 0, page_width, page_height],
                    Contents=contents_refs[page_number],
                )

                #
                # page contents

                # Scale the image to the page size and draw it.
                page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % (
                    page_width,
                    page_height,
                )

                existing_pdf.write_obj(contents_refs[page_number], stream=page_contents)

                page_number += 1

        #
        # trailer
        existing_pdf.write_xref_and_trailer()
        if hasattr(fp, "flush"):
            fp.flush()
    finally:
        existing_pdf.close()
300
301
302#
303# --------------------------------------------------------------------
304
305
# Associate the ".pdf" extension and the PDF MIME type with the "PDF" format.
Image.register_extension("PDF", ".pdf")
Image.register_mime("PDF", "application/pdf")

# Install the save handlers for "PDF" (regular save and save_all).
Image.register_save("PDF", _save)
Image.register_save_all("PDF", _save_all)