1#
2# The Python Imaging Library.
3# $Id$
4#
5# PDF (Acrobat) file handling
6#
7# History:
8# 1996-07-16 fl Created
9# 1997-01-18 fl Fixed header
10# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
11# 2004-02-24 fl Fixes for 1 and P images.
12#
13# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
14# Copyright (c) 1996-1997 by Fredrik Lundh.
15#
16# See the README file for information on usage and redistribution.
17#
18
19##
20# Image plugin for PDF images (output only).
21##
22from __future__ import annotations
23
24import io
25import math
26import os
27import time
28from typing import IO
29
30from . import Image, ImageFile, ImageSequence, PdfParser, __version__, features
31
32#
33# --------------------------------------------------------------------
34
35# object ids:
36# 1. catalogue
37# 2. pages
38# 3. image
39# 4. page
40# 5. page contents
41
42
def _save_all(im: Image.Image, fp: IO[bytes], filename: str | bytes) -> None:
    """Multi-page entry point: run ``_save`` with ``save_all`` switched on.

    :param im: image to write
    :param fp: binary file object receiving the PDF data
    :param filename: name associated with ``fp``
    """
    _save(im, fp, filename, True)
45
46
47##
48# (Internal) Image save plugin for the PDF format.
49
50
def _write_image(im, filename, existing_pdf, image_refs):
    """Encode ``im`` and write it to ``existing_pdf`` as an image XObject.

    The stream filter is picked from the image mode:

    * ``1``: ``CCITTFaxDecode`` when ``features.check("libtiff")`` is true,
      otherwise ``DCTDecode``
    * ``L``, ``RGB``, ``CMYK``: ``DCTDecode``
    * ``LA``, ``RGBA``: ``JPXDecode`` with ``SMaskInData`` set to 1
    * ``P``: ``ASCIIHexDecode`` with an ``Indexed`` colour space; if the image
      has ``"transparency"`` in ``im.info``, its alpha channel is written first
      (through a recursive call) and referenced as ``SMask``

    :param im: image to embed
    :param filename: handed unchanged to the JPEG and JPEG 2000 save handlers
    :param existing_pdf: object whose ``write_obj`` method receives the image
    :param image_refs: list of pre-allocated object references; this call removes
        the first entry and uses it for the image (a palette image with
        transparency removes one more, earlier, for its soft mask)
    :returns: ``(image_ref, procset)``, the reference the image was written
        under and the ProcSet name that goes with its mode
    :raises ValueError: if ``im.mode`` is not one of the modes listed above
    """
    # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
    # (packbits) or LZWDecode (tiff/lzw compression).  Note that
    # PDF 1.2 also supports FlateDecode (zip compression).

    # mode -> (stream filter, colour space name or None, procset)
    direct_modes = {
        "L": ("DCTDecode", "DeviceGray", "ImageB"),  # grayscale
        "LA": ("JPXDecode", None, "ImageB"),  # grayscale
        "RGB": ("DCTDecode", "DeviceRGB", "ImageC"),  # color images
        "RGBA": ("JPXDecode", None, "ImageC"),  # color images
        "CMYK": ("DCTDecode", "DeviceCMYK", "ImageC"),  # color images
    }

    #
    # Get image characteristics

    mode = im.mode
    width, height = im.size

    decode_parms = None
    decode_array = None
    # Insertion order is kept: these entries are forwarded to write_obj as
    # keyword arguments after the fixed ones.
    attributes = {"BitsPerComponent": 8}

    if mode == "1":
        if features.check("libtiff"):
            codec = "CCITTFaxDecode"
            attributes["BitsPerComponent"] = 1
            fax_options = PdfParser.PdfDict(
                {
                    "K": -1,
                    "BlackIs1": True,
                    "Columns": width,
                    "Rows": height,
                }
            )
            decode_parms = PdfParser.PdfArray([fax_options])
        else:
            codec = "DCTDecode"
        attributes["ColorSpace"] = PdfParser.PdfName("DeviceGray")
        procset = "ImageB"  # grayscale
    elif mode == "P":
        codec = "ASCIIHexDecode"
        procset = "ImageI"  # indexed color
        rgb_palette = im.getpalette()
        attributes["ColorSpace"] = [
            PdfParser.PdfName("Indexed"),
            PdfParser.PdfName("DeviceRGB"),
            len(rgb_palette) // 3 - 1,
            PdfParser.PdfBinary(rgb_palette),
        ]

        if "transparency" in im.info:
            # The soft mask is written before the image itself, so it takes
            # the earlier of the two pre-allocated references.
            alpha = im.convert("LA").getchannel("A")
            alpha.encoderinfo = {}

            mask_ref, _ = _write_image(alpha, filename, existing_pdf, image_refs)
            attributes["SMask"] = mask_ref
    elif mode in direct_modes:
        codec, colorspace, procset = direct_modes[mode]
        if colorspace is not None:
            attributes["ColorSpace"] = PdfParser.PdfName(colorspace)
        if codec == "JPXDecode":
            attributes["SMaskInData"] = 1
        if mode == "CMYK":
            decode_array = [1, 0, 1, 0, 1, 0, 1, 0]
    else:
        msg = f"cannot save mode {im.mode}"
        raise ValueError(msg)

    #
    # image

    encoded = io.BytesIO()

    if codec == "ASCIIHexDecode":
        hex_tile = ("hex", (0, 0) + im.size, 0, im.mode)
        ImageFile._save(im, encoded, [hex_tile])
    elif codec == "CCITTFaxDecode":
        # use a single strip
        single_strip = math.ceil(width / 8) * height
        im.save(encoded, "TIFF", compression="group4", strip_size=single_strip)
    elif codec == "DCTDecode":
        save_jpeg = Image.SAVE["JPEG"]
        save_jpeg(im, encoded, filename)
    elif codec == "JPXDecode":
        del attributes["BitsPerComponent"]
        save_jpeg2000 = Image.SAVE["JPEG2000"]
        save_jpeg2000(im, encoded, filename)
    else:
        msg = f"unsupported PDF filter ({codec})"
        raise ValueError(msg)

    payload = encoded.getvalue()
    filter_name = PdfParser.PdfName(codec)
    if codec == "CCITTFaxDecode":
        # Drop the first 8 bytes of the TIFF output (presumably the TIFF file
        # header, leaving the strip data - TODO confirm) and wrap the filter
        # name in an array, matching the array form of DecodeParms.
        payload = payload[8:]
        filter_entry = PdfParser.PdfArray([filter_name])
    else:
        filter_entry = filter_name

    image_ref = image_refs.pop(0)
    existing_pdf.write_obj(
        image_ref,
        stream=payload,
        Type=PdfParser.PdfName("XObject"),
        Subtype=PdfParser.PdfName("Image"),
        Width=width,  # * 72.0 / x_resolution,
        Height=height,  # * 72.0 / y_resolution,
        Filter=filter_entry,
        Decode=decode_array,
        DecodeParms=decode_parms,
        **attributes,
    )

    return image_ref, procset
175
176
def _save(im, fp, filename, save_all=False):
    """Write ``im`` to ``fp`` as a PDF document, one page per image frame.

    Options are read from ``im.encoderinfo``:

    * ``append``: open ``fp`` in ``"r+b"`` mode instead of ``"w+b"``; the
      default title and dates are then not filled in
    * ``dpi``: ``(x, y)`` resolution; if missing or falsy, ``resolution`` is
      used for both axes (default 72.0)
    * ``title``, ``author``, ``subject``, ``keywords``, ``creator``,
      ``producer``, ``creationDate``, ``modDate``: document information
      entries; falsy values are skipped
    * ``append_images``: further images to add as pages (only read when
      ``save_all`` is true)

    :param im: image to save; must have an ``encoderinfo`` dict
    :param fp: binary file object the PDF is written to
    :param filename: name associated with ``fp``; its base name without
        extension is the default title of a new document
    :param save_all: if true, write every frame of ``im`` and of each image in
        ``append_images`` instead of only the current frame of ``im``
    """
    is_appending = im.encoderinfo.get("append", False)
    existing_pdf = PdfParser.PdfParser(
        f=fp, filename=filename, mode="r+b" if is_appending else "w+b"
    )

    # Close the parser even when an error interrupts writing; previously it was
    # only closed after a fully successful save.
    try:
        dpi = im.encoderinfo.get("dpi")
        if dpi:
            x_resolution, y_resolution = dpi[0], dpi[1]
        else:
            x_resolution = y_resolution = im.encoderinfo.get("resolution", 72.0)

        # Sample the clock once so that the creation and modification dates of
        # a new document cannot differ.
        now = None if is_appending else time.gmtime()
        info = {
            "title": (
                None
                if is_appending
                else os.path.splitext(os.path.basename(filename))[0]
            ),
            "author": None,
            "subject": None,
            "keywords": None,
            "creator": None,
            "producer": None,
            "creationDate": now,
            "modDate": now,
        }
        for key, default in info.items():
            value = im.encoderinfo.get(key, default)
            if value:
                # "creationDate" -> "CreationDate", "title" -> "Title", ...
                existing_pdf.info[key[0].upper() + key[1:]] = value

        #
        # make sure image data is available
        im.load()

        existing_pdf.start_writing()
        existing_pdf.write_header()
        existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")

        #
        # pages
        ims = [im]
        if save_all:
            for append_im in im.encoderinfo.get("append_images", []):
                append_im.encoderinfo = im.encoderinfo.copy()
                ims.append(append_im)

        # Reserve object ids up front, in the order image, (soft mask), page,
        # page contents for every frame, so the catalog can list the pages
        # before any of them is written.
        image_refs = []
        page_refs = []
        contents_refs = []
        for im_sequence in ims:
            # Images without n_frames are single frame images.
            frame_count = getattr(im_sequence, "n_frames", 1) if save_all else 1
            # A palette image with transparency needs a second image object
            # for its soft mask.  This is decided from the sequence's current
            # frame and applied to all of its frames.
            needs_smask = (
                im_sequence.mode == "P" and "transparency" in im_sequence.info
            )
            for _ in range(frame_count):
                image_refs.append(existing_pdf.next_object_id(0))
                if needs_smask:
                    image_refs.append(existing_pdf.next_object_id(0))

                page_refs.append(existing_pdf.next_object_id(0))
                contents_refs.append(existing_pdf.next_object_id(0))
                existing_pdf.pages.append(page_refs[-1])

        #
        # catalog and list of pages
        existing_pdf.write_catalog()

        page_number = 0
        for im_sequence in ims:
            if save_all:
                im_pages = ImageSequence.Iterator(im_sequence)
            else:
                im_pages = [im_sequence]
            for frame in im_pages:
                image_ref, procset = _write_image(
                    frame, filename, existing_pdf, image_refs
                )

                # Page size: pixels scaled by 72 units per inch over the
                # requested resolution.
                page_width = frame.width * 72.0 / x_resolution
                page_height = frame.height * 72.0 / y_resolution

                #
                # page

                existing_pdf.write_page(
                    page_refs[page_number],
                    Resources=PdfParser.PdfDict(
                        ProcSet=[
                            PdfParser.PdfName("PDF"),
                            PdfParser.PdfName(procset),
                        ],
                        XObject=PdfParser.PdfDict(image=image_ref),
                    ),
                    MediaBox=[0, 0, page_width, page_height],
                    Contents=contents_refs[page_number],
                )

                #
                # page contents: scale the image to the page size and draw it

                page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % (
                    page_width,
                    page_height,
                )

                existing_pdf.write_obj(
                    contents_refs[page_number], stream=page_contents
                )

                page_number += 1

        #
        # trailer
        existing_pdf.write_xref_and_trailer()
        if hasattr(fp, "flush"):
            fp.flush()
    finally:
        existing_pdf.close()
293
294
295#
296# --------------------------------------------------------------------
297
298
# Hook the PDF writer into Pillow: the ".pdf" extension and MIME type first,
# then the multi-page ("save_all") and single-image save handlers.
Image.register_extension("PDF", ".pdf")
Image.register_mime("PDF", "application/pdf")

Image.register_save_all("PDF", _save_all)
Image.register_save("PDF", _save)