/src/qpdf/include/qpdf/QPDFWriter.hh
Line | Count | Source |
1 | | // Copyright (c) 2005-2021 Jay Berkenbilt |
2 | | // Copyright (c) 2022-2026 Jay Berkenbilt and Manfred Holger |
3 | | // |
4 | | // This file is part of qpdf. |
5 | | // |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
7 | | // in compliance with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
12 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
13 | | // or implied. See the License for the specific language governing permissions and limitations under |
14 | | // the License. |
15 | | // |
16 | | // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
17 | | // License. At your option, you may continue to consider qpdf to be licensed under those terms. |
18 | | // Please see the manual for additional information. |
19 | | |
20 | | #ifndef QPDFWRITER_HH |
21 | | #define QPDFWRITER_HH |
22 | | |
23 | | #include <qpdf/Constants.h> |
24 | | #include <qpdf/DLL.h> |
25 | | #include <qpdf/Types.h> |
26 | | |
27 | | #include <qpdf/Buffer.hh> |
28 | | #include <qpdf/PDFVersion.hh> |
29 | | #include <qpdf/Pipeline.hh> |
30 | | #include <qpdf/Pl_Buffer.hh> |
31 | | #include <qpdf/QPDFObjGen.hh> |
32 | | #include <qpdf/QPDFObjectHandle.hh> |
33 | | #include <qpdf/QPDFXRefEntry.hh> |
34 | | |
35 | | #include <bitset> |
36 | | #include <cstdio> |
37 | | #include <functional> |
38 | | #include <list> |
39 | | #include <map> |
40 | | #include <memory> |
41 | | #include <set> |
42 | | #include <string> |
43 | | #include <string_view> |
44 | | #include <vector> |
45 | | |
46 | | namespace qpdf |
47 | | { |
48 | | class Writer; |
49 | | } |
50 | | |
51 | | class QPDF; |
52 | | |
53 | | // This class implements a simple writer for saving QPDF objects to new PDF files. See comments |
54 | | // throughout this header file for additional details. |
55 | | class QPDFWriter |
56 | | { |
57 | | public: |
58 | | // Construct a QPDFWriter object without specifying output. You must call one of the output |
59 | | // setting routines defined below. |
60 | | QPDF_DLL |
61 | | QPDFWriter(QPDF& pdf); |
62 | | |
63 | | // Create a QPDFWriter object that writes its output to a file or to stdout. This is equivalent |
64 | | // to using the previous constructor and then calling setOutputFilename(). See |
65 | | // setOutputFilename() for details. |
66 | | QPDF_DLL |
67 | | QPDFWriter(QPDF& pdf, char const* filename); |
68 | | |
69 | | // Create a QPDFWriter object that writes its output to an already open FILE*. This is |
70 | | // equivalent to calling the first constructor and then calling setOutputFile(). See |
71 | | // setOutputFile() for details. |
72 | | QPDF_DLL |
73 | | QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file); |
74 | | |
75 | 9.05k | ~QPDFWriter() = default; |
76 | | |
77 | | class QPDF_DLL_CLASS ProgressReporter |
78 | | { |
79 | | public: |
80 | | QPDF_DLL |
81 | | virtual ~ProgressReporter(); |
82 | | |
83 | | // This method is called with a value from 0 to 100 to indicate approximate progress through |
84 | | // the write process. See registerProgressReporter. |
85 | | virtual void reportProgress(int) = 0; |
86 | | }; |
87 | | |
88 | | // This is a progress reporter that takes a function. It is used by the C APIs, but it is |
89 | | // available if you want to just register a function or other callable as a handler. |
90 | | class QPDF_DLL_CLASS FunctionProgressReporter: public ProgressReporter |
91 | | { |
92 | | public: |
93 | | QPDF_DLL |
94 | | FunctionProgressReporter(std::function<void(int)>); |
95 | | QPDF_DLL |
96 | | ~FunctionProgressReporter() override; |
97 | | QPDF_DLL |
98 | | void reportProgress(int) override; |
99 | | |
100 | | private: |
101 | | std::function<void(int)> handler; |
102 | | }; |
103 | | |
104 | | // Setting Output. Output may be set only one time. If you don't use the filename or FILE* version |
105 | | // of the QPDFWriter constructor, you must call exactly one of these methods. |
106 | | |
107 | | // Passing nullptr as filename means write to stdout. QPDFWriter will create a zero-length |
108 | | // output file upon construction. If write fails, the empty or partially written file will not |
109 | | // be deleted. This is by design: sometimes the partial file may be useful for tracking down |
110 | | // problems. If your application doesn't want the partially written file to be left behind, you |
111 | | // should delete it if the eventual call to write fails. |
112 | | QPDF_DLL |
113 | | void setOutputFilename(char const* filename); |
114 | | |
115 | | // Write to the given FILE*, which must be opened by the caller. If close_file is true, |
116 | | // QPDFWriter will close the file. Otherwise, the caller must close the file. The file does not |
117 | | // need to be seekable; it will be written to in a single pass. It must be open in binary mode. |
118 | | QPDF_DLL |
119 | | void setOutputFile(char const* description, FILE* file, bool close_file); |
120 | | |
121 | | // Indicate that QPDFWriter should create a memory buffer to contain the final PDF file. Obtain |
122 | | // the memory by calling getBuffer(). |
123 | | QPDF_DLL |
124 | | void setOutputMemory(); |
125 | | |
126 | | // Return the buffer object containing the PDF file. If setOutputMemory() has been called, this |
127 | | // method may be called exactly one time after write() has returned. The caller is responsible |
128 | | // for deleting the buffer when done. See also getBufferSharedPointer(). |
129 | | QPDF_DLL |
130 | | Buffer* getBuffer(); |
131 | | |
132 | | // Return getBuffer() in a shared pointer. |
133 | | QPDF_DLL |
134 | | std::shared_ptr<Buffer> getBufferSharedPointer(); |
135 | | |
136 | | // Supply your own pipeline object. Output will be written to this pipeline, and QPDFWriter |
137 | | // will call finish() on the pipeline. It is the caller's responsibility to manage the memory |
138 | | // for the pipeline. The pipeline is never deleted by QPDFWriter, which makes it possible for |
139 | | // you to call additional methods on the pipeline after the writing is finished. |
140 | | QPDF_DLL |
141 | | void setOutputPipeline(Pipeline*); |
142 | | |
143 | | // Setting Parameters |
144 | | |
145 | | // Set the value of object stream mode. In disable mode, we never generate any object streams. |
146 | | // In preserve mode, we preserve object stream structure from the original file. In generate |
147 | | // mode, we generate our own object streams. In all cases, we generate a conventional |
148 | | // cross-reference table if there are no object streams and a cross-reference stream if there |
149 | | // are object streams. The default is o_preserve. |
150 | | QPDF_DLL |
151 | | void setObjectStreamMode(qpdf_object_stream_e); |
152 | | |
153 | | // Set value of stream data mode. This is an older interface. Instead of using this, prefer |
154 | | // setCompressStreams() and setDecodeLevel(). This method is retained for compatibility, but it |
155 | | // does not cover the full range of available configurations. The mapping between this and the |
156 | | // new methods is as follows: |
157 | | // |
158 | | // qpdf_s_uncompress: |
159 | | // setCompressStreams(false) |
160 | | // setDecodeLevel(qpdf_dl_generalized) |
161 | | // qpdf_s_preserve: |
162 | | // setCompressStreams(false) |
163 | | // setDecodeLevel(qpdf_dl_none) |
164 | | // qpdf_s_compress: |
165 | | // setCompressStreams(true) |
166 | | // setDecodeLevel(qpdf_dl_generalized) |
167 | | // |
168 | | // The default is qpdf_s_compress. |
169 | | QPDF_DLL |
170 | | void setStreamDataMode(qpdf_stream_data_e); |
171 | | |
172 | | // If true, compress any uncompressed streams when writing them. Metadata streams are a special |
173 | | // case and are not compressed even if this is true. This is true by default for QPDFWriter. If |
174 | | // you want QPDFWriter to leave uncompressed streams uncompressed, pass false to this method. |
175 | | QPDF_DLL |
176 | | void setCompressStreams(bool); |
177 | | |
178 | | // When QPDFWriter encounters streams, this parameter controls the behavior with respect to |
179 | | // attempting to apply any filters to the streams when copying to the output. The decode levels |
180 | | // are as follows: |
181 | | // |
182 | | // qpdf_dl_none: Do not attempt to apply any filters. Streams remain as they appear in the |
183 | | // original file. Note that uncompressed streams may still be compressed on output. You can |
184 | | // disable that by calling setCompressStreams(false). |
185 | | // |
186 | | // qpdf_dl_generalized: This is the default. QPDFWriter will apply LZWDecode, ASCII85Decode, |
187 | | // ASCIIHexDecode, and FlateDecode filters on the input. When combined with |
188 | | // setCompressStreams(true), which is the default, the effect of this is that streams filtered |
189 | | // with these older and less efficient filters will be recompressed with the Flate filter. By |
190 | | // default, as a special case, if a stream is already compressed with FlateDecode and |
191 | | // setCompressStreams is enabled, the original compressed data will be preserved. This behavior |
192 | | // can be overridden by calling setRecompressFlate(true). |
193 | | // |
194 | | // qpdf_dl_specialized: In addition to uncompressing the generalized compression formats, |
195 | | // supported non-lossy compression will also be decoded. At present, this includes the |
196 | | // RunLengthDecode filter. |
197 | | // |
198 | | // qpdf_dl_all: In addition to generalized and non-lossy specialized filters, supported lossy |
199 | | // compression filters will be applied. At present, this includes DCTDecode (JPEG) compression. |
200 | | // Note that compressing the resulting data with DCTDecode again will accumulate loss, so avoid |
201 | | // multiple compression and decompression cycles. This is mostly useful for retrieving image |
202 | | // data. |
203 | | QPDF_DLL |
204 | | void setDecodeLevel(qpdf_stream_decode_level_e); |
205 | | |
206 | | // By default, when both the input and output contents of a stream are compressed with Flate, |
207 | | // qpdf does not uncompress and recompress the stream. Passing true here causes it to do so. |
208 | | // This can be useful if recompressing all streams with a higher compression level, which can be |
209 | | // set by calling the static method Pl_Flate::setCompressionLevel. |
210 | | QPDF_DLL |
211 | | void setRecompressFlate(bool); |
212 | | |
213 | | // Set value of content stream normalization. The default is "false". If true, we attempt to |
214 | | // normalize newlines inside of content streams. Some constructs such as inline images may |
215 | | // thwart our efforts. There may be some cases where this can damage the content stream. This |
216 | | // flag should be used only for debugging and experimenting with PDF content streams. Never use |
217 | | // it for production files. |
218 | | QPDF_DLL |
219 | | void setContentNormalization(bool); |
220 | | |
221 | | // Set QDF mode. QDF mode causes special "pretty printing" of PDF objects and adds comments for |
222 | | // easier perusing of files. Resulting PDF files can be edited in a text editor and then run |
223 | | // through fix-qdf to update cross reference tables and stream lengths. |
224 | | QPDF_DLL |
225 | | void setQDFMode(bool); |
226 | | |
227 | | // Preserve unreferenced objects. The default behavior is to discard any object that is not |
228 | | // visited during a traversal of the object structure from the trailer. |
229 | | QPDF_DLL |
230 | | void setPreserveUnreferencedObjects(bool); |
231 | | |
232 | | // Always write a newline before the endstream keyword. This helps with PDF/A compliance, though |
233 | | // it is not sufficient for it. |
234 | | QPDF_DLL |
235 | | void setNewlineBeforeEndstream(bool); |
236 | | |
237 | | // Set the minimum PDF version. If the PDF version of the input file (or previously set minimum |
238 | | // version) is less than the version passed to this method, the PDF version of the output file |
239 | | // will be set to this value. If the original PDF file's version or previously set minimum |
240 | | // version is already this version or later, the original file's version will be used. |
241 | | // QPDFWriter automatically sets the minimum version to 1.4 when R3 encryption parameters are |
242 | | // used, and to 1.5 when object streams are used. |
243 | | QPDF_DLL |
244 | | void setMinimumPDFVersion(std::string const&, int extension_level = 0); |
245 | | QPDF_DLL |
246 | | void setMinimumPDFVersion(PDFVersion const&); |
247 | | |
248 | | // Force the PDF version of the output file to be a given version. Use of this function may |
249 | | // create PDF files that will not work properly with older PDF viewers. When a PDF version is |
250 | | // set using this function, qpdf will use this version even if the file contains features that |
251 | | // are not supported in that version of PDF. In other words, you should only use this function |
252 | | // if you are sure the PDF file in question has no features of newer versions of PDF or if you |
253 | | // are willing to create files that old viewers may try to open but not be able to properly |
254 | | // interpret. If any encryption has been applied to the document either explicitly or by |
255 | | // preserving the encryption of the source document, forcing the PDF version to a value too low |
256 | | // to support that type of encryption will explicitly disable encryption. Additionally, forcing |
257 | | // to a version below 1.5 will disable object streams. |
258 | | QPDF_DLL |
259 | | void forcePDFVersion(std::string const&, int extension_level = 0); |
260 | | |
261 | | // Provide additional text to insert in the PDF file somewhere near the beginning of the file. |
262 | | // This can be used to add comments to the beginning of a PDF file, for example, if those |
263 | | // comments are to be consumed by some other application. No checks are performed to ensure |
264 | | // that the text inserted here is valid PDF. If you want to insert multiline comments, you will |
265 | | // need to include \n in the string yourself and start each line with %. An extra newline will |
266 | | // be appended if one is not already present at the end of your text. |
267 | | QPDF_DLL |
268 | | void setExtraHeaderText(std::string const&); |
269 | | |
270 | | // Causes a deterministic /ID value to be generated. When this is set, the current time and |
271 | | // output file name are not used as part of /ID generation. Instead, a digest of all significant |
272 | | // parts of the output file's contents is included in the /ID calculation. Use of a |
273 | | // deterministic /ID can be handy when it is desirable for a repeat of the same qpdf operation |
274 | | // on the same inputs being written to the same outputs with the same parameters to generate |
275 | | // exactly the same results. This feature is incompatible with encrypted files because, for |
276 | | // encrypted files, the /ID is generated before any part of the file is written since it is an |
277 | | // input to the encryption process. |
278 | | QPDF_DLL |
279 | | void setDeterministicID(bool); |
280 | | |
281 | | // Cause a static /ID value to be generated. Use only in test suites. See also |
282 | | // setDeterministicID. |
283 | | QPDF_DLL |
284 | | void setStaticID(bool); |
285 | | |
286 | | // Use a fixed initialization vector for AES-CBC encryption. This is not secure. It should be |
287 | | // used only in test suites for creating predictable encrypted output. |
288 | | QPDF_DLL |
289 | | void setStaticAesIV(bool); |
290 | | |
291 | | // Suppress inclusion of comments indicating original object IDs when writing QDF files. This |
292 | | // can also be useful for testing, particularly when using comparison of two qdf files to |
293 | | // determine whether two PDF files have identical content. |
294 | | QPDF_DLL |
295 | | void setSuppressOriginalObjectIDs(bool); |
296 | | |
297 | | // Preserve encryption. The default is true unless prefiltering, content normalization, or qdf |
298 | | // mode has been selected in which case encryption is never preserved. Encryption is also not |
299 | | // preserved if we explicitly set encryption parameters. |
300 | | QPDF_DLL |
301 | | void setPreserveEncryption(bool); |
302 | | |
303 | | // Copy encryption parameters from another QPDF object. If you want to copy encryption from the |
304 | | // object you are writing, call setPreserveEncryption(true) instead. |
305 | | QPDF_DLL |
306 | | void copyEncryptionParameters(QPDF&); |
307 | | |
308 | | // Set up for encrypted output. User and owner password both must be specified. Either or both |
309 | | // may be the empty string. Note that qpdf does not apply any special treatment to the empty |
310 | | // string, which makes it possible to create encrypted files with empty owner passwords and |
311 | | // non-empty user passwords or with the same password for both user and owner. Some PDF reading |
312 | | // products don't handle such files very well. Enabling encryption disables stream prefiltering |
313 | | // and content normalization. Note that setting R2 encryption parameters sets the PDF version |
314 | | // to at least 1.3, setting R3 encryption parameters pushes the PDF version number to at |
315 | | // least 1.4, setting R4 parameters pushes the version to at least 1.5, or if AES is used, 1.6, |
316 | | // and setting R5 or R6 parameters pushes the version to at least 1.7 with extension level 3. |
317 | | // |
318 | | // Note about Unicode passwords: the PDF specification requires passwords to be encoded with PDF |
319 | | // Doc encoding for R <= 4 and UTF-8 for R >= 5. In all cases, these methods take strings of |
320 | | // bytes as passwords. It is up to the caller to ensure that passwords are properly encoded. The |
321 | | // qpdf command-line tool tries to do this, as discussed in the manual. If you are doing this |
322 | | // from your own application, QUtil contains many transcoding functions that could be useful to |
323 | | // you, most notably utf8_to_pdf_doc. |
324 | | |
325 | | // R2 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See |
326 | | // "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0 |
327 | | // specification. |
328 | | QPDF_DLL |
329 | | void setR2EncryptionParametersInsecure( |
330 | | char const* user_password, |
331 | | char const* owner_password, |
332 | | bool allow_print, |
333 | | bool allow_modify, |
334 | | bool allow_extract, |
335 | | bool allow_annotate); |
336 | | // R3 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See |
337 | | // "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0 |
338 | | // specification. |
339 | | QPDF_DLL |
340 | | void setR3EncryptionParametersInsecure( |
341 | | char const* user_password, |
342 | | char const* owner_password, |
343 | | bool allow_accessibility, |
344 | | bool allow_extract, |
345 | | bool allow_assemble, |
346 | | bool allow_annotate_and_form, |
347 | | bool allow_form_filling, |
348 | | bool allow_modify_other, |
349 | | qpdf_r3_print_e print); |
350 | | // When use_aes=false, this call enables R4 with RC4, which is a weak cryptographic algorithm. |
351 | | // Even with use_aes=true, the overall encryption scheme is weak. Don't use it unless you have |
352 | | // to. See "Weak Cryptography" in the manual. This encryption format is deprecated in the |
353 | | // PDF 2.0 specification. |
354 | | QPDF_DLL |
355 | | void setR4EncryptionParametersInsecure( |
356 | | char const* user_password, |
357 | | char const* owner_password, |
358 | | bool allow_accessibility, |
359 | | bool allow_extract, |
360 | | bool allow_assemble, |
361 | | bool allow_annotate_and_form, |
362 | | bool allow_form_filling, |
363 | | bool allow_modify_other, |
364 | | qpdf_r3_print_e print, |
365 | | bool encrypt_metadata, |
366 | | bool use_aes); |
367 | | // R5 is deprecated. Do not use it for production use. Writing R5 is supported by qpdf |
368 | | // primarily to generate test files for applications that may need to test R5 support. |
369 | | QPDF_DLL |
370 | | void setR5EncryptionParameters( |
371 | | char const* user_password, |
372 | | char const* owner_password, |
373 | | bool allow_accessibility, |
374 | | bool allow_extract, |
375 | | bool allow_assemble, |
376 | | bool allow_annotate_and_form, |
377 | | bool allow_form_filling, |
378 | | bool allow_modify_other, |
379 | | qpdf_r3_print_e print, |
380 | | bool encrypt_metadata); |
381 | | // This is the only password-based encryption format supported by the PDF specification. |
382 | | QPDF_DLL |
383 | | void setR6EncryptionParameters( |
384 | | char const* user_password, |
385 | | char const* owner_password, |
386 | | bool allow_accessibility, |
387 | | bool allow_extract, |
388 | | bool allow_assemble, |
389 | | bool allow_annotate_and_form, |
390 | | bool allow_form_filling, |
391 | | bool allow_modify_other, |
392 | | qpdf_r3_print_e print, |
393 | | bool encrypt_metadata_aes); |
394 | | |
395 | | // Create linearized output. Disables qdf mode, content normalization, and stream prefiltering. |
396 | | QPDF_DLL |
397 | | void setLinearization(bool); |
398 | | |
399 | | // For debugging QPDF: provide the name of a file to write pass1 of linearization to. The only |
400 | | // reason to use this is to debug QPDF. To linearize, QPDF writes out the file in two passes. |
401 | | // Usually the first pass is discarded, but lots of computations are made in pass 1. If a |
402 | | // linearized file comes out wrong, it can be helpful to look at the first pass. |
403 | | QPDF_DLL |
404 | | void setLinearizationPass1Filename(std::string const&); |
405 | | |
406 | | // Create PCLm output. This is only useful for clients that know how to create PCLm files. If a |
407 | | // file is structured exactly as PCLm requires, this call will tell QPDFWriter to write the PCLm |
408 | | // header, create certain unreferenced streams required by the standard, and write the objects |
409 | | // in the required order. Calling this on an ordinary PDF serves no purpose. There is no |
410 | | // command-line argument that causes this method to be called. |
411 | | QPDF_DLL |
412 | | void setPCLm(bool); |
413 | | |
414 | | // If you want to be notified of progress, derive a class from ProgressReporter and override the |
415 | | // reportProgress method. |
416 | | QPDF_DLL |
417 | | void registerProgressReporter(std::shared_ptr<ProgressReporter>); |
418 | | |
419 | | // Return the PDF version that will be written into the header. Calling this method does all the |
420 | | // preparation for writing, so it is an error to call any methods that may cause a change to the |
421 | | // version afterward. Adding new objects to the original file after calling this may also cause |
422 | | // problems. It is safe to update existing objects or stream contents after calling this method, |
423 | | // e.g., to include the final version number in metadata. |
424 | | QPDF_DLL |
425 | | std::string getFinalVersion(); |
426 | | |
427 | | // Write the final file. There is no expectation of being able to call write() more than once. |
428 | | QPDF_DLL |
429 | | void write(); |
430 | | |
431 | | // Return renumbered ObjGen that was written into the final file. This method can be used after |
432 | | // calling write(). |
433 | | QPDF_DLL |
434 | | QPDFObjGen getRenumberedObjGen(QPDFObjGen); |
435 | | |
436 | | // Return XRef entry that was written into the final file. This method can be used after calling |
437 | | // write(). |
438 | | QPDF_DLL |
439 | | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); |
440 | | |
441 | | // The following structs / classes are not part of the public API. |
442 | | struct Object; |
443 | | struct NewObject; |
444 | | class ObjTable; |
445 | | class NewObjTable; |
446 | | |
447 | | private: |
448 | | friend class qpdf::Writer; |
449 | | |
450 | | class Members; |
451 | | |
452 | | std::shared_ptr<Members> m; |
453 | | }; |
454 | | |
455 | | #endif // QPDFWRITER_HH |