Coverage Report

Created: 2026-01-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/include/qpdf/QPDFWriter.hh
Line
Count
Source
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2026 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
7
// in compliance with the License. You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software distributed under the License
12
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
// or implied. See the License for the specific language governing permissions and limitations under
14
// the License.
15
//
16
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
17
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
18
// Please see the manual for additional information.
19
20
#ifndef QPDFWRITER_HH
21
#define QPDFWRITER_HH
22
23
#include <qpdf/Constants.h>
24
#include <qpdf/DLL.h>
25
#include <qpdf/Types.h>
26
27
#include <qpdf/Buffer.hh>
28
#include <qpdf/PDFVersion.hh>
29
#include <qpdf/Pipeline.hh>
30
#include <qpdf/Pl_Buffer.hh>
31
#include <qpdf/QPDFObjGen.hh>
32
#include <qpdf/QPDFObjectHandle.hh>
33
#include <qpdf/QPDFXRefEntry.hh>
34
35
#include <bitset>
36
#include <cstdio>
37
#include <functional>
38
#include <list>
39
#include <map>
40
#include <memory>
41
#include <set>
42
#include <string>
43
#include <string_view>
44
#include <vector>
45
46
namespace qpdf
47
{
48
    class Writer;
49
}
50
51
class QPDF;
52
53
// This class implements a simple writer for saving QPDF objects to new PDF files.  See comments
54
// throughout the header file for additional details.
55
class QPDFWriter
56
{
57
  public:
58
    // Construct a QPDFWriter object without specifying output.  You must call one of the output
59
    // setting routines defined below.
60
    QPDF_DLL
61
    QPDFWriter(QPDF& pdf);
62
63
    // Create a QPDFWriter object that writes its output to a file or to stdout.  This is equivalent
64
    // to using the previous constructor and then calling setOutputFilename().  See
65
    // setOutputFilename() for details.
66
    QPDF_DLL
67
    QPDFWriter(QPDF& pdf, char const* filename);
68
69
    // Create a QPDFWriter object that writes its output to an already open FILE*.  This is
70
    // equivalent to calling the first constructor and then calling setOutputFile().  See
71
    // setOutputFile() for details.
72
    QPDF_DLL
73
    QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file);
74
75
9.05k
    ~QPDFWriter() = default;
76
77
    // Abstract interface for receiving progress notifications from QPDFWriter. To be notified of
    // progress, derive a class from ProgressReporter, override reportProgress, and hand an instance
    // to registerProgressReporter.
    class QPDF_DLL_CLASS ProgressReporter
78
    {
79
      public:
80
        // Virtual destructor: instances are registered through a pointer to this base type.
        QPDF_DLL
81
        virtual ~ProgressReporter();
82
83
        // This method is called with a value from 0 to 100 to indicate approximate progress through
84
        // the write process. See registerProgressReporter. It is pure virtual, so every concrete
        // reporter must provide an implementation.
85
        virtual void reportProgress(int) = 0;
86
    };
87
88
    // This is a progress reporter that takes a function. It is used by the C APIs, but it is
89
    // available if you want to just register a C function as a handler.
90
    class QPDF_DLL_CLASS FunctionProgressReporter: public ProgressReporter
91
    {
92
      public:
93
        // Construct a reporter from a callable that accepts the int progress value.
        // NOTE(review): the constructor is not explicit, so a std::function<void(int)> converts
        // implicitly to this type.
        QPDF_DLL
94
        FunctionProgressReporter(std::function<void(int)>);
95
        QPDF_DLL
96
        ~FunctionProgressReporter() override;
97
        // Implementation of ProgressReporter::reportProgress. The definition is not visible in this
        // header; presumably it forwards the value to handler -- confirm against the implementation.
        QPDF_DLL
98
        void reportProgress(int) override;
99
100
      private:
101
        // Callable with the same signature as the constructor argument; presumably the function
        // supplied at construction -- confirm against the implementation.
        std::function<void(int)> handler;
102
    };
103
104
    // Setting Output.  Output may be set only one time.  If you don't use the filename or FILE*
105
    // version of the QPDFWriter constructor, you must call exactly one of these methods.
106
107
    // Passing nullptr as filename means write to stdout.  QPDFWriter will create a zero-length
108
    // output file upon construction.  If write fails, the empty or partially written file will not
109
    // be deleted.  This is by design: sometimes the partial file may be useful for tracking down
110
    // problems.  If your application doesn't want the partially written file to be left behind, you
111
    // should delete it if the eventual call to write fails.
112
    QPDF_DLL
113
    void setOutputFilename(char const* filename);
114
115
    // Write to the given FILE*, which must be opened by the caller. If close_file is true,
116
    // QPDFWriter will close the file. Otherwise, the caller must close the file.  The file does not
117
    // need to be seekable; it will be written to in a single pass. It must be open in binary mode.
118
    QPDF_DLL
119
    void setOutputFile(char const* description, FILE* file, bool close_file);
120
121
    // Indicate that QPDFWriter should create a memory buffer to contain the final PDF file.  Obtain
122
    // the memory by calling getBuffer().
123
    QPDF_DLL
124
    void setOutputMemory();
125
126
    // Return the buffer object containing the PDF file. If setOutputMemory() has been called, this
127
    // method may be called exactly one time after write() has returned. The caller is responsible
128
    // for deleting the buffer when done. See also getBufferSharedPointer().
129
    QPDF_DLL
130
    Buffer* getBuffer();
131
132
    // Return getBuffer() in a shared pointer.
133
    QPDF_DLL
134
    std::shared_ptr<Buffer> getBufferSharedPointer();
135
136
    // Supply your own pipeline object.  Output will be written to this pipeline, and QPDFWriter
137
    // will call finish() on the pipeline.  It is the caller's responsibility to manage the memory
138
    // for the pipeline.  The pipeline is never deleted by QPDFWriter, which makes it possible for
139
    // you to call additional methods on the pipeline after the writing is finished.
140
    QPDF_DLL
141
    void setOutputPipeline(Pipeline*);
142
143
    // Setting Parameters
144
145
    // Set the value of object stream mode.  In disable mode, we never generate any object streams.
146
    // In preserve mode, we preserve object stream structure from the original file.  In generate
147
    // mode, we generate our own object streams.  In all cases, we generate a conventional
148
    // cross-reference table if there are no object streams and a cross-reference stream if there
149
    // are object streams.  The default is o_preserve.
150
    QPDF_DLL
151
    void setObjectStreamMode(qpdf_object_stream_e);
152
153
    // Set value of stream data mode. This is an older interface. Instead of using this, prefer
154
    // setCompressStreams() and setDecodeLevel(). This method is retained for compatibility, but it
155
    // does not cover the full range of available configurations. The mapping between this and the
156
    // new methods is as follows:
157
    //
158
    // qpdf_s_uncompress:
159
    //   setCompressStreams(false)
160
    //   setDecodeLevel(qpdf_dl_generalized)
161
    // qpdf_s_preserve:
162
    //   setCompressStreams(false)
163
    //   setDecodeLevel(qpdf_dl_none)
164
    // qpdf_s_compress:
165
    //   setCompressStreams(true)
166
    //   setDecodeLevel(qpdf_dl_generalized)
167
    //
168
    // The default is qpdf_s_compress.
169
    QPDF_DLL
170
    void setStreamDataMode(qpdf_stream_data_e);
171
172
    // If true, compress any uncompressed streams when writing them. Metadata streams are a special
173
    // case and are not compressed even if this is true. This is true by default for QPDFWriter. If
174
    // you want QPDFWriter to leave uncompressed streams uncompressed, pass false to this method.
175
    QPDF_DLL
176
    void setCompressStreams(bool);
177
178
    // When QPDFWriter encounters streams, this parameter controls the behavior with respect to
179
    // attempting to apply any filters to the streams when copying to the output. The decode levels
180
    // are as follows:
181
    //
182
    // qpdf_dl_none: Do not attempt to apply any filters. Streams remain as they appear in the
183
    // original file. Note that uncompressed streams may still be compressed on output. You can
184
    // disable that by calling setCompressStreams(false).
185
    //
186
    // qpdf_dl_generalized: This is the default. QPDFWriter will apply LZWDecode, ASCII85Decode,
187
    // ASCIIHexDecode, and FlateDecode filters on the input. When combined with
188
    // setCompressStreams(true), which is the default, the effect of this is that streams filtered
189
    // with these older and less efficient filters will be recompressed with the Flate filter. By
190
    // default, as a special case, if a stream is already compressed with FlateDecode and
191
    // setCompressStreams is enabled, the original compressed data will be preserved. This behavior
192
    // can be overridden by calling setRecompressFlate(true).
193
    //
194
    // qpdf_dl_specialized: In addition to uncompressing the generalized compression formats,
195
    // supported non-lossy compression will also be decoded. At present, this includes the
196
    // RunLengthDecode filter.
197
    //
198
    // qpdf_dl_all: In addition to generalized and non-lossy specialized filters, supported lossy
199
    // compression filters will be applied. At present, this includes DCTDecode (JPEG) compression.
200
    // Note that compressing the resulting data with DCTDecode again will accumulate loss, so avoid
201
    // multiple compression and decompression cycles. This is mostly useful for retrieving image
202
    // data.
203
    QPDF_DLL
204
    void setDecodeLevel(qpdf_stream_decode_level_e);
205
206
    // By default, when both the input and output contents of a stream are compressed with Flate,
207
    // qpdf does not uncompress and recompress the stream. Passing true here causes it to do so.
208
    // This can be useful if recompressing all streams with a higher compression level, which can be
209
    // set by calling the static method Pl_Flate::setCompressionLevel.
210
    QPDF_DLL
211
    void setRecompressFlate(bool);
212
213
    // Set value of content stream normalization.  The default is "false".  If true, we attempt to
214
    // normalize newlines inside of content streams.  Some constructs such as inline images may
215
    // thwart our efforts.  There may be some cases where this can damage the content stream.  This
216
    // flag should be used only for debugging and experimenting with PDF content streams.  Never use
217
    // it for production files.
218
    QPDF_DLL
219
    void setContentNormalization(bool);
220
221
    // Set QDF mode.  QDF mode causes special "pretty printing" of PDF objects and adds comments for
222
    // easier perusing of files. Resulting PDF files can be edited in a text editor and then run
223
    // through fix-qdf to update cross reference tables and stream lengths.
224
    QPDF_DLL
225
    void setQDFMode(bool);
226
227
    // Preserve unreferenced objects. The default behavior is to discard any object that is not
228
    // visited during a traversal of the object structure from the trailer.
229
    QPDF_DLL
230
    void setPreserveUnreferencedObjects(bool);
231
232
    // Always write a newline before the endstream keyword. This helps with PDF/A compliance, though
233
    // it is not sufficient for it.
234
    QPDF_DLL
235
    void setNewlineBeforeEndstream(bool);
236
237
    // Set the minimum PDF version.  If the PDF version of the input file (or previously set minimum
238
    // version) is less than the version passed to this method, the PDF version of the output file
239
    // will be set to this value.  If the original PDF file's version or previously set minimum
240
    // version is already this version or later, the original file's version will be used.
241
    // QPDFWriter automatically sets the minimum version to 1.4 when R3 encryption parameters are
242
    // used, and to 1.5 when object streams are used.
243
    QPDF_DLL
244
    void setMinimumPDFVersion(std::string const&, int extension_level = 0);
245
    QPDF_DLL
246
    void setMinimumPDFVersion(PDFVersion const&);
247
248
    // Force the PDF version of the output file to be a given version. Use of this function may
249
    // create PDF files that will not work properly with older PDF viewers.  When a PDF version is
250
    // set using this function, qpdf will use this version even if the file contains features that
251
    // are not supported in that version of PDF.  In other words, you should only use this function
252
    // if you are sure the PDF file in question has no features of newer versions of PDF or if you
253
    // are willing to create files that old viewers may try to open but not be able to properly
254
    // interpret. If any encryption has been applied to the document either explicitly or by
255
    // preserving the encryption of the source document, forcing the PDF version to a value too low
256
    // to support that type of encryption will explicitly disable encryption. Additionally, forcing
257
    // to a version below 1.5 will disable object streams.
258
    QPDF_DLL
259
    void forcePDFVersion(std::string const&, int extension_level = 0);
260
261
    // Provide additional text to insert in the PDF file somewhere near the beginning of the file.
262
    // This can be used to add comments to the beginning of a PDF file, for example, if those
263
    // comments are to be consumed by some other application.  No checks are performed to ensure
264
    // that the text inserted here is valid PDF.  If you want to insert multiline comments, you will
265
    // need to include \n in the string yourself and start each line with %.  An extra newline will
266
    // be appended if one is not already present at the end of your text.
267
    QPDF_DLL
268
    void setExtraHeaderText(std::string const&);
269
270
    // Causes a deterministic /ID value to be generated. When this is set, the current time and
271
    // output file name are not used as part of /ID generation. Instead, a digest of all significant
272
    // parts of the output file's contents is included in the /ID calculation. Use of a
273
    // deterministic /ID can be handy when it is desirable for a repeat of the same qpdf operation
274
    // on the same inputs being written to the same outputs with the same parameters to generate
275
    // exactly the same results. This feature is incompatible with encrypted files because, for
276
    // encrypted files, the /ID is generated before any part of the file is written since it is an
277
    // input to the encryption process.
278
    QPDF_DLL
279
    void setDeterministicID(bool);
280
281
    // Cause a static /ID value to be generated.  Use only in test suites.  See also
282
    // setDeterministicID.
283
    QPDF_DLL
284
    void setStaticID(bool);
285
286
    // Use a fixed initialization vector for AES-CBC encryption.  This is not secure.  It should be
287
    // used only in test suites for creating predictable encrypted output.
288
    QPDF_DLL
289
    void setStaticAesIV(bool);
290
291
    // Suppress inclusion of comments indicating original object IDs when writing QDF files.  This
292
    // can also be useful for testing, particularly when using comparison of two qdf files to
293
    // determine whether two PDF files have identical content.
294
    QPDF_DLL
295
    void setSuppressOriginalObjectIDs(bool);
296
297
    // Preserve encryption.  The default is true unless prefiltering, content normalization, or qdf
298
    // mode has been selected in which case encryption is never preserved.  Encryption is also not
299
    // preserved if we explicitly set encryption parameters.
300
    QPDF_DLL
301
    void setPreserveEncryption(bool);
302
303
    // Copy encryption parameters from another QPDF object.  If you want to copy encryption from the
304
    // object you are writing, call setPreserveEncryption(true) instead.
305
    QPDF_DLL
306
    void copyEncryptionParameters(QPDF&);
307
308
    // Set up for encrypted output.  User and owner password both must be specified.  Either or both
309
    // may be the empty string.  Note that qpdf does not apply any special treatment to the empty
310
    // string, which makes it possible to create encrypted files with empty owner passwords and
311
    // non-empty user passwords or with the same password for both user and owner.  Some PDF reading
312
    // products don't handle such files very well.  Enabling encryption disables stream prefiltering
313
    // and content normalization.  Note that setting R2 encryption parameters sets the PDF version
314
    // to at least 1.3, setting R3 encryption parameters pushes the PDF version number to at
315
    // least 1.4, setting R4 parameters pushes the version to at least 1.5, or if AES is used, 1.6,
316
    // and setting R5 or R6 parameters pushes the version to at least 1.7 with extension level 3.
317
    //
318
    // Note about Unicode passwords: the PDF specification requires passwords to be encoded with PDF
319
    // Doc encoding for R <= 4 and UTF-8 for R >= 5. In all cases, these methods take strings of
320
    // bytes as passwords. It is up to the caller to ensure that passwords are properly encoded. The
321
    // qpdf command-line tool tries to do this, as discussed in the manual. If you are doing this
322
    // from your own application, QUtil contains many transcoding functions that could be useful to
323
    // you, most notably utf8_to_pdf_doc.
324
325
    // R2 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See
326
    // "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0
327
    // specification.
328
    QPDF_DLL
329
    void setR2EncryptionParametersInsecure(
330
        char const* user_password,
331
        char const* owner_password,
332
        bool allow_print,
333
        bool allow_modify,
334
        bool allow_extract,
335
        bool allow_annotate);
336
    // R3 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See
337
    // "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0
338
    // specification.
339
    QPDF_DLL
340
    void setR3EncryptionParametersInsecure(
341
        char const* user_password,
342
        char const* owner_password,
343
        bool allow_accessibility,
344
        bool allow_extract,
345
        bool allow_assemble,
346
        bool allow_annotate_and_form,
347
        bool allow_form_filling,
348
        bool allow_modify_other,
349
        qpdf_r3_print_e print);
350
    // When use_aes=false, this call enables R4 with RC4, which is a weak cryptographic algorithm.
351
    // Even with use_aes=true, the overall encryption scheme is weak. Don't use it unless you have
352
    // to. See "Weak Cryptography" in the manual. This encryption format is deprecated in the
353
    // PDF 2.0 specification.
354
    QPDF_DLL
355
    void setR4EncryptionParametersInsecure(
356
        char const* user_password,
357
        char const* owner_password,
358
        bool allow_accessibility,
359
        bool allow_extract,
360
        bool allow_assemble,
361
        bool allow_annotate_and_form,
362
        bool allow_form_filling,
363
        bool allow_modify_other,
364
        qpdf_r3_print_e print,
365
        bool encrypt_metadata,
366
        bool use_aes);
367
    // R5 is deprecated.  Do not use it for production use.  Writing R5 is supported by qpdf
368
    // primarily to generate test files for applications that may need to test R5 support.
369
    QPDF_DLL
370
    void setR5EncryptionParameters(
371
        char const* user_password,
372
        char const* owner_password,
373
        bool allow_accessibility,
374
        bool allow_extract,
375
        bool allow_assemble,
376
        bool allow_annotate_and_form,
377
        bool allow_form_filling,
378
        bool allow_modify_other,
379
        qpdf_r3_print_e print,
380
        bool encrypt_metadata);
381
    // This is the only password-based encryption format supported by the PDF specification.
382
    QPDF_DLL
383
    void setR6EncryptionParameters(
384
        char const* user_password,
385
        char const* owner_password,
386
        bool allow_accessibility,
387
        bool allow_extract,
388
        bool allow_assemble,
389
        bool allow_annotate_and_form,
390
        bool allow_form_filling,
391
        bool allow_modify_other,
392
        qpdf_r3_print_e print,
393
        bool encrypt_metadata_aes);
394
395
    // Create linearized output.  Disables qdf mode, content normalization, and stream prefiltering.
396
    QPDF_DLL
397
    void setLinearization(bool);
398
399
    // For debugging QPDF: provide the name of a file to write pass1 of linearization to. The only
400
    // reason to use this is to debug QPDF. To linearize, QPDF writes out the file in two passes.
401
    // Usually the first pass is discarded, but lots of computations are made in pass 1. If a
402
    // linearized file comes out wrong, it can be helpful to look at the first pass.
403
    QPDF_DLL
404
    void setLinearizationPass1Filename(std::string const&);
405
406
    // Create PCLm output. This is only useful for clients that know how to create PCLm files. If a
407
    // file is structured exactly as PCLm requires, this call will tell QPDFWriter to write the PCLm
408
    // header, create certain unreferenced streams required by the standard, and write the objects
409
    // in the required order. Calling this on an ordinary PDF serves no purpose. There is no
410
    // command-line argument that causes this method to be called.
411
    QPDF_DLL
412
    void setPCLm(bool);
413
414
    // If you want to be notified of progress, derive a class from ProgressReporter and override the
415
    // reportProgress method.
416
    QPDF_DLL
417
    void registerProgressReporter(std::shared_ptr<ProgressReporter>);
418
419
    // Return the PDF version that will be written into the header. Calling this method does all the
420
    // preparation for writing, so it is an error to call any methods that may cause a change to the
421
    // version. Adding new objects to the original file after calling this may also cause problems.
422
    // It is safe to update existing objects or stream contents after calling this method, e.g., to
423
    // include the final version number in metadata.
424
    QPDF_DLL
425
    std::string getFinalVersion();
426
427
    // Write the final file. There is no expectation of being able to call write() more than once.
428
    QPDF_DLL
429
    void write();
430
431
    // Return renumbered ObjGen that was written into the final file. This method can be used after
432
    // calling write().
433
    QPDF_DLL
434
    QPDFObjGen getRenumberedObjGen(QPDFObjGen);
435
436
    // Return XRef entry that was written into the final file. This method can be used after calling
437
    // write().
438
    QPDF_DLL
439
    std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
440
441
    // The following structs / classes are not part of the public API.
442
    struct Object;
443
    struct NewObject;
444
    class ObjTable;
445
    class NewObjTable;
446
447
  private:
448
    friend class qpdf::Writer;
449
450
    class Members;
451
452
    std::shared_ptr<Members> m;
453
};
454
455
#endif // QPDFWRITER_HH