Coverage Report

Created: 2026-06-15 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/include/qpdf/QPDF.hh
Line
Count
Source
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2026 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
7
// in compliance with the License. You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software distributed under the License
12
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
// or implied. See the License for the specific language governing permissions and limitations under
14
// the License.
15
//
16
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
17
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
18
// Please see the manual for additional information.
19
20
#ifndef QPDF_HH
21
#define QPDF_HH
22
23
#include <qpdf/DLL.h>
24
#include <qpdf/Types.h>
25
26
#include <bitset>
27
#include <cstdio>
28
#include <functional>
29
#include <iostream>
30
#include <list>
31
#include <map>
32
#include <memory>
33
#include <string>
34
#include <string_view>
35
#include <vector>
36
37
#include <qpdf/Buffer.hh>
38
#include <qpdf/InputSource.hh>
39
#include <qpdf/PDFVersion.hh>
40
#include <qpdf/QPDFExc.hh>
41
#include <qpdf/QPDFObjGen.hh>
42
#include <qpdf/QPDFObjectHandle.hh>
43
#include <qpdf/QPDFStreamFilter.hh>
44
#include <qpdf/QPDFTokenizer.hh>
45
#include <qpdf/QPDFWriter.hh>
46
#include <qpdf/QPDFXRefEntry.hh>
47
48
class QPDFLogger;
49
50
class QPDF
51
{
52
  public:
53
    // Get the current version of the QPDF software. See also qpdf/DLL.h
54
    QPDF_DLL
55
    static std::string const& QPDFVersion();
56
57
    QPDF_DLL
58
    QPDF();
59
    QPDF_DLL
60
    ~QPDF();
61
62
    QPDF_DLL
63
    static std::shared_ptr<QPDF> create();
64
65
    // Associate a file with a QPDF object and do initial parsing of the file.  PDF objects are not
66
    // read until they are needed.  A QPDF object may be associated with only one file in its
67
    // lifetime.  This method must be called before any methods that potentially ask for information
68
    // about the PDF file are called. Prior to successfully calling this, the only methods that are
69
    // allowed are those that set parameters.  If the input file is not encrypted, either a null
70
    // password or an empty password can be used.  If the file is encrypted, either the user
71
    // password or the owner password may be supplied. The method setPasswordIsHexKey may be called
72
    // prior to calling this method or any of the other process methods to force the password to be
73
    // interpreted as a raw encryption key. See comments on setPasswordIsHexKey for more
74
    // information.
75
    QPDF_DLL
76
    void processFile(char const* filename, char const* password = nullptr);
77
78
    // Parse a PDF from a stdio FILE*.  The FILE must be open in binary mode and must be seekable.
79
    // It may be open read only. This works exactly like processFile except that the PDF file is
80
    // read from an already opened FILE*.  If close_file is true, the file will be closed at the
81
    // end.  Otherwise, the caller is responsible for closing the file.
82
    QPDF_DLL
83
    void processFile(
84
        char const* description, FILE* file, bool close_file, char const* password = nullptr);
85
86
    // Parse a PDF file loaded into a memory buffer.  This works exactly like processFile except
87
    // that the PDF file is in memory instead of on disk.  The description appears in any warning or
88
    // error message in place of the file name. The buffer is owned by the caller and must remain
89
    // valid for the lifetime of the QPDF object.
90
    QPDF_DLL
91
    void processMemoryFile(
92
        char const* description, char const* buf, size_t length, char const* password = nullptr);
93
94
    // Parse a PDF file loaded from a custom InputSource.  If you have your own method of retrieving
95
    // a PDF file, you can subclass InputSource and use this method.
96
    QPDF_DLL
97
    void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr);
98
99
    // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf
100
    // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See
101
    // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not
102
    // load stream data into memory for more than one stream at a time, even if the stream data is
103
    // specified inline.
104
    QPDF_DLL
105
    void createFromJSON(std::string const& json_file);
106
    QPDF_DLL
107
    void createFromJSON(std::shared_ptr<InputSource>);
108
109
    // Update a PDF from an input source that contains JSON in the same format as is written by
110
    // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the
111
    // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the
112
    // input JSON may be arbitrarily large.
113
    QPDF_DLL
114
    void updateFromJSON(std::string const& json_file);
115
    QPDF_DLL
116
    void updateFromJSON(std::shared_ptr<InputSource>);
117
118
    // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish()
119
    // method is not called on the pipeline.
120
    //
121
    // The decode_level parameter controls which streams are uncompressed in the JSON. Use
122
    // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible
123
    // values for json_stream_data can be found in qpdf/Constants.h and correspond to the
124
    // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix
125
    // must be specified. Each stream will be written to a file whose path is constructed by
126
    // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If
127
    // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in
128
    // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are
129
    // ignored. This corresponds to the --json-object command-line argument.
130
    //
131
    // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large
132
    // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the
133
    // JSON output in memory, but do so with caution as this will allocate enough memory to hold the
134
    // entire PDF file.
135
    QPDF_DLL
136
    void writeJSON(
137
        int version,
138
        Pipeline* p,
139
        qpdf_stream_decode_level_e decode_level,
140
        qpdf_json_stream_data_e json_stream_data,
141
        std::string const& file_prefix,
142
        std::set<std::string> wanted_objects);
143
144
    // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary.
145
    // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is
146
    // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value
147
    // are written to the pipeline assuming that a dictionary is already open. The parameter
148
    // first_key indicates whether this is the first key in an in-progress dictionary. It will be
149
    // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a
150
    // prettified JSON output. Remaining arguments are the same as the above version.
151
    QPDF_DLL
152
    void writeJSON(
153
        int version,
154
        Pipeline* p,
155
        bool complete,
156
        bool& first_key,
157
        qpdf_stream_decode_level_e decode_level,
158
        qpdf_json_stream_data_e json_stream_data,
159
        std::string const& file_prefix,
160
        std::set<std::string> wanted_objects);
161
162
    // Close or otherwise release the input source. Once this has been called, no other methods of
163
    // qpdf can be called safely except for getWarnings() and anyWarnings(). After this has been
164
    // called, it is safe to perform operations on the input file such as deleting or renaming it.
165
    QPDF_DLL
166
    void closeInputSource();
167
168
    // For certain forensic or investigatory purposes, it may sometimes be useful to specify the
169
    // encryption key directly, even though regular PDF applications do not provide a way to do
170
    // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass
171
    // the normal encryption key computation or recovery mechanisms and interpret the bytes in the
172
    // password as a hex-encoded encryption key. Note that we hex-encode the key because it may
173
    // contain null bytes and therefore can't be represented in a char const*.
174
    QPDF_DLL
175
    void setPasswordIsHexKey(bool);
176
177
    // Create a QPDF object for an empty PDF.  This PDF has no pages or objects other than a minimal
178
    // trailer, a document catalog, and a /Pages tree containing zero pages.  Pages and other
179
    // objects can be added to the file in the normal way, and the trailer and document catalog can
180
    // be mutated.  Calling this method is equivalent to calling processFile on an equivalent PDF
181
    // file.  See the pdf-create.cc example for a demonstration of how to use this method to create
182
    // a PDF file from scratch.
183
    QPDF_DLL
184
    void emptyPDF();
185
186
    // From 10.1: register a new filter implementation for a specific stream filter. You can add
187
    // your own implementations for new filter types or override existing ones provided by the
188
    // library. Registered stream filters are used for decoding only as you can override encoding
189
    // with stream data providers. For example, you could use this method to add support for one of
190
    // the other filter types by using additional third-party libraries that qpdf does not presently
191
    // use. The standard filters are implemented using QPDFStreamFilter classes.
192
    QPDF_DLL
193
    static void registerStreamFilter(
194
        std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
195
196
    // Parameter settings
197
198
    // To capture or redirect output, configure the logger returned by getLogger(). By default, all
199
    // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
200
    // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
201
    // configuring the logger.
202
    //
203
    // Note that no normal QPDF operations generate output to standard output, so for applications
204
    // that just wish to avoid creating output for warnings and don't call any check functions,
205
    // calling setSuppressWarnings(true) is sufficient.
206
    QPDF_DLL
207
    std::shared_ptr<QPDFLogger> getLogger();
208
    QPDF_DLL
209
    void setLogger(std::shared_ptr<QPDFLogger>);
210
211
    // This deprecated method is the old way to capture output, but it didn't capture all output.
212
    // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
213
    // configures a private logger, separating this object from the default logger, and calls
214
    // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
215
    [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
216
    setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
217
218
    // If true, ignore any cross-reference streams in a hybrid file (one that contains both
219
    // cross-reference streams and cross-reference tables).  This can be useful for testing to
220
    // ensure that a hybrid file would work with an older reader.
221
    QPDF_DLL
222
    void setIgnoreXRefStreams(bool);
223
224
    // By default, any warnings are issued to std::cerr or the error stream specified in a call to
225
    // setOutputStreams as they are encountered.  If this method is called with a true value,
226
    // reporting of warnings is suppressed.  You may still retrieve warnings by calling getWarnings.
227
    QPDF_DLL
228
    void setSuppressWarnings(bool);
229
230
    // Set the maximum number of warnings. A QPDFExc is thrown if the limit is exceeded, in which
231
    // case further use of the QPDF object is not permitted. This limit only applies during the
232
    // initial processing of an input file and is intended to limit the resources used to process
233
    // (possibly intentionally) severely damaged files that have little chance of being successfully
234
    // repaired.
235
    //
236
    // This method must only be called before calling `processFile` or any of the other process
237
    // methods. Once `processFile` successfully completes the limit is automatically canceled.
238
    QPDF_DLL
239
    void setMaxWarnings(size_t);
240
241
    // By default, QPDF will try to recover if it finds certain types of errors in PDF files.  If
242
    // turned off, it will throw an exception on the first such problem it finds without attempting
243
    // recovery.
244
    QPDF_DLL
245
    void setAttemptRecovery(bool);
246
247
    // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when
248
    // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object
249
    // makes it possible for the object and its input source to disappear before streams copied from
250
    // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to
251
    // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject
252
    // or addPage, the source object's input source must stick around until after the destination
253
    // PDF is written. If you call this method on the source QPDF object, it sends a signal to the
254
    // destination object that it must fully copy the stream data when copyForeignObject is called. It will do
255
    // this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid
256
    // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only
257
    // once regardless of how many objects the stream is copied into. The result is that, if you
258
    // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams,
259
    // you do not need to keep it or its input source around after copying its objects to another
260
    // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method
261
    // is called on the QPDF object you are copying FROM, not the one you are copying to. The
262
    // reasoning for this is that there's no reason a given QPDF may not get objects copied to it
263
    // from a variety of other objects, some transient and some not. Since what's relevant is
264
    // whether the source QPDF is transient, the method must be called on the source QPDF, not the
265
    // destination one. This method will make a copy of the stream in RAM, so be sure you have
266
    // enough memory to simultaneously hold all the streams you're copying.
267
    QPDF_DLL
268
    void setImmediateCopyFrom(bool);
269
270
    // Other public methods
271
272
    // Return the list of warnings that have been issued so far and clear the list.  This method may
273
    // be called even if processFile throws an exception.  Note that if setSuppressWarnings was not
274
    // called or was called with a false value, any warnings retrieved here will have already been
275
    // output.
276
    QPDF_DLL
277
    std::vector<QPDFExc> getWarnings();
278
279
    // Indicate whether any warnings have been issued so far. Does not clear the list of warnings.
280
    QPDF_DLL
281
    bool anyWarnings() const;
282
283
    // Indicate the number of warnings that have been issued since the last call to getWarnings.
284
    // Does not clear the list of warnings.
285
    QPDF_DLL
286
    size_t numWarnings() const;
287
288
    // Return an application-scoped unique ID for this QPDF object. This is not a globally unique
289
    // ID. It is constructed using a timestamp and a random number and is intended to be unique
290
    // among QPDF objects that are created by a single run of an application. While it's very likely
291
    // that these are actually globally unique, it is not recommended to use them for long-term
292
    // purposes.
293
    QPDF_DLL
294
    unsigned long long getUniqueId() const;
295
296
    // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings,
297
    // following warning suppression rules, and it will be available with getWarnings().
298
    QPDF_DLL
299
    void warn(QPDFExc const& e);
300
    // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename
301
    // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF
302
    // object.
303
    QPDF_DLL
304
    void warn(
305
        qpdf_error_code_e error_code,
306
        std::string const& object,
307
        qpdf_offset_t offset,
308
        std::string const& message);
309
310
    // Return the filename associated with the QPDF object.
311
    QPDF_DLL
312
    std::string getFilename() const;
313
    // Return PDF Version and extension level together as a PDFVersion object
314
    QPDF_DLL
315
    PDFVersion getVersionAsPDFVersion();
316
    // Return just the PDF version from the file
317
    QPDF_DLL
318
    std::string getPDFVersion() const;
319
    QPDF_DLL
320
    int getExtensionLevel();
321
    QPDF_DLL
322
    QPDFObjectHandle getTrailer();
323
    QPDF_DLL
324
    QPDFObjectHandle getRoot();
325
    QPDF_DLL
326
    std::map<QPDFObjGen, QPDFXRefEntry> getXRefTable();
327
328
    // Public factory methods
329
330
    // Create a new stream.  A subsequent call must be made to replaceStreamData() to provide data
331
    // for the stream.  The stream's dictionary may be retrieved by calling getDict(), and the
332
    // resulting dictionary may be modified.  Alternatively, you can create a new dictionary and
333
    // call replaceDict to install it.
334
    QPDF_DLL
335
    QPDFObjectHandle newStream();
336
337
    // Create a new stream.  Use the given buffer as the stream data.  The stream dictionary's
338
    // /Length key will automatically be set to the size of the data buffer.  If additional keys are
339
    // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting
340
    // dictionary may be modified.  This method is just a convenient wrapper around the newStream()
341
    // and replaceStreamData() methods.  It is a convenience method for streams that require no parameters
342
    // beyond the stream length. Note that you don't have to deal with compression yourself if you
343
    // use QPDFWriter.  By default, QPDFWriter will automatically compress uncompressed stream data.
344
    // Example programs are provided that illustrate this.
345
    QPDF_DLL
346
    QPDFObjectHandle newStream(std::shared_ptr<Buffer> data);
347
348
    // Create new stream with data from string.  This method will create a copy of the data rather
349
    // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
350
    QPDF_DLL
351
    QPDFObjectHandle newStream(std::string const& data);
352
353
    // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
354
    // going to be added to the QPDF object.  Normally you don't have to use this type since you can
355
    // just call QPDF::makeIndirectObject.  However, in some cases, if you have to create objects
356
    // with circular references, you may need to create a reserved object so that you can have a
357
    // reference to it and then replace the object later.  Reserved objects have the special
358
    // property that they can't be resolved to direct objects.  This makes it possible to replace a
359
    // reserved object with a new object while preserving existing references to it.  When you are
360
    // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
361
    // purpose rather than the more general QPDF::replaceObject.  It is an error to try to write a
362
    // QPDF with QPDFWriter if it has any reserved objects in it.
363
    QPDF_DLL
364
    QPDFObjectHandle newReserved();
365
    QPDF_DLL
366
    QPDFObjectHandle newIndirectNull();
367
368
    // Install this object handle as an indirect object and return an indirect reference to it.
369
    QPDF_DLL
370
    QPDFObjectHandle makeIndirectObject(QPDFObjectHandle);
371
372
    // Retrieve an object by object ID and generation. Returns an indirect reference to it. The
373
    // getObject() methods were added for qpdf 11.
374
    QPDF_DLL
375
    QPDFObjectHandle getObject(QPDFObjGen);
376
    QPDF_DLL
377
    QPDFObjectHandle getObject(int objid, int generation);
378
    // These are older methods, but there is no intention to deprecate
379
    // them.
380
    QPDF_DLL
381
    QPDFObjectHandle getObjectByObjGen(QPDFObjGen);
382
    QPDF_DLL
383
    QPDFObjectHandle getObjectByID(int objid, int generation);
384
385
    // Replace the object with the given object id with the given object. The object handle passed
386
    // in must be a direct object, though it may contain references to other indirect objects within
387
    // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that
388
    // pointed to the original object still pointed to the original object, resulting in confusing
389
    // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will
390
    // start pointing to the newly replaced object. Note that replacing an object with
391
    // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent
392
    // object is treated as a null object. To replace a reserved object, call replaceReserved
393
    // instead.
394
    QPDF_DLL
395
    void replaceObject(QPDFObjGen og, QPDFObjectHandle);
396
    QPDF_DLL
397
    void replaceObject(int objid, int generation, QPDFObjectHandle);
398
399
    // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that
400
    // referenced the objects did not notice the swap, but this was fixed in 10.2.1.
401
    QPDF_DLL
402
    void swapObjects(QPDFObjGen og1, QPDFObjGen og2);
403
    QPDF_DLL
404
    void swapObjects(int objid1, int generation1, int objid2, int generation2);
405
406
    // Replace a reserved object.  This is a wrapper around replaceObject but it guarantees that the
407
    // underlying object is a reserved object or a null object.  After this call, reserved will
408
    // be a reference to replacement.
409
    QPDF_DLL
410
    void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement);
411
412
    // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no
413
    // longer necessary to keep the original QPDF around after the call to copyForeignObject as long
414
    // as the source of any copied stream data is still available. Usually this means you just have
415
    // to keep the input file around, not the QPDF object. The exception to this is if you copy a
416
    // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the
417
    // original stream's QPDF object must stick around because the QPDF object is itself the source
418
    // of the original stream data. For a more in-depth discussion, please see the TODO file.
419
    // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one
420
    // you're copying FROM). If you do this prior to copying any of its objects, then neither the
421
    // source QPDF object nor its input source needs to stick around at all regardless of the
422
    // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is
423
    // called. See setImmediateCopyFrom for more information.
424
    //
425
    // The return value of this method is an indirect reference to the copied object in this file.
426
    // This method is intended to be used to copy non-page objects. To copy page objects, pass the
427
    // foreign page object directly to addPage (or addPageAt). If you copy objects that contain
428
    // references to pages, you should copy the pages first using addPage(At). Otherwise references
429
    // to the pages that have not been copied will be replaced with nulls. It is possible to use
430
    // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies
431
    // the object normally but does not update the page structure. For example, it is a valid use
432
    // case to use copyForeignObject for a page that you are going to turn into a form XObject,
433
    // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose.
434
    //
435
    // When copying objects with this method, object structure will be preserved, so all indirectly
436
    // referenced indirect objects will be copied as well.  This includes any circular references
437
    // that may exist.  The QPDF object keeps a record of what has already been copied, so shared
438
    // objects will not be copied multiple times.  This also means that if you mutate an object that
439
    // has already been copied and try to copy it again, it won't work since the modified object
440
    // will not be recopied.  Therefore, you should do all mutation on the original file that you
441
    // are going to do before you start copying its objects to a new file.
442
    QPDF_DLL
443
    QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign);
444
445
    // Encryption support
446
447
    enum encryption_method_e { e_none, e_unknown, e_rc4, e_aes, e_aesv3 };
448
449
    // To be removed from the public API in qpdf 13. See
450
    // <https://manual.qpdf.org/release-notes.html#r12-3-0-deprecate>.
451
    class EncryptionData
452
    {
453
      public:
454
        // Holds data read from the encryption dictionary; each argument sets the same-named member.
455
        EncryptionData(
456
            int V,
457
            int R,
458
            int Length_bytes,
459
            int P,
460
            std::string const& O,
461
            std::string const& U,
462
            std::string const& OE,
463
            std::string const& UE,
464
            std::string const& Perms,
465
            std::string const& id1,
466
            bool encrypt_metadata) :
467
            V(V),
468
            R(R),
469
            Length_bytes(Length_bytes),
470
            P(P),
471
            O(O),
472
            U(U),
473
            OE(OE),
474
            UE(UE),
475
            Perms(Perms),
476
            id1(id1),
477
            encrypt_metadata(encrypt_metadata)
478
0
        {
479
0
        }
480
481
        int getV() const;
482
        int getR() const;
483
        int getLengthBytes() const;
484
        int getP() const;
485
        std::string const& getO() const;
486
        std::string const& getU() const;
487
        std::string const& getOE() const;
488
        std::string const& getUE() const;
489
        std::string const& getPerms() const;
490
        std::string const& getId1() const;
491
        bool getEncryptMetadata() const;
492
493
        void setO(std::string const&);
494
        void setU(std::string const&);
495
        void setV5EncryptionParameters( // NOTE: order is O, OE, U, UE, Perms; the constructor takes O, U, OE, UE, Perms
496
            std::string const& O,
497
            std::string const& OE,
498
            std::string const& U,
499
            std::string const& UE,
500
            std::string const& Perms);
501
502
      private:
503
        EncryptionData(EncryptionData const&) = delete; // not copy-constructible
504
        EncryptionData& operator=(EncryptionData const&) = delete; // not copy-assignable
505
506
        int V;
507
        int R;
508
        int Length_bytes;
509
        int P;
510
        std::string O;
511
        std::string U;
512
        std::string OE;
513
        std::string UE;
514
        std::string Perms;
515
        std::string id1;
516
        bool encrypt_metadata;
517
    };
518
    QPDF_DLL
519
    bool isEncrypted() const;
520
521
    QPDF_DLL
522
    bool isEncrypted(int& R, int& P);
523
524
    QPDF_DLL
525
    bool isEncrypted(
526
        int& R,
527
        int& P,
528
        int& V,
529
        encryption_method_e& stream_method,
530
        encryption_method_e& string_method,
531
        encryption_method_e& file_method);
532
533
    QPDF_DLL
534
    bool ownerPasswordMatched() const;
535
536
    QPDF_DLL
537
    bool userPasswordMatched() const;
538
539
    // Encryption permissions -- not enforced by QPDF
540
    QPDF_DLL
541
    bool allowAccessibility();
542
    QPDF_DLL
543
    bool allowExtractAll();
544
    QPDF_DLL
545
    bool allowPrintLowRes();
546
    QPDF_DLL
547
    bool allowPrintHighRes();
548
    QPDF_DLL
549
    bool allowModifyAssembly();
550
    QPDF_DLL
551
    bool allowModifyForm();
552
    QPDF_DLL
553
    bool allowModifyAnnotation();
554
    QPDF_DLL
555
    bool allowModifyOther();
556
    QPDF_DLL
557
    bool allowModifyAll();
558
559
    // Helper function to trim padding from user password.  Calling trim_user_password on the result
560
    // of getPaddedUserPassword gives getTrimmedUserPassword's result.
561
    QPDF_DLL
562
    static void trim_user_password(std::string& user_password);
563
    QPDF_DLL
564
    static std::string compute_data_key(
565
        std::string const& encryption_key,
566
        int objid,
567
        int generation,
568
        bool use_aes,
569
        int encryption_V,
570
        int encryption_R);
571
572
    // To be removed in qpdf 13. See <https://manual.qpdf.org/release-notes.html#r12-3-0-deprecate>.
573
    [[deprecated("to be removed in qpdf 13")]]
574
    QPDF_DLL static std::string
575
    compute_encryption_key(std::string const& password, EncryptionData const& data);
576
577
    QPDF_DLL
578
    static void compute_encryption_O_U(
579
        char const* user_password,
580
        char const* owner_password,
581
        int V,
582
        int R,
583
        int key_len,
584
        int P,
585
        bool encrypt_metadata,
586
        std::string const& id1,
587
        std::string& O,
588
        std::string& U);
589
    QPDF_DLL
590
    static void compute_encryption_parameters_V5(
591
        char const* user_password,
592
        char const* owner_password,
593
        int V,
594
        int R,
595
        int key_len,
596
        int P,
597
        bool encrypt_metadata,
598
        std::string const& id1,
599
        std::string& encryption_key,
600
        std::string& O,
601
        std::string& U,
602
        std::string& OE,
603
        std::string& UE,
604
        std::string& Perms);
605
    // Return the full user password as stored in the PDF file.  For files encrypted with 40-bit or
606
    // 128-bit keys, the user password can be recovered when the file is opened using the owner
607
    // password.  This is not possible with newer encryption formats. If you are attempting to
608
    // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead.
609
    QPDF_DLL
610
    std::string const& getPaddedUserPassword() const;
611
    // Return human-readable form of user password subject to same limitations as
612
    // getPaddedUserPassword().
613
    QPDF_DLL
614
    std::string getTrimmedUserPassword() const;
615
    // Return the previously computed or retrieved encryption key for this file
616
    QPDF_DLL
617
    std::string getEncryptionKey() const;
618
    // Remove security restrictions associated with digitally signed files. From qpdf 11.7.0, this
619
    // is called by QPDFAcroFormDocumentHelper::disableDigitalSignatures and is more useful when
620
    // called from there than when just called by itself.
621
    QPDF_DLL
622
    void removeSecurityRestrictions();
623
624
    // Linearization support
625
626
    // Returns true iff the file starts with a linearization parameter dictionary.  Does no
627
    // additional validation.
628
    QPDF_DLL
629
    bool isLinearized();
630
631
    // Performs various sanity checks on a linearized file. Return true if no errors or warnings.
632
    // Otherwise, return false and output errors and warnings to the default output stream
633
    // (std::cout or whatever is configured in the logger). It is recommended for linearization
634
    // errors to be treated as warnings.
635
    QPDF_DLL
636
    bool checkLinearization();
637
638
    // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints
639
    // tables to the default output stream. Normalization includes adding min values to delta values
640
    // and adjusting offsets based on the location and size of the primary hint stream.
641
    QPDF_DLL
642
    void showLinearizationData();
643
644
    // Shows the contents of the cross-reference table
645
    QPDF_DLL
646
    void showXRefTable();
647
648
    // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this
649
    // method was used to detect all indirect references to objects that don't exist and resolve
650
    // them by replacing them with null, which is how the PDF spec says to interpret such dangling
651
    // references. This method is called automatically when you try to add any new objects, if you
652
    // call getAllObjects, and before a file is written. The qpdf object caches whether it has run
653
    // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run
654
    // again if you had explicitly added new objects that may have additional dangling references.
655
    QPDF_DLL
656
    void fixDanglingReferences(bool force = false);
657
658
    // Return the approximate number of indirect objects. It is approximate because not all objects
659
    // in the file are preserved in all cases, and gaps in object numbering are not preserved.
660
    QPDF_DLL
661
    size_t getObjectCount();
662
663
    // Returns a list of indirect objects for every object in the xref table. Useful for discovering
664
    // objects that are not otherwise referenced.
665
    QPDF_DLL
666
    std::vector<QPDFObjectHandle> getAllObjects();
667
668
    // Optimization support -- see doc/optimization.  Implemented in QPDF_optimization.cc
669
670
    // The object_stream_data map maps from a "compressed" object to the object stream that contains
671
    // it. This enables optimize to populate the object <-> user maps with only uncompressed
672
    // objects. If allow_changes is false, an exception will be thrown if any changes are made
673
    // during the optimization process. This is available so that the test suite can make sure that
674
    // a linearized file is already optimized. When called in this way, optimize() still populates
675
    // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is
676
    // called for each stream object. The function should return 2 if optimization should discard
677
    // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should
678
    // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for
679
    // stream dictionary keys it will be regenerating.
680
    [[deprecated("Unused - see release notes for qpdf 12.1.0")]] QPDF_DLL void optimize(
681
        std::map<int, int> const& object_stream_data,
682
        bool allow_changes = true,
683
        std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
684
685
    // Traverse page tree return all /Page objects. It also detects and resolves cases in which the
686
    // same /Page object is duplicated. For efficiency, this method returns a const reference to an
687
    // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but
688
    // direct manipulation of the pages tree or pushing inheritable objects to the page level may
689
    // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should
690
    // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache
691
    // was arguably incorrect, but it is being left here for compatibility. It is, however,
692
    // completely safe to use this for files that you are not modifying.
693
    QPDF_DLL
694
    std::vector<QPDFObjectHandle> const& getAllPages();
695
696
    // Const query returning a bool.
    // NOTE(review): presumably true once getAllPages() has been called on this QPDF -- confirm.
    QPDF_DLL
697
    bool everCalledGetAllPages() const;
698
    // Const query returning a bool.
    // NOTE(review): presumably true once inherited attributes have been pushed down to the pages
    // (see pushInheritedAttributesToPage()) -- confirm.
    QPDF_DLL
699
    bool everPushedInheritedAttributesToPages() const;
700
701
    // These methods, given a page object or its object/generation number, return the 0-based index
702
    // into the array returned by getAllPages() for that page. An exception is thrown if the page is
703
    // not found.
704
    QPDF_DLL
705
    int findPage(QPDFObjGen og);
706
    QPDF_DLL
707
    int findPage(QPDFObjectHandle& page);
708
709
    // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree.  If
710
    // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages
711
    // or changes to any /Pages objects, to calls to these page handling APIs, you never need to
712
    // call this method.  If you modify /Pages structures directly, you must call this method
713
    // afterwards.  This method updates the internal list of pages, so after calling this method,
714
    // any previous references returned by getAllPages() will be valid again.  It also resets any
715
    // state about having pushed inherited attributes in /Pages objects down to the pages, so if you
716
    // add any inheritable attributes to a /Pages object, you should also call this method.
717
    QPDF_DLL
718
    void updateAllPagesCache();
719
720
    // Legacy page handling API. These methods are not going anywhere, and you should feel free to
721
    // continue using them if it simplifies your code. Newer code should make use of
722
    // QPDFPageDocumentHelper instead as future page handling methods will be added there. The
723
    // functionality and specification of these legacy methods is identical to the identically named
724
    // methods there, except that these versions use QPDFObjectHandle instead of
725
    // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are
726
    // subtleties you need to know about, so please look at the comments there.
727
    QPDF_DLL
728
    void pushInheritedAttributesToPage();
729
    QPDF_DLL
730
    void addPage(QPDFObjectHandle newpage, bool first);
731
    QPDF_DLL
732
    void addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage);
733
    QPDF_DLL
734
    void removePage(QPDFObjectHandle page);
735
    // End legacy page helpers
736
737
    // End of the public API. The following classes and methods are for qpdf internal use only.
738
739
    class Doc;
740
741
    inline Doc& doc();
742
743
    // For testing only -- do not add to DLL
744
    static bool test_json_validators();
745
746
  private:
747
    // It has never been safe to copy QPDF objects as there is code in the library that assumes
748
    // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until
749
    // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf
750
    // 11, you can use QPDF::create to create them.
751
    QPDF(QPDF const&) = delete;
752
    QPDF& operator=(QPDF const&) = delete;
753
754
    static std::string const qpdf_version;
755
756
    class ObjCache;
757
    class EncryptionParameters;
758
    class StringDecrypter;
759
    class ResolveRecorder;
760
    class JSONReactor;
761
762
    // Private helper taking an object ID/generation pair by value.
    // NOTE(review): presumably removes the object identified by og from this QPDF -- confirm.
    void removeObject(QPDFObjGen og);
763
764
    // Calls finish() on the pipeline when done but does not delete it
    // NOTE(review): presumably writes the data of stream og (length bytes at offset in the input,
    // described by dict) to pipeline; the meaning of the bool result is not visible here --
    // confirm against the implementation.
765
    bool pipeStreamData(
766
        QPDFObjGen og,
767
        qpdf_offset_t offset,
768
        size_t length,
769
        QPDFObjectHandle dict,
770
        bool is_root_metadata,
771
        Pipeline* pipeline,
772
        bool suppress_warnings,
773
        bool will_retry);
774
    // Static overload of pipeStreamData: takes the encryption parameters, the input source and a
    // QPDF reference (qpdf_for_warning) explicitly, followed by the same parameters as the
    // non-static overload.
    // NOTE(review): qpdf_for_warning is presumably used only for issuing warnings -- confirm.
    static bool pipeStreamData(
775
        std::shared_ptr<QPDF::EncryptionParameters> encp,
776
        std::shared_ptr<InputSource> file,
777
        QPDF& qpdf_for_warning,
778
        QPDFObjGen og,
779
        qpdf_offset_t offset,
780
        size_t length,
781
        QPDFObjectHandle dict,
782
        bool is_root_metadata,
783
        Pipeline* pipeline,
784
        bool suppress_warnings,
785
        bool will_retry);
786
787
    // methods to support encryption -- implemented in QPDF_encryption.cc
788
    void initializeEncryption();
789
    // Static helper returning a std::string for the given encryption parameters, object
    // ID/generation and AES flag.
    // NOTE(review): presumably the key used to encrypt/decrypt object og -- confirm.
    static std::string
790
    getKeyForObject(std::shared_ptr<EncryptionParameters> encp, QPDFObjGen og, bool use_aes);
791
    void decryptString(std::string&, QPDFObjGen og);
792
    // Static helper. pipeline is a reference to a Pipeline pointer and heap is a reference to a
    // std::unique_ptr<Pipeline>, so the caller's values may be modified by this call.
    // NOTE(review): presumably inserts a decryption stage in front of pipeline, with heap taking
    // ownership of any newly allocated Pipeline -- confirm against the implementation.
    static void decryptStream(
793
        std::shared_ptr<EncryptionParameters> encp,
794
        std::shared_ptr<InputSource> file,
795
        QPDF& qpdf_for_warning,
796
        Pipeline*& pipeline,
797
        QPDFObjGen og,
798
        QPDFObjectHandle& stream_dict,
799
        bool is_root_metadata,
800
        std::unique_ptr<Pipeline>& heap);
801
802
    // JSON import
803
    void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
804
805
    class Members;
806
807
    // Keep all member variables inside the Members object, which we dynamically allocate. This
808
    // makes it possible to add new private members without breaking binary compatibility.
809
    std::unique_ptr<Members> m;
810
};
811
812
#endif // QPDF_HH