/src/qpdf/include/qpdf/QPDF.hh
Line | Count | Source |
1 | | // Copyright (c) 2005-2021 Jay Berkenbilt |
2 | | // Copyright (c) 2022-2026 Jay Berkenbilt and Manfred Holger |
3 | | // |
4 | | // This file is part of qpdf. |
5 | | // |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
7 | | // in compliance with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
12 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
13 | | // or implied. See the License for the specific language governing permissions and limitations under |
14 | | // the License. |
15 | | // |
16 | | // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
17 | | // License. At your option, you may continue to consider qpdf to be licensed under those terms. |
18 | | // Please see the manual for additional information. |
19 | | |
20 | | #ifndef QPDF_HH |
21 | | #define QPDF_HH |
22 | | |
23 | | #include <qpdf/DLL.h> |
24 | | #include <qpdf/Types.h> |
25 | | |
26 | | #include <bitset> |
27 | | #include <cstdio> |
28 | | #include <functional> |
29 | | #include <iostream> |
30 | | #include <list> |
31 | | #include <map> |
32 | | #include <memory> |
33 | | #include <string> |
34 | | #include <string_view> |
35 | | #include <vector> |
36 | | |
37 | | #include <qpdf/Buffer.hh> |
38 | | #include <qpdf/InputSource.hh> |
39 | | #include <qpdf/PDFVersion.hh> |
40 | | #include <qpdf/QPDFExc.hh> |
41 | | #include <qpdf/QPDFObjGen.hh> |
42 | | #include <qpdf/QPDFObjectHandle.hh> |
43 | | #include <qpdf/QPDFStreamFilter.hh> |
44 | | #include <qpdf/QPDFTokenizer.hh> |
45 | | #include <qpdf/QPDFWriter.hh> |
46 | | #include <qpdf/QPDFXRefEntry.hh> |
47 | | |
48 | | class QPDFLogger; |
49 | | |
50 | | class QPDF |
51 | | { |
52 | | public: |
53 | | // Get the current version of the QPDF software. See also qpdf/DLL.h |
54 | | QPDF_DLL |
55 | | static std::string const& QPDFVersion(); |
56 | | |
57 | | QPDF_DLL |
58 | | QPDF(); |
59 | | QPDF_DLL |
60 | | ~QPDF(); |
61 | | |
62 | | QPDF_DLL |
63 | | static std::shared_ptr<QPDF> create(); |
64 | | |
65 | | // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not |
66 | | // read until they are needed. A QPDF object may be associated with only one file in its |
67 | | // lifetime. This method must be called before any methods that potentially ask for information |
68 | | // about the PDF file are called. Prior to calling this, the only methods that are allowed are |
69 | | // those that set parameters. If the input file is not encrypted, either a null password or an |
70 | | // empty password can be used. If the file is encrypted, either the user password or the owner |
71 | | // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this |
72 | | // method or any of the other process methods to force the password to be interpreted as a raw |
73 | | // encryption key. See comments on setPasswordIsHexKey for more information. |
74 | | QPDF_DLL |
75 | | void processFile(char const* filename, char const* password = nullptr); |
76 | | |
77 | | // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. |
78 | | // It may be open read only. This works exactly like processFile except that the PDF file is |
79 | | // read from an already opened FILE*. If close_file is true, the file will be closed at the |
80 | | // end. Otherwise, the caller is responsible for closing the file. |
81 | | QPDF_DLL |
82 | | void processFile( |
83 | | char const* description, FILE* file, bool close_file, char const* password = nullptr); |
84 | | |
85 | | // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except |
86 | | // that the PDF file is in memory instead of on disk. The description appears in any warning or |
87 | | // error message in place of the file name. The buffer is owned by the caller and must remain |
88 | | // valid for the lifetime of the QPDF object. |
89 | | QPDF_DLL |
90 | | void processMemoryFile( |
91 | | char const* description, char const* buf, size_t length, char const* password = nullptr); |
92 | | |
93 | | // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving |
94 | | // a PDF file, you can subclass InputSource and use this method. |
95 | | QPDF_DLL |
96 | | void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr); |
97 | | |
98 | | // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf |
99 | | // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See |
100 | | // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not |
101 | | // load stream data into memory for more than one stream at a time, even if the stream data is |
102 | | // specified inline. |
103 | | QPDF_DLL |
104 | | void createFromJSON(std::string const& json_file); |
105 | | QPDF_DLL |
106 | | void createFromJSON(std::shared_ptr<InputSource>); |
107 | | |
108 | | // Update a PDF from an input source that contains JSON in the same format as is written by |
109 | | // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the |
110 | | // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the |
111 | | // input JSON may be arbitrarily large. |
112 | | QPDF_DLL |
113 | | void updateFromJSON(std::string const& json_file); |
114 | | QPDF_DLL |
115 | | void updateFromJSON(std::shared_ptr<InputSource>); |
116 | | |
117 | | // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish() |
118 | | // method is not called on the pipeline. |
119 | | // |
120 | | // The decode_level parameter controls which streams are uncompressed in the JSON. Use |
121 | | // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible |
122 | | // values for json_stream_data can be found in qpdf/Constants.h and correspond to the |
123 | | // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix |
124 | | // must be specified. Each stream will be written to a file whose path is constructed by |
125 | | // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If |
126 | | // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in |
127 | | // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are |
128 | | // ignored. This corresponds to the --json-object command-line argument. |
129 | | // |
130 | | // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large |
131 | | // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the |
132 | | // JSON output in memory, but do so with caution as this will allocate enough memory to hold the |
133 | | // entire PDF file. |
134 | | QPDF_DLL |
135 | | void writeJSON( |
136 | | int version, |
137 | | Pipeline* p, |
138 | | qpdf_stream_decode_level_e decode_level, |
139 | | qpdf_json_stream_data_e json_stream_data, |
140 | | std::string const& file_prefix, |
141 | | std::set<std::string> wanted_objects); |
142 | | |
143 | | // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. |
144 | | // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is |
145 | | // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value |
146 | | // are written to the pipeline assuming that a dictionary is already open. The parameter |
147 | | // first_key indicates whether this is the first key in an in-progress dictionary. It will be |
148 | | // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a |
149 | | // prettified JSON output. Remaining arguments are the same as the above version. |
150 | | QPDF_DLL |
151 | | void writeJSON( |
152 | | int version, |
153 | | Pipeline* p, |
154 | | bool complete, |
155 | | bool& first_key, |
156 | | qpdf_stream_decode_level_e decode_level, |
157 | | qpdf_json_stream_data_e json_stream_data, |
158 | | std::string const& file_prefix, |
159 | | std::set<std::string> wanted_objects); |
160 | | |
161 | | // Close or otherwise release the input source. Once this has been called, no other methods of |
162 | | // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been |
163 | | // called, it is safe to perform operations on the input file such as deleting or renaming it. |
164 | | QPDF_DLL |
165 | | void closeInputSource(); |
166 | | |
167 | | // For certain forensic or investigatory purposes, it may sometimes be useful to specify the |
168 | | // encryption key directly, even though regular PDF applications do not provide a way to do |
169 | | // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass |
170 | | // the normal encryption key computation or recovery mechanisms and interpret the bytes in the |
171 | | // password as a hex-encoded encryption key. Note that we hex-encode the key because it may |
172 | | // contain null bytes and therefore can't be represented in a char const*. |
173 | | QPDF_DLL |
174 | | void setPasswordIsHexKey(bool); |
175 | | |
176 | | // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal |
177 | | // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other |
178 | | // objects can be added to the file in the normal way, and the trailer and document catalog can |
179 | | // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF |
180 | | // file. See the pdf-create.cc example for a demonstration of how to use this method to create |
181 | | // a PDF file from scratch. |
182 | | QPDF_DLL |
183 | | void emptyPDF(); |
184 | | |
185 | | // From 10.1: register a new filter implementation for a specific stream filter. You can add |
186 | | // your own implementations for new filter types or override existing ones provided by the |
187 | | // library. Registered stream filters are used for decoding only as you can override encoding |
188 | | // with stream data providers. For example, you could use this method to add support for one of |
189 | | // the other filter types by using additional third-party libraries that qpdf does not presently |
190 | | // use. The standard filters are implemented using QPDFStreamFilter classes. |
191 | | QPDF_DLL |
192 | | static void registerStreamFilter( |
193 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory); |
194 | | |
195 | | // Parameter settings |
196 | | |
197 | | // To capture or redirect output, configure the logger returned by getLogger(). By default, all |
198 | | // QPDF and QPDFJob objects share the global logger. If you need a private logger for some |
199 | | // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on |
200 | | // configuring the logger. |
201 | | // |
202 | | // Note that no normal QPDF operations generate output to standard output, so for applications |
203 | | // that just wish to avoid creating output for warnings and don't call any check functions, |
204 | | // calling setSuppressWarnings(true) is sufficient. |
205 | | QPDF_DLL |
206 | | std::shared_ptr<QPDFLogger> getLogger(); |
207 | | QPDF_DLL |
208 | | void setLogger(std::shared_ptr<QPDFLogger>); |
209 | | |
210 | | // This deprecated method is the old way to capture output, but it didn't capture all output. |
211 | | // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it |
212 | | // configures a private logger, separating this object from the default logger, and calls |
213 | | // setOutputStreams on that logger. See QPDFLogger.hh for additional details. |
214 | | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void |
215 | | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); |
216 | | |
217 | | // If true, ignore any cross-reference streams in a hybrid file (one that contains both |
218 | | // cross-reference streams and cross-reference tables). This can be useful for testing to |
219 | | // ensure that a hybrid file would work with an older reader. |
220 | | QPDF_DLL |
221 | | void setIgnoreXRefStreams(bool); |
222 | | |
223 | | // By default, any warnings are issued to std::cerr or the error stream specified in a call to |
224 | | // setOutputStreams as they are encountered. If this method is called with a true value, |
225 | | // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings. |
226 | | QPDF_DLL |
227 | | void setSuppressWarnings(bool); |
228 | | |
229 | | // Set the maximum number of warnings. A QPDFExc is thrown if the limit is exceeded. |
230 | | QPDF_DLL |
231 | | void setMaxWarnings(size_t); |
232 | | |
233 | | // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If |
234 | | // turned off, it will throw an exception on the first such problem it finds without attempting |
235 | | // recovery. |
236 | | QPDF_DLL |
237 | | void setAttemptRecovery(bool); |
238 | | |
239 | | // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when |
240 | | // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object |
241 | | // makes it possible for the object and its input source to disappear before streams copied from |
242 | | // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to |
243 | | // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject |
244 | | // or addPage, the source object's input source must stick around until after the destination |
245 | | // PDF is written. If you call this method on the source QPDF object, it sends a signal to the |
246 | | // destination object that it must fully copy the stream data when copyForeignObject is called. It will do |
247 | | // this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid |
248 | | // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only |
249 | | // once regardless of how many objects the stream is copied into. The result is that, if you |
250 | | // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, |
251 | | // you do not need to keep it or its input source around after copying its objects to another |
252 | | // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method |
253 | | // is called on the QPDF object you are copying FROM, not the one you are copying to. The |
254 | | // reasoning for this is that there's no reason a given QPDF may not get objects copied to it |
255 | | // from a variety of other objects, some transient and some not. Since what's relevant is |
256 | | // whether the source QPDF is transient, the method must be called on the source QPDF, not the |
257 | | // destination one. This method will make a copy of the stream in RAM, so be sure you have |
258 | | // enough memory to simultaneously hold all the streams you're copying. |
259 | | QPDF_DLL |
260 | | void setImmediateCopyFrom(bool); |
261 | | |
262 | | // Other public methods |
263 | | |
264 | | // Return the list of warnings that have been issued so far and clear the list. This method may |
265 | | // be called even if processFile throws an exception. Note that if setSuppressWarnings was not |
266 | | // called or was called with a false value, any warnings retrieved here will have already been |
267 | | // output. |
268 | | QPDF_DLL |
269 | | std::vector<QPDFExc> getWarnings(); |
270 | | |
271 | | // Indicate whether any warnings have been issued so far. Does not clear the list of warnings. |
272 | | QPDF_DLL |
273 | | bool anyWarnings() const; |
274 | | |
275 | | // Indicate the number of warnings that have been issued since the last call to getWarnings. |
276 | | // Does not clear the list of warnings. |
277 | | QPDF_DLL |
278 | | size_t numWarnings() const; |
279 | | |
280 | | // Return an application-scoped unique ID for this QPDF object. This is not a globally unique |
281 | | // ID. It is constructed using a timestamp and a random number and is intended to be unique |
282 | | // among QPDF objects that are created by a single run of an application. While it's very likely |
283 | | // that these are actually globally unique, it is not recommended to use them for long-term |
284 | | // purposes. |
285 | | QPDF_DLL |
286 | | unsigned long long getUniqueId() const; |
287 | | |
288 | | // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, |
289 | | // following warning suppression rules, and it will be available with getWarnings(). |
290 | | QPDF_DLL |
291 | | void warn(QPDFExc const& e); |
292 | | // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename |
293 | | // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF |
294 | | // object. |
295 | | QPDF_DLL |
296 | | void warn( |
297 | | qpdf_error_code_e error_code, |
298 | | std::string const& object, |
299 | | qpdf_offset_t offset, |
300 | | std::string const& message); |
301 | | |
302 | | // Return the filename associated with the QPDF object. |
303 | | QPDF_DLL |
304 | | std::string getFilename() const; |
305 | | // Return PDF Version and extension level together as a PDFVersion object |
306 | | QPDF_DLL |
307 | | PDFVersion getVersionAsPDFVersion(); |
308 | | // Return just the PDF version from the file |
309 | | QPDF_DLL |
310 | | std::string getPDFVersion() const; |
311 | | QPDF_DLL |
312 | | int getExtensionLevel(); |
313 | | QPDF_DLL |
314 | | QPDFObjectHandle getTrailer(); |
315 | | QPDF_DLL |
316 | | QPDFObjectHandle getRoot(); |
317 | | QPDF_DLL |
318 | | std::map<QPDFObjGen, QPDFXRefEntry> getXRefTable(); |
319 | | |
320 | | // Public factory methods |
321 | | |
322 | | // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data |
323 | | // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the |
324 | | // resulting dictionary may be modified. Alternatively, you can create a new dictionary and |
325 | | // call replaceDict to install it. |
326 | | QPDF_DLL |
327 | | QPDFObjectHandle newStream(); |
328 | | |
329 | | // Create a new stream. Use the given buffer as the stream data. The stream dictionary's |
330 | | // /Length key will automatically be set to the size of the data buffer. If additional keys are |
331 | | // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting |
332 | | // dictionary may be modified. This method is just a convenient wrapper around the newStream() |
333 | | // and replaceStreamData(). It is a convenience method for streams that require no parameters |
334 | | // beyond the stream length. Note that you don't have to deal with compression yourself if you |
335 | | // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. |
336 | | // Example programs are provided that illustrate this. |
337 | | QPDF_DLL |
338 | | QPDFObjectHandle newStream(std::shared_ptr<Buffer> data); |
339 | | |
340 | | // Create new stream with data from string. This method will create a copy of the data rather |
341 | | // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream. |
342 | | QPDF_DLL |
343 | | QPDFObjectHandle newStream(std::string const& data); |
344 | | |
345 | | // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is |
346 | | // going to be added to the QPDF object. Normally you don't have to use this type since you can |
347 | | // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects |
348 | | // with circular references, you may need to create a reserved object so that you can have a |
349 | | // reference to it and then replace the object later. Reserved objects have the special |
350 | | // property that they can't be resolved to direct objects. This makes it possible to replace a |
351 | | // reserved object with a new object while preserving existing references to them. When you are |
352 | | // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this |
353 | | // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a |
354 | | // QPDF with QPDFWriter if it has any reserved objects in it. |
355 | | QPDF_DLL |
356 | | QPDFObjectHandle newReserved(); |
357 | | QPDF_DLL |
358 | | QPDFObjectHandle newIndirectNull(); |
359 | | |
360 | | // Install this object handle as an indirect object and return an indirect reference to it. |
361 | | QPDF_DLL |
362 | | QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); |
363 | | |
364 | | // Retrieve an object by object ID and generation. Returns an indirect reference to it. The |
365 | | // getObject() methods were added for qpdf 11. |
366 | | QPDF_DLL |
367 | | QPDFObjectHandle getObject(QPDFObjGen); |
368 | | QPDF_DLL |
369 | | QPDFObjectHandle getObject(int objid, int generation); |
370 | | // These are older methods, but there is no intention to deprecate |
371 | | // them. |
372 | | QPDF_DLL |
373 | | QPDFObjectHandle getObjectByObjGen(QPDFObjGen); |
374 | | QPDF_DLL |
375 | | QPDFObjectHandle getObjectByID(int objid, int generation); |
376 | | |
377 | | // Replace the object with the given object id with the given object. The object handle passed |
378 | | // in must be a direct object, though it may contain references to other indirect objects within |
379 | | // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that |
380 | | // pointed to the original object still pointed to the original object, resulting in confusing |
381 | | // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will |
382 | | // start pointing to the newly replaced object. Note that replacing an object with |
383 | | // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent |
384 | | // object is treated as a null object. To replace a reserved object, call replaceReserved |
385 | | // instead. |
386 | | QPDF_DLL |
387 | | void replaceObject(QPDFObjGen og, QPDFObjectHandle); |
388 | | QPDF_DLL |
389 | | void replaceObject(int objid, int generation, QPDFObjectHandle); |
390 | | |
391 | | // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that |
392 | | // referenced those objects did not notice the swap, but this was fixed in 10.2.1. |
393 | | QPDF_DLL |
394 | | void swapObjects(QPDFObjGen og1, QPDFObjGen og2); |
395 | | QPDF_DLL |
396 | | void swapObjects(int objid1, int generation1, int objid2, int generation2); |
397 | | |
398 | | // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the |
399 | | // underlying object is a reserved object or a null object. After this call, reserved will |
400 | | // be a reference to replacement. |
401 | | QPDF_DLL |
402 | | void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); |
403 | | |
404 | | // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no |
405 | | // longer necessary to keep the original QPDF around after the call to copyForeignObject as long |
406 | | // as the source of any copied stream data is still available. Usually this means you just have |
407 | | // to keep the input file around, not the QPDF object. The exception to this is if you copy a |
408 | | // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the |
409 | | // original stream's QPDF object must stick around because the QPDF object is itself the source |
410 | | // of the original stream data. For a more in-depth discussion, please see the TODO file. |
411 | | // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one |
412 | | // you're copying FROM). If you do this prior to copying any of its objects, then neither the |
413 | | // source QPDF object nor its input source needs to stick around at all regardless of the |
414 | | // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is |
415 | | // called. See setImmediateCopyFrom for more information. |
416 | | // |
417 | | // The return value of this method is an indirect reference to the copied object in this file. |
418 | | // This method is intended to be used to copy non-page objects. To copy page objects, pass the |
419 | | // foreign page object directly to addPage (or addPageAt). If you copy objects that contain |
420 | | // references to pages, you should copy the pages first using addPage(At). Otherwise references |
421 | | // to the pages that have not been copied will be replaced with nulls. It is possible to use |
422 | | // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies |
423 | | // the object normally but does not update the page structure. For example, it is a valid use |
424 | | // case to use copyForeignObject for a page that you are going to turn into a form XObject, |
425 | | // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. |
426 | | // |
427 | | // When copying objects with this method, object structure will be preserved, so all indirectly |
428 | | // referenced indirect objects will be copied as well. This includes any circular references |
429 | | // that may exist. The QPDF object keeps a record of what has already been copied, so shared |
430 | | // objects will not be copied multiple times. This also means that if you mutate an object that |
431 | | // has already been copied and try to copy it again, it won't work since the modified object |
432 | | // will not be recopied. Therefore, you should do all mutation on the original file that you |
433 | | // are going to do before you start copying its objects to a new file. |
434 | | QPDF_DLL |
435 | | QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); |
436 | | |
437 | | // Encryption support |
438 | | |
439 | | enum encryption_method_e { e_none, e_unknown, e_rc4, e_aes, e_aesv3 }; |
440 | | |
441 | | // To be removed from the public API in qpdf 13. See |
442 | | // <https://manual.qpdf.org/release-notes.html#r12-3-0-deprecate>. |
443 | | class EncryptionData |
444 | | { |
445 | | public: |
446 | | // This class holds data read from the encryption dictionary. The constructor stores each argument (V, R, Length_bytes, P, O, U, OE, UE, Perms, id1, encrypt_metadata) in the member of the same name; the getters and setters below are declared here and defined elsewhere. Copy construction and copy assignment are deleted. |
447 | | EncryptionData( |
448 | | int V, |
449 | | int R, |
450 | | int Length_bytes, |
451 | | int P, |
452 | | std::string const& O, |
453 | | std::string const& U, |
454 | | std::string const& OE, |
455 | | std::string const& UE, |
456 | | std::string const& Perms, |
457 | | std::string const& id1, |
458 | | bool encrypt_metadata) : |
459 | | V(V), |
460 | | R(R), |
461 | | Length_bytes(Length_bytes), |
462 | | P(P), |
463 | | O(O), |
464 | | U(U), |
465 | | OE(OE), |
466 | | UE(UE), |
467 | | Perms(Perms), |
468 | | id1(id1), |
469 | | encrypt_metadata(encrypt_metadata) |
470 | 0 | { |
471 | 0 | } |
472 | | |
473 | | int getV() const; |
474 | | int getR() const; |
475 | | int getLengthBytes() const; |
476 | | int getP() const; |
477 | | std::string const& getO() const; |
478 | | std::string const& getU() const; |
479 | | std::string const& getOE() const; |
480 | | std::string const& getUE() const; |
481 | | std::string const& getPerms() const; |
482 | | std::string const& getId1() const; |
483 | | bool getEncryptMetadata() const; |
484 | | |
485 | | void setO(std::string const&); |
486 | | void setU(std::string const&); |
487 | | void setV5EncryptionParameters( |
488 | | std::string const& O, |
489 | | std::string const& OE, |
490 | | std::string const& U, |
491 | | std::string const& UE, |
492 | | std::string const& Perms); |
493 | | |
494 | | private: |
495 | | EncryptionData(EncryptionData const&) = delete; |
496 | | EncryptionData& operator=(EncryptionData const&) = delete; |
497 | | |
498 | | int V; |
499 | | int R; |
500 | | int Length_bytes; |
501 | | int P; |
502 | | std::string O; |
503 | | std::string U; |
504 | | std::string OE; |
505 | | std::string UE; |
506 | | std::string Perms; |
507 | | std::string id1; |
508 | | bool encrypt_metadata; |
509 | | }; |
510 | | QPDF_DLL |
511 | | bool isEncrypted() const; |
512 | | |
513 | | QPDF_DLL |
514 | | bool isEncrypted(int& R, int& P); |
515 | | |
516 | | QPDF_DLL |
517 | | bool isEncrypted( |
518 | | int& R, |
519 | | int& P, |
520 | | int& V, |
521 | | encryption_method_e& stream_method, |
522 | | encryption_method_e& string_method, |
523 | | encryption_method_e& file_method); |
524 | | |
525 | | QPDF_DLL |
526 | | bool ownerPasswordMatched() const; |
527 | | |
528 | | QPDF_DLL |
529 | | bool userPasswordMatched() const; |
530 | | |
531 | | // Encryption permissions -- not enforced by QPDF |
532 | | QPDF_DLL |
533 | | bool allowAccessibility(); |
534 | | QPDF_DLL |
535 | | bool allowExtractAll(); |
536 | | QPDF_DLL |
537 | | bool allowPrintLowRes(); |
538 | | QPDF_DLL |
539 | | bool allowPrintHighRes(); |
540 | | QPDF_DLL |
541 | | bool allowModifyAssembly(); |
542 | | QPDF_DLL |
543 | | bool allowModifyForm(); |
544 | | QPDF_DLL |
545 | | bool allowModifyAnnotation(); |
546 | | QPDF_DLL |
547 | | bool allowModifyOther(); |
548 | | QPDF_DLL |
549 | | bool allowModifyAll(); |
550 | | |
551 | | // Helper function to trim padding from user password. Calling trim_user_password on the result |
552 | | // of getPaddedUserPassword gives getTrimmedUserPassword's result. |
553 | | QPDF_DLL |
554 | | static void trim_user_password(std::string& user_password); |
555 | | QPDF_DLL |
556 | | static std::string compute_data_key( |
557 | | std::string const& encryption_key, |
558 | | int objid, |
559 | | int generation, |
560 | | bool use_aes, |
561 | | int encryption_V, |
562 | | int encryption_R); |
563 | | |
564 | | // To be removed in qpdf 13. See <https://manual.qpdf.org/release-notes.html#r12-3-0-deprecate>. |
565 | | [[deprecated("to be removed in qpdf 13")]] |
566 | | QPDF_DLL static std::string |
567 | | compute_encryption_key(std::string const& password, EncryptionData const& data); |
568 | | |
569 | | QPDF_DLL |
570 | | static void compute_encryption_O_U( |
571 | | char const* user_password, |
572 | | char const* owner_password, |
573 | | int V, |
574 | | int R, |
575 | | int key_len, |
576 | | int P, |
577 | | bool encrypt_metadata, |
578 | | std::string const& id1, |
579 | | std::string& O, |
580 | | std::string& U); |
581 | | QPDF_DLL |
582 | | static void compute_encryption_parameters_V5( |
583 | | char const* user_password, |
584 | | char const* owner_password, |
585 | | int V, |
586 | | int R, |
587 | | int key_len, |
588 | | int P, |
589 | | bool encrypt_metadata, |
590 | | std::string const& id1, |
591 | | std::string& encryption_key, |
592 | | std::string& O, |
593 | | std::string& U, |
594 | | std::string& OE, |
595 | | std::string& UE, |
596 | | std::string& Perms); |
597 | | // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or |
598 | | // 128-bit keys, the user password can be recovered when the file is opened using the owner |
599 | | // password. This is not possible with newer encryption formats. If you are attempting to |
600 | | // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. |
601 | | QPDF_DLL |
602 | | std::string const& getPaddedUserPassword() const; |
603 | | // Return human-readable form of user password subject to same limitations as |
604 | | // getPaddedUserPassword(). |
605 | | QPDF_DLL |
606 | | std::string getTrimmedUserPassword() const; |
607 | | // Return the previously computed or retrieved encryption key for this file |
608 | | QPDF_DLL |
609 | | std::string getEncryptionKey() const; |
610 | | // Remove security restrictions associated with digitally signed files. From qpdf 11.7.0, this |
611 | | // is called by QPDFAcroFormDocumentHelper::disableDigitalSignatures and is more useful when |
612 | | // called from there than when just called by itself. |
613 | | QPDF_DLL |
614 | | void removeSecurityRestrictions(); |
615 | | |
616 | | // Linearization support |
617 | | |
618 | | // Returns true iff the file starts with a linearization parameter dictionary. Does no |
619 | | // additional validation. |
620 | | QPDF_DLL |
621 | | bool isLinearized(); |
622 | | |
623 | | // Performs various sanity checks on a linearized file. Return true if no errors or warnings. |
624 | | // Otherwise, return false and output errors and warnings to the default output stream |
625 | | // (std::cout or whatever is configured in the logger). It is recommended for linearization |
626 | | // errors to be treated as warnings. |
627 | | QPDF_DLL |
628 | | bool checkLinearization(); |
629 | | |
630 | | // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints |
631 | | // tables to the default output stream. Normalization includes adding min values to delta values |
632 | | // and adjusting offsets based on the location and size of the primary hint stream. |
633 | | QPDF_DLL |
634 | | void showLinearizationData(); |
635 | | |
636 | | // Shows the contents of the cross-reference table |
637 | | QPDF_DLL |
638 | | void showXRefTable(); |
639 | | |
640 | | // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this |
641 | | // method was used to detect all indirect references to objects that don't exist and resolve |
642 | | // them by replacing them with null, which is how the PDF spec says to interpret such dangling |
643 | | // references. This method is called automatically when you try to add any new objects, if you |
644 | | // call getAllObjects, and before a file is written. The qpdf object caches whether it has run |
645 | | // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run |
646 | | // again if you had explicitly added new objects that may have additional dangling references. |
647 | | QPDF_DLL |
648 | | void fixDanglingReferences(bool force = false); |
649 | | |
650 | | // Return the approximate number of indirect objects. It is approximate because not all objects |
651 | | // in the file are preserved in all cases, and gaps in object numbering are not preserved. |
652 | | QPDF_DLL |
653 | | size_t getObjectCount(); |
654 | | |
655 | | // Returns a list of indirect objects for every object in the xref table. Useful for discovering |
656 | | // objects that are not otherwise referenced. |
657 | | QPDF_DLL |
658 | | std::vector<QPDFObjectHandle> getAllObjects(); |
659 | | |
660 | | // Optimization support -- see doc/optimization. Implemented in QPDF_optimization.cc |
661 | | |
662 | | // The object_stream_data map maps from a "compressed" object to the object stream that contains |
663 | | // it. This enables optimize to populate the object <-> user maps with only uncompressed |
664 | | // objects. If allow_changes is false, an exception will be thrown if any changes are made |
665 | | // during the optimization process. This is available so that the test suite can make sure that |
666 | | // a linearized file is already optimized. When called in this way, optimize() still populates |
667 | | // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is |
668 | | // called for each stream object. The function should return 2 if optimization should discard |
669 | | // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should |
670 | | // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for |
671 | | // stream dictionary keys it will be regenerating. |
672 | | [[deprecated("Unused - see release notes for qpdf 12.1.0")]] QPDF_DLL void optimize( |
673 | | std::map<int, int> const& object_stream_data, |
674 | | bool allow_changes = true, |
675 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); |
676 | | |
677 | | // Traverse page tree and return all /Page objects. It also detects and resolves cases in which the |
678 | | // same /Page object is duplicated. For efficiency, this method returns a const reference to an |
679 | | // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but |
680 | | // direct manipulation of the pages tree or pushing inheritable objects to the page level may |
681 | | // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should |
682 | | // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache |
683 | | // was arguably incorrect, but it is being left here for compatibility. It is, however, |
684 | | // completely safe to use this for files that you are not modifying. |
685 | | QPDF_DLL |
686 | | std::vector<QPDFObjectHandle> const& getAllPages(); |
687 | | |
688 | | QPDF_DLL |
689 | | bool everCalledGetAllPages() const; |
690 | | QPDF_DLL |
691 | | bool everPushedInheritedAttributesToPages() const; |
692 | | |
693 | | // These methods, given a page object or its object/generation number, return the 0-based index |
694 | | // into the array returned by getAllPages() for that page. An exception is thrown if the page is |
695 | | // not found. |
696 | | QPDF_DLL |
697 | | int findPage(QPDFObjGen og); |
698 | | QPDF_DLL |
699 | | int findPage(QPDFObjectHandle& page); |
700 | | |
701 | | // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If |
702 | | // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages |
703 | | // or changes to any /Pages objects, to calls to these page handling APIs, you never need to |
704 | | // call this method. If you modify /Pages structures directly, you must call this method |
705 | | // afterwards. This method updates the internal list of pages, so after calling this method, |
706 | | // any previous references returned by getAllPages() will be valid again. It also resets any |
707 | | // state about having pushed inherited attributes in /Pages objects down to the pages, so if you |
708 | | // add any inheritable attributes to a /Pages object, you should also call this method. |
709 | | QPDF_DLL |
710 | | void updateAllPagesCache(); |
711 | | |
712 | | // Legacy page handling API. These methods are not going anywhere, and you should feel free to |
713 | | // continue using them if it simplifies your code. Newer code should make use of |
714 | | // QPDFPageDocumentHelper instead as future page handling methods will be added there. The |
715 | | // functionality and specification of these legacy methods is identical to the identically named |
716 | | // methods there, except that these versions use QPDFObjectHandle instead of |
717 | | // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are |
718 | | // subtleties you need to know about, so please look at the comments there. |
719 | | QPDF_DLL |
720 | | void pushInheritedAttributesToPage(); |
721 | | QPDF_DLL |
722 | | void addPage(QPDFObjectHandle newpage, bool first); |
723 | | QPDF_DLL |
724 | | void addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage); |
725 | | QPDF_DLL |
726 | | void removePage(QPDFObjectHandle page); |
727 | | // End legacy page helpers |
728 | | |
729 | | // End of the public API. The following classes and methods are for qpdf internal use only. |
730 | | |
731 | | class Doc; |
732 | | |
733 | | inline Doc& doc(); |
734 | | |
735 | | // For testing only -- do not add to DLL |
736 | | static bool test_json_validators(); |
737 | | |
738 | | private: |
739 | | // It has never been safe to copy QPDF objects as there is code in the library that assumes |
740 | | // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until |
741 | | // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf |
742 | | // 11, you can use QPDF::create to create them. |
743 | | QPDF(QPDF const&) = delete; |
744 | | QPDF& operator=(QPDF const&) = delete; |
745 | | |
746 | | static std::string const qpdf_version; |
747 | | |
748 | | class ObjCache; |
749 | | class EncryptionParameters; |
750 | | class StringDecrypter; |
751 | | class ResolveRecorder; |
752 | | class JSONReactor; |
753 | | |
754 | | void removeObject(QPDFObjGen og); |
755 | | |
756 | | // Calls finish() on the pipeline when done but does not delete it |
757 | | bool pipeStreamData( |
758 | | QPDFObjGen og, |
759 | | qpdf_offset_t offset, |
760 | | size_t length, |
761 | | QPDFObjectHandle dict, |
762 | | bool is_root_metadata, |
763 | | Pipeline* pipeline, |
764 | | bool suppress_warnings, |
765 | | bool will_retry); |
766 | | static bool pipeStreamData( |
767 | | std::shared_ptr<QPDF::EncryptionParameters> encp, |
768 | | std::shared_ptr<InputSource> file, |
769 | | QPDF& qpdf_for_warning, |
770 | | QPDFObjGen og, |
771 | | qpdf_offset_t offset, |
772 | | size_t length, |
773 | | QPDFObjectHandle dict, |
774 | | bool is_root_metadata, |
775 | | Pipeline* pipeline, |
776 | | bool suppress_warnings, |
777 | | bool will_retry); |
778 | | |
779 | | // methods to support encryption -- implemented in QPDF_encryption.cc |
780 | | void initializeEncryption(); |
781 | | static std::string |
782 | | getKeyForObject(std::shared_ptr<EncryptionParameters> encp, QPDFObjGen og, bool use_aes); |
783 | | void decryptString(std::string&, QPDFObjGen og); |
784 | | static void decryptStream( |
785 | | std::shared_ptr<EncryptionParameters> encp, |
786 | | std::shared_ptr<InputSource> file, |
787 | | QPDF& qpdf_for_warning, |
788 | | Pipeline*& pipeline, |
789 | | QPDFObjGen og, |
790 | | QPDFObjectHandle& stream_dict, |
791 | | bool is_root_metadata, |
792 | | std::unique_ptr<Pipeline>& heap); |
793 | | |
794 | | // JSON import |
795 | | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); |
796 | | |
797 | | class Members; |
798 | | |
799 | | // Keep all member variables inside the Members object, which we dynamically allocate. This |
800 | | // makes it possible to add new private members without breaking binary compatibility. |
801 | | std::unique_ptr<Members> m; |
802 | | }; |
803 | | |
804 | | #endif // QPDF_HH |