/src/qpdf/include/qpdf/QPDF.hh
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2005-2021 Jay Berkenbilt |
2 | | // Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger |
3 | | // |
4 | | // This file is part of qpdf. |
5 | | // |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
7 | | // in compliance with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
12 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
13 | | // or implied. See the License for the specific language governing permissions and limitations under |
14 | | // the License. |
15 | | // |
16 | | // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
17 | | // License. At your option, you may continue to consider qpdf to be licensed under those terms. |
18 | | // Please see the manual for additional information. |
19 | | |
20 | | #ifndef QPDF_HH |
21 | | #define QPDF_HH |
22 | | |
23 | | #include <qpdf/DLL.h> |
24 | | #include <qpdf/Types.h> |
25 | | |
26 | | #include <bitset> |
27 | | #include <cstdio> |
28 | | #include <functional> |
29 | | #include <iostream> |
30 | | #include <list> |
31 | | #include <map> |
32 | | #include <memory> |
33 | | #include <string> |
34 | | #include <string_view> |
35 | | #include <vector> |
36 | | |
37 | | #include <qpdf/Buffer.hh> |
38 | | #include <qpdf/InputSource.hh> |
39 | | #include <qpdf/PDFVersion.hh> |
40 | | #include <qpdf/QIntC.hh> |
41 | | #include <qpdf/QPDFExc.hh> |
42 | | #include <qpdf/QPDFObjGen.hh> |
43 | | #include <qpdf/QPDFObjectHandle.hh> |
44 | | #include <qpdf/QPDFStreamFilter.hh> |
45 | | #include <qpdf/QPDFTokenizer.hh> |
46 | | #include <qpdf/QPDFWriter.hh> |
47 | | #include <qpdf/QPDFXRefEntry.hh> |
48 | | |
49 | | namespace qpdf::is |
50 | | { |
51 | | class OffsetBuffer; |
52 | | } |
53 | | |
54 | | class QPDF_Stream; |
55 | | class BitStream; |
56 | | class BitWriter; |
57 | | class BufferInputSource; |
58 | | class QPDFLogger; |
59 | | class QPDFParser; |
60 | | |
61 | | class QPDF |
62 | | { |
63 | | public: |
64 | | // Get the current version of the QPDF software. See also qpdf/DLL.h |
65 | | QPDF_DLL |
66 | | static std::string const& QPDFVersion(); |
67 | | |
68 | | QPDF_DLL |
69 | | QPDF(); |
70 | | QPDF_DLL |
71 | | ~QPDF(); |
72 | | |
73 | | QPDF_DLL |
74 | | static std::shared_ptr<QPDF> create(); |
75 | | |
76 | | // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not |
77 | | // read until they are needed. A QPDF object may be associated with only one file in its |
78 | | // lifetime. This method must be called before any methods that potentially ask for information |
79 | | // about the PDF file are called. Prior to calling this, the only methods that are allowed are |
80 | | // those that set parameters. If the input file is not encrypted, either a null password or an |
81 | | // empty password can be used. If the file is encrypted, either the user password or the owner |
82 | | // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this |
83 | | // method or any of the other process methods to force the password to be interpreted as a raw |
84 | | // encryption key. See comments on setPasswordIsHexKey for more information. |
85 | | QPDF_DLL |
86 | | void processFile(char const* filename, char const* password = nullptr); |
87 | | |
88 | | // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. |
89 | | // It may be open read only. This works exactly like processFile except that the PDF file is |
90 | | // read from an already opened FILE*. If close_file is true, the file will be closed at the |
91 | | // end. Otherwise, the caller is responsible for closing the file. |
92 | | QPDF_DLL |
93 | | void processFile( |
94 | | char const* description, FILE* file, bool close_file, char const* password = nullptr); |
95 | | |
96 | | // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except |
97 | | // that the PDF file is in memory instead of on disk. The description appears in any warning or |
98 | | // error message in place of the file name. The buffer is owned by the caller and must remain |
99 | | // valid for the lifetime of the QPDF object. |
100 | | QPDF_DLL |
101 | | void processMemoryFile( |
102 | | char const* description, char const* buf, size_t length, char const* password = nullptr); |
103 | | |
104 | | // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving |
105 | | // a PDF file, you can subclass InputSource and use this method. |
106 | | QPDF_DLL |
107 | | void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr); |
108 | | |
109 | | // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf |
110 | | // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See |
111 | | // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not |
112 | | // load stream data into memory for more than one stream at a time, even if the stream data is |
113 | | // specified inline. |
114 | | QPDF_DLL |
115 | | void createFromJSON(std::string const& json_file); |
116 | | QPDF_DLL |
117 | | void createFromJSON(std::shared_ptr<InputSource>); |
118 | | |
119 | | // Update a PDF from an input source that contains JSON in the same format as is written by |
120 | | // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the |
121 | | // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the |
122 | | // input JSON may be arbitrarily large. |
123 | | QPDF_DLL |
124 | | void updateFromJSON(std::string const& json_file); |
125 | | QPDF_DLL |
126 | | void updateFromJSON(std::shared_ptr<InputSource>); |
127 | | |
128 | | // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish() |
129 | | // method is not called on the pipeline. |
130 | | // |
131 | | // The decode_level parameter controls which streams are uncompressed in the JSON. Use |
132 | | // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible |
133 | | // values for json_stream_data can be found in qpdf/Constants.h and correspond to the |
134 | | // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix |
135 | | // must be specified. Each stream will be written to a file whose path is constructed by |
136 | | // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If |
137 | | // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in |
138 | | // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are |
139 | | // ignored. This corresponds to the --json-object command-line argument. |
140 | | // |
141 | | // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large |
142 | | // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the |
143 | | // JSON output in memory, but do so with caution as this will allocate enough memory to hold the |
144 | | // entire PDF file. |
145 | | QPDF_DLL |
146 | | void writeJSON( |
147 | | int version, |
148 | | Pipeline* p, |
149 | | qpdf_stream_decode_level_e decode_level, |
150 | | qpdf_json_stream_data_e json_stream_data, |
151 | | std::string const& file_prefix, |
152 | | std::set<std::string> wanted_objects); |
153 | | |
154 | | // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. |
155 | | // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is |
156 | | // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value |
157 | | // are written to the pipeline assuming that a dictionary is already open. The parameter |
158 | | // first_key indicates whether this is the first key in an in-progress dictionary. It will be |
159 | | // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a |
160 | | // prettified JSON output. Remaining arguments are the same as the above version. |
161 | | QPDF_DLL |
162 | | void writeJSON( |
163 | | int version, |
164 | | Pipeline* p, |
165 | | bool complete, |
166 | | bool& first_key, |
167 | | qpdf_stream_decode_level_e decode_level, |
168 | | qpdf_json_stream_data_e json_stream_data, |
169 | | std::string const& file_prefix, |
170 | | std::set<std::string> wanted_objects); |
171 | | |
172 | | // Close or otherwise release the input source. Once this has been called, no other methods of |
173 | | // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been |
174 | | // called, it is safe to perform operations on the input file such as deleting or renaming it. |
175 | | QPDF_DLL |
176 | | void closeInputSource(); |
177 | | |
178 | | // For certain forensic or investigatory purposes, it may sometimes be useful to specify the |
179 | | // encryption key directly, even though regular PDF applications do not provide a way to do |
180 | | // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass |
181 | | // the normal encryption key computation or recovery mechanisms and interpret the bytes in the |
182 | | // password as a hex-encoded encryption key. Note that we hex-encode the key because it may |
183 | | // contain null bytes and therefore can't be represented in a char const*. |
184 | | QPDF_DLL |
185 | | void setPasswordIsHexKey(bool); |
186 | | |
187 | | // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal |
188 | | // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other |
189 | | // objects can be added to the file in the normal way, and the trailer and document catalog can |
190 | | // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF |
191 | | // file. See the pdf-create.cc example for a demonstration of how to use this method to create |
192 | | // a PDF file from scratch. |
193 | | QPDF_DLL |
194 | | void emptyPDF(); |
195 | | |
196 | | // From 10.1: register a new filter implementation for a specific stream filter. You can add |
197 | | // your own implementations for new filter types or override existing ones provided by the |
198 | | // library. Registered stream filters are used for decoding only as you can override encoding |
199 | | // with stream data providers. For example, you could use this method to add support for one of |
200 | | // the other filter types by using additional third-party libraries that qpdf does not presently |
201 | | // use. The standard filters are implemented using QPDFStreamFilter classes. |
202 | | QPDF_DLL |
203 | | static void registerStreamFilter( |
204 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory); |
205 | | |
206 | | // Parameter settings |
207 | | |
208 | | // To capture or redirect output, configure the logger returned by getLogger(). By default, all |
209 | | // QPDF and QPDFJob objects share the global logger. If you need a private logger for some |
210 | | // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on |
211 | | // configuring the logger. |
212 | | // |
213 | | // Note that no normal QPDF operations generate output to standard output, so for applications |
214 | | // that just wish to avoid creating output for warnings and don't call any check functions, |
215 | | // calling setSuppressWarnings(true) is sufficient. |
216 | | QPDF_DLL |
217 | | std::shared_ptr<QPDFLogger> getLogger(); |
218 | | QPDF_DLL |
219 | | void setLogger(std::shared_ptr<QPDFLogger>); |
220 | | |
221 | | // This deprecated method is the old way to capture output, but it didn't capture all output. |
222 | | // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it |
223 | | // configures a private logger, separating this object from the default logger, and calls |
224 | | // setOutputStreams on that logger. See QPDFLogger.hh for additional details. |
225 | | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void |
226 | | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); |
227 | | |
228 | | // If true, ignore any cross-reference streams in a hybrid file (one that contains both |
229 | | // cross-reference streams and cross-reference tables). This can be useful for testing to |
230 | | // ensure that a hybrid file would work with an older reader. |
231 | | QPDF_DLL |
232 | | void setIgnoreXRefStreams(bool); |
233 | | |
234 | | // By default, any warnings are issued to std::cerr or the error stream specified in a call to |
235 | | // setOutputStreams as they are encountered. If this method is called with a true value, |
236 | | // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings. |
237 | | QPDF_DLL |
238 | | void setSuppressWarnings(bool); |
239 | | |
240 | | // Set the maximum number of warnings. A QPDFExc is thrown if the limit is exceeded. |
241 | | QPDF_DLL |
242 | | void setMaxWarnings(size_t); |
243 | | |
244 | | // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If |
245 | | // turned off, it will throw an exception on the first such problem it finds without attempting |
246 | | // recovery. |
247 | | QPDF_DLL |
248 | | void setAttemptRecovery(bool); |
249 | | |
250 | | // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when |
251 | | // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object |
252 | | // makes it possible for the object and its input source to disappear before streams copied from |
253 | | // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to |
254 | | // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject |
255 | | // or addPage, the source object's input source must stick around until after the destination |
256 | | // PDF is written. If you call this method on the source QPDF object, it sends a signal to the |
257 | | // destination object that it must fully copy the stream data when copyForeignObject is called. |
258 | | // It will do this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid |
259 | | // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only |
260 | | // once regardless of how many objects the stream is copied into. The result is that, if you |
261 | | // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, |
262 | | // you do not need to keep it or its input source around after copying its objects to another |
263 | | // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method |
264 | | // is called on the QPDF object you are copying FROM, not the one you are copying to. The |
265 | | // reasoning for this is that there's no reason a given QPDF may not get objects copied to it |
266 | | // from a variety of other objects, some transient and some not. Since what's relevant is |
267 | | // whether the source QPDF is transient, the method must be called on the source QPDF, not the |
268 | | // destination one. This method will make a copy of the stream in RAM, so be sure you have |
269 | | // enough memory to simultaneously hold all the streams you're copying. |
270 | | QPDF_DLL |
271 | | void setImmediateCopyFrom(bool); |
272 | | |
273 | | // Other public methods |
274 | | |
275 | | // Return the list of warnings that have been issued so far and clear the list. This method may |
276 | | // be called even if processFile throws an exception. Note that if setSuppressWarnings was not |
277 | | // called or was called with a false value, any warnings retrieved here will have already been |
278 | | // output. |
279 | | QPDF_DLL |
280 | | std::vector<QPDFExc> getWarnings(); |
281 | | |
282 | | // Indicate whether any warnings have been issued so far. Does not clear the list of warnings. |
283 | | QPDF_DLL |
284 | | bool anyWarnings() const; |
285 | | |
286 | | // Indicate the number of warnings that have been issued since the last call to getWarnings. |
287 | | // Does not clear the list of warnings. |
288 | | QPDF_DLL |
289 | | size_t numWarnings() const; |
290 | | |
291 | | // Return an application-scoped unique ID for this QPDF object. This is not a globally unique |
292 | | // ID. It is constructed using a timestamp and a random number and is intended to be unique |
293 | | // among QPDF objects that are created by a single run of an application. While it's very likely |
294 | | // that these are actually globally unique, it is not recommended to use them for long-term |
295 | | // purposes. |
296 | | QPDF_DLL |
297 | | unsigned long long getUniqueId() const; |
298 | | |
299 | | // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, |
300 | | // following warning suppression rules, and it will be available with getWarnings(). |
301 | | QPDF_DLL |
302 | | void warn(QPDFExc const& e); |
303 | | // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename |
304 | | // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF |
305 | | // object. |
306 | | QPDF_DLL |
307 | | void warn( |
308 | | qpdf_error_code_e error_code, |
309 | | std::string const& object, |
310 | | qpdf_offset_t offset, |
311 | | std::string const& message); |
312 | | |
313 | | // Return the filename associated with the QPDF object. |
314 | | QPDF_DLL |
315 | | std::string getFilename() const; |
316 | | // Return PDF Version and extension level together as a PDFVersion object |
317 | | QPDF_DLL |
318 | | PDFVersion getVersionAsPDFVersion(); |
319 | | // Return just the PDF version from the file |
320 | | QPDF_DLL |
321 | | std::string getPDFVersion() const; |
322 | | QPDF_DLL |
323 | | int getExtensionLevel(); |
324 | | QPDF_DLL |
325 | | QPDFObjectHandle getTrailer(); |
326 | | QPDF_DLL |
327 | | QPDFObjectHandle getRoot(); |
328 | | QPDF_DLL |
329 | | std::map<QPDFObjGen, QPDFXRefEntry> getXRefTable(); |
330 | | |
331 | | // Public factory methods |
332 | | |
333 | | // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data |
334 | | // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the |
335 | | // resulting dictionary may be modified. Alternatively, you can create a new dictionary and |
336 | | // call replaceDict to install it. |
337 | | QPDF_DLL |
338 | | QPDFObjectHandle newStream(); |
339 | | |
340 | | // Create a new stream. Use the given buffer as the stream data. The stream dictionary's |
341 | | // /Length key will automatically be set to the size of the data buffer. If additional keys are |
342 | | // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting |
343 | | // dictionary may be modified. This method is just a convenient wrapper around the newStream() |
344 | | // and replaceStreamData(). It is a convenience method for streams that require no parameters |
345 | | // beyond the stream length. Note that you don't have to deal with compression yourself if you |
346 | | // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. |
347 | | // Example programs are provided that illustrate this. |
348 | | QPDF_DLL |
349 | | QPDFObjectHandle newStream(std::shared_ptr<Buffer> data); |
350 | | |
351 | | // Create new stream with data from string. This method will create a copy of the data rather |
352 | | // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream. |
353 | | QPDF_DLL |
354 | | QPDFObjectHandle newStream(std::string const& data); |
355 | | |
356 | | // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is |
357 | | // going to be added to the QPDF object. Normally you don't have to use this type since you can |
358 | | // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects |
359 | | // with circular references, you may need to create a reserved object so that you can have a |
360 | | // reference to it and then replace the object later. Reserved objects have the special |
361 | | // property that they can't be resolved to direct objects. This makes it possible to replace a |
362 | | // reserved object with a new object while preserving existing references to them. When you are |
363 | | // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this |
364 | | // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a |
365 | | // QPDF with QPDFWriter if it has any reserved objects in it. |
366 | | QPDF_DLL |
367 | | QPDFObjectHandle newReserved(); |
368 | | QPDF_DLL |
369 | | QPDFObjectHandle newIndirectNull(); |
370 | | |
371 | | // Install this object handle as an indirect object and return an indirect reference to it. |
372 | | QPDF_DLL |
373 | | QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); |
374 | | |
375 | | // Retrieve an object by object ID and generation. Returns an indirect reference to it. The |
376 | | // getObject() methods were added for qpdf 11. |
377 | | QPDF_DLL |
378 | | QPDFObjectHandle getObject(QPDFObjGen); |
379 | | QPDF_DLL |
380 | | QPDFObjectHandle getObject(int objid, int generation); |
381 | | // These are older methods, but there is no intention to deprecate |
382 | | // them. |
383 | | QPDF_DLL |
384 | | QPDFObjectHandle getObjectByObjGen(QPDFObjGen); |
385 | | QPDF_DLL |
386 | | QPDFObjectHandle getObjectByID(int objid, int generation); |
387 | | |
388 | | // Replace the object with the given object id with the given object. The object handle passed |
389 | | // in must be a direct object, though it may contain references to other indirect objects within |
390 | | // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that |
391 | | // pointed to the original object still pointed to the original object, resulting in confusing |
392 | | // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will |
393 | | // start pointing to the newly replaced object. Note that replacing an object with |
394 | | // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent |
395 | | // object is treated as a null object. To replace a reserved object, call replaceReserved |
396 | | // instead. |
397 | | QPDF_DLL |
398 | | void replaceObject(QPDFObjGen og, QPDFObjectHandle); |
399 | | QPDF_DLL |
400 | | void replaceObject(int objid, int generation, QPDFObjectHandle); |
401 | | |
402 | | // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that |
403 | | // referenced the objects did not notice the swap, but this was fixed in 10.2.1. |
404 | | QPDF_DLL |
405 | | void swapObjects(QPDFObjGen og1, QPDFObjGen og2); |
406 | | QPDF_DLL |
407 | | void swapObjects(int objid1, int generation1, int objid2, int generation2); |
408 | | |
409 | | // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the |
410 | | // underlying object is a reserved object or a null object. After this call, reserved will |
411 | | // be a reference to replacement. |
412 | | QPDF_DLL |
413 | | void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); |
414 | | |
415 | | // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no |
416 | | // longer necessary to keep the original QPDF around after the call to copyForeignObject as long |
417 | | // as the source of any copied stream data is still available. Usually this means you just have |
418 | | // to keep the input file around, not the QPDF object. The exception to this is if you copy a |
419 | | // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the |
420 | | // original stream's QPDF object must stick around because the QPDF object is itself the source |
421 | | // of the original stream data. For a more in-depth discussion, please see the TODO file. |
422 | | // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one |
423 | | // you're copying FROM). If you do this prior to copying any of its objects, then neither the |
424 | | // source QPDF object nor its input source needs to stick around at all regardless of the |
425 | | // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is |
426 | | // called. See setImmediateCopyFrom for more information. |
427 | | // |
428 | | // The return value of this method is an indirect reference to the copied object in this file. |
429 | | // This method is intended to be used to copy non-page objects. To copy page objects, pass the |
430 | | // foreign page object directly to addPage (or addPageAt). If you copy objects that contain |
431 | | // references to pages, you should copy the pages first using addPage(At). Otherwise references |
432 | | // to the pages that have not been copied will be replaced with nulls. It is possible to use |
433 | | // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies |
434 | | // the object normally but does not update the page structure. For example, it is a valid use |
435 | | // case to use copyForeignObject for a page that you are going to turn into a form XObject, |
436 | | // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. |
437 | | // |
438 | | // When copying objects with this method, object structure will be preserved, so all indirectly |
439 | | // referenced indirect objects will be copied as well. This includes any circular references |
440 | | // that may exist. The QPDF object keeps a record of what has already been copied, so shared |
441 | | // objects will not be copied multiple times. This also means that if you mutate an object that |
442 | | // has already been copied and try to copy it again, it won't work since the modified object |
443 | | // will not be recopied. Therefore, you should do all mutation on the original file that you |
444 | | // are going to do before you start copying its objects to a new file. |
445 | | QPDF_DLL |
446 | | QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); |
447 | | |
448 | | // Encryption support |
449 | | |
450 | | enum encryption_method_e { e_none, e_unknown, e_rc4, e_aes, e_aesv3 }; |
451 | | class EncryptionData |
452 | | { |
453 | | public: |
454 | | // This class holds data read from the encryption dictionary. Instances are not copyable. |
455 | | EncryptionData( |
456 | | int V, |
457 | | int R, |
458 | | int Length_bytes, |
459 | | int P, |
460 | | std::string const& O, |
461 | | std::string const& U, |
462 | | std::string const& OE, |
463 | | std::string const& UE, |
464 | | std::string const& Perms, |
465 | | std::string const& id1, |
466 | | bool encrypt_metadata) : |
467 | 844 | V(V), |
468 | 844 | R(R), |
469 | 844 | Length_bytes(Length_bytes), |
470 | 844 | P(static_cast<unsigned long long>(P)), // std::bitset<32> keeps only the low 32 bits |
471 | 844 | O(O), |
472 | 844 | U(U), |
473 | 844 | OE(OE), |
474 | 844 | UE(UE), |
475 | 844 | Perms(Perms), |
476 | 844 | id1(id1), |
477 | 844 | encrypt_metadata(encrypt_metadata) |
478 | 844 | { |
479 | 844 | } |
480 | | EncryptionData(int V, int R, int Length_bytes, bool encrypt_metadata) : |
481 | | V(V), |
482 | | R(R), |
483 | | Length_bytes(Length_bytes), |
484 | | encrypt_metadata(encrypt_metadata) |
485 | 0 | { |
486 | 0 | } |
487 | | |
488 | | int getV() const; |
489 | | int getR() const; |
490 | | int getLengthBytes() const; |
491 | | int getP() const; |
492 | | // Bits in P are numbered from 1 as in the PDF spec. |
493 | | bool getP(size_t bit) const; |
494 | | std::string const& getO() const; |
495 | | std::string const& getU() const; |
496 | | std::string const& getOE() const; |
497 | | std::string const& getUE() const; |
498 | | std::string const& getPerms() const; |
499 | | std::string const& getId1() const; |
500 | | bool getEncryptMetadata() const; |
501 | | // Bits in P are numbered from 1 as in the PDF spec. |
502 | | void setP(size_t bit, bool val); |
503 | | void setP(unsigned long val); |
504 | | void setO(std::string const&); |
505 | | void setU(std::string const&); |
506 | | void setId1(std::string const& val); |
507 | | void setV5EncryptionParameters( |
508 | | std::string const& O, |
509 | | std::string const& OE, |
510 | | std::string const& U, |
511 | | std::string const& UE, |
512 | | std::string const& Perms); |
513 | | |
514 | | std::string compute_encryption_key(std::string const& password) const; |
515 | | |
516 | | bool |
517 | | check_owner_password(std::string& user_password, std::string const& owner_password) const; |
518 | | |
519 | | bool check_user_password(std::string const& user_password) const; |
520 | | |
521 | | std::string |
522 | | recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const; |
523 | | |
524 | | void compute_encryption_O_U(char const* user_password, char const* owner_password); |
525 | | |
526 | | std::string |
527 | | compute_encryption_parameters_V5(char const* user_password, char const* owner_password); |
528 | | |
529 | | std::string compute_parameters(char const* user_password, char const* owner_password); |
530 | | |
531 | | private: |
532 | | static constexpr unsigned int OU_key_bytes_V4 = 16; // == sizeof(MD5::Digest) |
533 | | |
534 | | EncryptionData(EncryptionData const&) = delete; |
535 | | EncryptionData& operator=(EncryptionData const&) = delete; |
536 | | |
537 | | std::string hash_V5( |
538 | | std::string const& password, std::string const& salt, std::string const& udata) const; |
539 | | std::string |
540 | | compute_O_value(std::string const& user_password, std::string const& owner_password) const; |
541 | | std::string compute_U_value(std::string const& user_password) const; |
542 | | std::string compute_encryption_key_from_password(std::string const& password) const; |
543 | | std::string recover_encryption_key_with_password(std::string const& password) const; |
544 | | bool check_owner_password_V4( |
545 | | std::string& user_password, std::string const& owner_password) const; |
546 | | bool check_owner_password_V5(std::string const& owner_password) const; |
547 | | std::string compute_Perms_value_V5_clear() const; |
548 | | std::string compute_O_rc4_key( |
549 | | std::string const& user_password, std::string const& owner_password) const; |
550 | | std::string compute_U_value_R2(std::string const& user_password) const; |
551 | | std::string compute_U_value_R3(std::string const& user_password) const; |
552 | | bool check_user_password_V4(std::string const& user_password) const; |
553 | | bool check_user_password_V5(std::string const& user_password) const; |
554 | | |
555 | | int V; |
556 | | int R; |
557 | | int Length_bytes; |
558 | | std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared. |
559 | | std::string O; |
560 | | std::string U; |
561 | | std::string OE; |
562 | | std::string UE; |
563 | | std::string Perms; |
564 | | std::string id1; |
565 | | bool encrypt_metadata; |
566 | | }; |
567 | | |
568 | | QPDF_DLL |
569 | | bool isEncrypted() const; |
570 | | |
571 | | QPDF_DLL |
572 | | bool isEncrypted(int& R, int& P); |
573 | | |
574 | | QPDF_DLL |
575 | | bool isEncrypted( |
576 | | int& R, |
577 | | int& P, |
578 | | int& V, |
579 | | encryption_method_e& stream_method, |
580 | | encryption_method_e& string_method, |
581 | | encryption_method_e& file_method); |
582 | | |
583 | | QPDF_DLL |
584 | | bool ownerPasswordMatched() const; |
585 | | |
586 | | QPDF_DLL |
587 | | bool userPasswordMatched() const; |
588 | | |
589 | | // Encryption permissions -- not enforced by QPDF |
590 | | QPDF_DLL |
591 | | bool allowAccessibility(); |
592 | | QPDF_DLL |
593 | | bool allowExtractAll(); |
594 | | QPDF_DLL |
595 | | bool allowPrintLowRes(); |
596 | | QPDF_DLL |
597 | | bool allowPrintHighRes(); |
598 | | QPDF_DLL |
599 | | bool allowModifyAssembly(); |
600 | | QPDF_DLL |
601 | | bool allowModifyForm(); |
602 | | QPDF_DLL |
603 | | bool allowModifyAnnotation(); |
604 | | QPDF_DLL |
605 | | bool allowModifyOther(); |
606 | | QPDF_DLL |
607 | | bool allowModifyAll(); |
608 | | |
609 | | // Helper function to trim padding from user password. Calling trim_user_password on the result |
610 | | // of getPaddedUserPassword gives getTrimmedUserPassword's result. |
611 | | QPDF_DLL |
612 | | static void trim_user_password(std::string& user_password); |
613 | | QPDF_DLL |
614 | | static std::string compute_data_key( |
615 | | std::string const& encryption_key, |
616 | | int objid, |
617 | | int generation, |
618 | | bool use_aes, |
619 | | int encryption_V, |
620 | | int encryption_R); |
621 | | QPDF_DLL |
622 | | static std::string |
623 | | compute_encryption_key(std::string const& password, EncryptionData const& data); |
624 | | |
625 | | QPDF_DLL |
626 | | static void compute_encryption_O_U( |
627 | | char const* user_password, |
628 | | char const* owner_password, |
629 | | int V, |
630 | | int R, |
631 | | int key_len, |
632 | | int P, |
633 | | bool encrypt_metadata, |
634 | | std::string const& id1, |
635 | | std::string& O, |
636 | | std::string& U); |
637 | | QPDF_DLL |
638 | | static void compute_encryption_parameters_V5( |
639 | | char const* user_password, |
640 | | char const* owner_password, |
641 | | int V, |
642 | | int R, |
643 | | int key_len, |
644 | | int P, |
645 | | bool encrypt_metadata, |
646 | | std::string const& id1, |
647 | | std::string& encryption_key, |
648 | | std::string& O, |
649 | | std::string& U, |
650 | | std::string& OE, |
651 | | std::string& UE, |
652 | | std::string& Perms); |
653 | | // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or |
654 | | // 128-bit keys, the user password can be recovered when the file is opened using the owner |
655 | | // password. This is not possible with newer encryption formats. If you are attempting to |
656 | | // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. |
657 | | QPDF_DLL |
658 | | std::string const& getPaddedUserPassword() const; |
659 | | // Return human-readable form of user password subject to same limitations as |
660 | | // getPaddedUserPassword(). |
661 | | QPDF_DLL |
662 | | std::string getTrimmedUserPassword() const; |
663 | | // Return the previously computed or retrieved encryption key for this file |
664 | | QPDF_DLL |
665 | | std::string getEncryptionKey() const; |
666 | | // Remove security restrictions associated with digitally signed files. From qpdf 11.7.0, this |
667 | | // is called by QPDFAcroFormDocumentHelper::disableDigitalSignatures and is more useful when |
668 | | // called from there than when just called by itself. |
669 | | QPDF_DLL |
670 | | void removeSecurityRestrictions(); |
671 | | |
672 | | // Linearization support |
673 | | |
674 | | // Returns true iff the file starts with a linearization parameter dictionary. Does no |
675 | | // additional validation. |
676 | | QPDF_DLL |
677 | | bool isLinearized(); |
678 | | |
679 | | // Performs various sanity checks on a linearized file. Return true if no errors or warnings. |
680 | | // Otherwise, return false and output errors and warnings to the default output stream |
681 | | // (std::cout or whatever is configured in the logger). It is recommended for linearization |
682 | | // errors to be treated as warnings. |
683 | | QPDF_DLL |
684 | | bool checkLinearization(); |
685 | | |
686 | | // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints |
687 | | // tables to the default output stream. Normalization includes adding min values to delta values |
688 | | // and adjusting offsets based on the location and size of the primary hint stream. |
689 | | QPDF_DLL |
690 | | void showLinearizationData(); |
691 | | |
692 | | // Shows the contents of the cross-reference table |
693 | | QPDF_DLL |
694 | | void showXRefTable(); |
695 | | |
696 | | // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this |
697 | | // method was used to detect all indirect references to objects that don't exist and resolve |
698 | | // them by replacing them with null, which is how the PDF spec says to interpret such dangling |
699 | | // references. This method is called automatically when you try to add any new objects, if you |
700 | | // call getAllObjects, and before a file is written. The qpdf object caches whether it has run |
701 | | // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run |
702 | | // again if you had explicitly added new objects that may have additional dangling references. |
703 | | QPDF_DLL |
704 | | void fixDanglingReferences(bool force = false); |
705 | | |
706 | | // Return the approximate number of indirect objects. It is approximate because not all objects |
707 | | // in the file are preserved in all cases, and gaps in object numbering are not preserved. |
708 | | QPDF_DLL |
709 | | size_t getObjectCount(); |
710 | | |
711 | | // Returns a list of indirect objects for every object in the xref table. Useful for discovering |
712 | | // objects that are not otherwise referenced. |
713 | | QPDF_DLL |
714 | | std::vector<QPDFObjectHandle> getAllObjects(); |
715 | | |
716 | | // Optimization support -- see doc/optimization. Implemented in QPDF_optimization.cc |
717 | | |
718 | | // The object_stream_data map maps from a "compressed" object to the object stream that contains |
719 | | // it. This enables optimize to populate the object <-> user maps with only uncompressed |
720 | | // objects. If allow_changes is false, an exception will be thrown if any changes are made |
721 | | // during the optimization process. This is available so that the test suite can make sure that |
722 | | // a linearized file is already optimized. When called in this way, optimize() still populates |
723 | | // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is |
724 | | // called for each stream object. The function should return 2 if optimization should discard |
725 | | // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should |
726 | | // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for |
727 | | // stream dictionary keys it will be regenerating. |
728 | | [[deprecated("Unused - see release notes for qpdf 12.1.0")]] QPDF_DLL void optimize( |
729 | | std::map<int, int> const& object_stream_data, |
730 | | bool allow_changes = true, |
731 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); |
732 | | |
733 | | // Traverse the page tree and return all /Page objects. It also detects and resolves cases in which the |
734 | | // same /Page object is duplicated. For efficiency, this method returns a const reference to an |
735 | | // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but |
736 | | // direct manipulation of the pages tree or pushing inheritable objects to the page level may |
737 | | // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should |
738 | | // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache |
739 | | // was arguably incorrect, but it is being left here for compatibility. It is, however, |
740 | | // completely safe to use this for files that you are not modifying. |
741 | | QPDF_DLL |
742 | | std::vector<QPDFObjectHandle> const& getAllPages(); |
743 | | |
744 | | QPDF_DLL |
745 | | bool everCalledGetAllPages() const; |
746 | | QPDF_DLL |
747 | | bool everPushedInheritedAttributesToPages() const; |
748 | | |
749 | | // These methods, given a page object or its object/generation number, return the 0-based index |
750 | | // into the array returned by getAllPages() for that page. An exception is thrown if the page is |
751 | | // not found. |
752 | | QPDF_DLL |
753 | | int findPage(QPDFObjGen og); |
754 | | QPDF_DLL |
755 | | int findPage(QPDFObjectHandle& page); |
756 | | |
757 | | // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If |
758 | | // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages |
759 | | // or changes to any /Pages objects, to calls to these page handling APIs, you never need to |
760 | | // call this method. If you modify /Pages structures directly, you must call this method |
761 | | // afterwards. This method updates the internal list of pages, so after calling this method, |
762 | | // any previous references returned by getAllPages() will be valid again. It also resets any |
763 | | // state about having pushed inherited attributes in /Pages objects down to the pages, so if you |
764 | | // add any inheritable attributes to a /Pages object, you should also call this method. |
765 | | QPDF_DLL |
766 | | void updateAllPagesCache(); |
767 | | |
768 | | // Legacy page handling API. These methods are not going anywhere, and you should feel free to |
769 | | // continue using them if it simplifies your code. Newer code should make use of |
770 | | // QPDFPageDocumentHelper instead as future page handling methods will be added there. The |
771 | | // functionality and specification of these legacy methods is identical to the identically named |
772 | | // methods there, except that these versions use QPDFObjectHandle instead of |
773 | | // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are |
774 | | // subtleties you need to know about, so please look at the comments there. |
775 | | QPDF_DLL |
776 | | void pushInheritedAttributesToPage(); |
777 | | QPDF_DLL |
778 | | void addPage(QPDFObjectHandle newpage, bool first); |
779 | | QPDF_DLL |
780 | | void addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage); |
781 | | QPDF_DLL |
782 | | void removePage(QPDFObjectHandle page); |
783 | | // End legacy page helpers |
784 | | |
785 | | // End of the public API. The following classes and methods are for qpdf internal use only. |
786 | | |
787 | | class Writer; |
788 | | class Resolver; |
789 | | class StreamCopier; |
790 | | class ParseGuard; |
791 | | class Pipe; |
792 | | class JobSetter; |
793 | | |
794 | | inline bool reconstructed_xref() const; |
795 | | |
796 | | // For testing only -- do not add to DLL |
797 | | static bool test_json_validators(); |
798 | | |
799 | | private: |
800 | | // It has never been safe to copy QPDF objects as there is code in the library that assumes |
801 | | // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until |
802 | | // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf |
803 | | // 11, you can use QPDF::create to create them. |
804 | | QPDF(QPDF const&) = delete; |
805 | | QPDF& operator=(QPDF const&) = delete; |
806 | | |
807 | | static std::string const qpdf_version; |
808 | | |
809 | | class ObjCache; |
810 | | class ObjCopier; |
811 | | class EncryptionParameters; |
812 | | class ForeignStreamData; |
813 | | class CopiedStreamDataProvider; |
814 | | class StringDecrypter; |
815 | | class ResolveRecorder; |
816 | | class JSONReactor; |
817 | | |
818 | | void parse(char const* password); |
819 | | void inParse(bool); |
820 | | void setTrailer(QPDFObjectHandle obj); |
821 | | void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); |
822 | | bool resolveXRefTable(); |
823 | | void reconstruct_xref(QPDFExc& e, bool found_startxref = true); |
824 | | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
825 | | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
826 | | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
827 | | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
828 | | qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); |
829 | | qpdf_offset_t processXRefStream( |
830 | | qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); |
831 | | std::pair<int, std::array<int, 3>> |
832 | | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); |
833 | | int processXRefSize( |
834 | | QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged); |
835 | | std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex( |
836 | | QPDFObjectHandle& dict, |
837 | | int max_num_entries, |
838 | | std::function<QPDFExc(std::string_view)> damaged); |
839 | | void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); |
840 | | void insertFreeXrefEntry(QPDFObjGen); |
841 | | void setLastObjectDescription(std::string const& description, QPDFObjGen og); |
842 | | QPDFObjectHandle readTrailer(); |
843 | | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
844 | | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
845 | | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
846 | | QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); |
847 | | size_t recoverStreamLength( |
848 | | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); |
849 | | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); |
850 | | |
851 | | QPDFObjGen read_object_start(qpdf_offset_t offset); |
852 | | void readObjectAtOffset( |
853 | | bool attempt_recovery, |
854 | | qpdf_offset_t offset, |
855 | | std::string const& description, |
856 | | QPDFObjGen exp_og); |
857 | | QPDFObjectHandle readObjectAtOffset( |
858 | | qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref); |
859 | | std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og); |
860 | | void resolveObjectsInStream(int obj_stream_number); |
861 | | void stopOnError(std::string const& message); |
862 | | QPDFObjGen nextObjGen(); |
863 | | QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr<QPDFObject> const&); |
864 | | QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj); |
865 | | bool isCached(QPDFObjGen og); |
866 | | bool isUnresolved(QPDFObjGen og); |
867 | | std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf); |
868 | | std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen); |
869 | | void removeObject(QPDFObjGen og); |
870 | | void updateCache( |
871 | | QPDFObjGen og, |
872 | | std::shared_ptr<QPDFObject> const& object, |
873 | | qpdf_offset_t end_before_space, |
874 | | qpdf_offset_t end_after_space, |
875 | | bool destroy = true); |
876 | | static QPDFExc damagedPDF( |
877 | | InputSource& input, |
878 | | std::string const& object, |
879 | | qpdf_offset_t offset, |
880 | | std::string const& message); |
881 | | QPDFExc damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message); |
882 | | QPDFExc damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message); |
883 | | QPDFExc damagedPDF(std::string const& object, std::string const& message); |
884 | | QPDFExc damagedPDF(qpdf_offset_t offset, std::string const& message); |
885 | | QPDFExc damagedPDF(std::string const& message); |
886 | | |
887 | | // Calls finish() on the pipeline when done but does not delete it |
888 | | bool pipeStreamData( |
889 | | QPDFObjGen og, |
890 | | qpdf_offset_t offset, |
891 | | size_t length, |
892 | | QPDFObjectHandle dict, |
893 | | bool is_root_metadata, |
894 | | Pipeline* pipeline, |
895 | | bool suppress_warnings, |
896 | | bool will_retry); |
897 | | bool pipeForeignStreamData( |
898 | | std::shared_ptr<ForeignStreamData>, Pipeline*, bool suppress_warnings, bool will_retry); |
899 | | static bool pipeStreamData( |
900 | | std::shared_ptr<QPDF::EncryptionParameters> encp, |
901 | | std::shared_ptr<InputSource> file, |
902 | | QPDF& qpdf_for_warning, |
903 | | QPDFObjGen og, |
904 | | qpdf_offset_t offset, |
905 | | size_t length, |
906 | | QPDFObjectHandle dict, |
907 | | bool is_root_metadata, |
908 | | Pipeline* pipeline, |
909 | | bool suppress_warnings, |
910 | | bool will_retry); |
911 | | |
912 | | // For QPDFWriter: |
913 | | |
914 | | std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal(); |
915 | | template <typename T> |
916 | | void optimize_internal( |
917 | | T const& object_stream_data, |
918 | | bool allow_changes = true, |
919 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); |
920 | | void optimize( |
921 | | QPDFWriter::ObjTable const& obj, |
922 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters); |
923 | | size_t tableSize(); |
924 | | |
925 | | // Get lists of all objects in order according to the part of a linearized file that they belong |
926 | | // to. |
927 | | void getLinearizedParts( |
928 | | QPDFWriter::ObjTable const& obj, |
929 | | std::vector<QPDFObjectHandle>& part4, |
930 | | std::vector<QPDFObjectHandle>& part6, |
931 | | std::vector<QPDFObjectHandle>& part7, |
932 | | std::vector<QPDFObjectHandle>& part8, |
933 | | std::vector<QPDFObjectHandle>& part9); |
934 | | |
935 | | void generateHintStream( |
936 | | QPDFWriter::NewObjTable const& new_obj, |
937 | | QPDFWriter::ObjTable const& obj, |
938 | | std::string& hint_stream, |
939 | | int& S, |
940 | | int& O, |
941 | | bool compressed); |
942 | | |
943 | | // Get a list of objects that would be permitted in an object stream. |
944 | | template <typename T> |
945 | | std::vector<T> getCompressibleObjGens(); |
946 | | std::vector<QPDFObjGen> getCompressibleObjVector(); |
947 | | std::vector<bool> getCompressibleObjSet(); |
948 | | |
949 | | // methods to support page handling |
950 | | |
951 | | void getAllPagesInternal( |
952 | | QPDFObjectHandle cur_pages, |
953 | | QPDFObjGen::set& visited, |
954 | | QPDFObjGen::set& seen, |
955 | | bool media_box, |
956 | | bool resources); |
957 | | void insertPage(QPDFObjectHandle newpage, int pos); |
958 | | void flattenPagesTree(); |
959 | | void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); |
960 | | |
961 | | // methods to support encryption -- implemented in QPDF_encryption.cc |
962 | | void initializeEncryption(); |
963 | | static std::string |
964 | | getKeyForObject(std::shared_ptr<EncryptionParameters> encp, QPDFObjGen og, bool use_aes); |
965 | | void decryptString(std::string&, QPDFObjGen og); |
966 | | static void decryptStream( |
967 | | std::shared_ptr<EncryptionParameters> encp, |
968 | | std::shared_ptr<InputSource> file, |
969 | | QPDF& qpdf_for_warning, |
970 | | Pipeline*& pipeline, |
971 | | QPDFObjGen og, |
972 | | QPDFObjectHandle& stream_dict, |
973 | | bool is_root_metadata, |
974 | | std::unique_ptr<Pipeline>& heap); |
975 | | |
976 | | // Methods to support object copying |
977 | | void reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top); |
978 | | QPDFObjectHandle |
979 | | replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top); |
980 | | void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream); |
981 | | |
982 | | struct HPageOffsetEntry; |
983 | | struct HPageOffset; |
984 | | struct HSharedObjectEntry; |
985 | | struct HSharedObject; |
986 | | struct HGeneric; |
987 | | struct LinParameters; |
988 | | struct CHPageOffsetEntry; |
989 | | struct CHPageOffset; |
990 | | struct CHSharedObjectEntry; |
991 | | struct CHSharedObject; |
992 | | class ObjUser; |
993 | | struct UpdateObjectMapsFrame; |
994 | | class PatternFinder; |
995 | | |
996 | | // Methods to support pattern finding |
997 | | static bool validatePDFVersion(char const*&, std::string& version); |
998 | | bool findHeader(); |
999 | | bool findStartxref(); |
1000 | | bool findEndstream(); |
1001 | | |
1002 | | // methods to support linearization checking -- implemented in QPDF_linearization.cc |
1003 | | void readLinearizationData(); |
1004 | | bool checkLinearizationInternal(); |
1005 | | void dumpLinearizationDataInternal(); |
1006 | | void linearizationWarning(std::string_view); |
1007 | | QPDFObjectHandle readHintStream(Pipeline&, qpdf_offset_t offset, size_t length); |
1008 | | void readHPageOffset(BitStream); |
1009 | | void readHSharedObject(BitStream); |
1010 | | void readHGeneric(BitStream, HGeneric&); |
1011 | | qpdf_offset_t maxEnd(ObjUser const& ou); |
1012 | | qpdf_offset_t getLinearizationOffset(QPDFObjGen); |
1013 | | QPDFObjectHandle |
1014 | | getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); |
1015 | | QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); |
1016 | | int lengthNextN(int first_object, int n); |
1017 | | void |
1018 | | checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); |
1019 | | void |
1020 | | checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); |
1021 | | void checkHOutlines(); |
1022 | | void dumpHPageOffset(); |
1023 | | void dumpHSharedObject(); |
1024 | | void dumpHGeneric(HGeneric&); |
1025 | | qpdf_offset_t adjusted_offset(qpdf_offset_t offset); |
1026 | | template <typename T> |
1027 | | void calculateLinearizationData(T const& object_stream_data); |
1028 | | template <typename T> |
1029 | | void pushOutlinesToPart( |
1030 | | std::vector<QPDFObjectHandle>& part, |
1031 | | std::set<QPDFObjGen>& lc_outlines, |
1032 | | T const& object_stream_data); |
1033 | | int outputLengthNextN( |
1034 | | int in_object, |
1035 | | int n, |
1036 | | QPDFWriter::NewObjTable const& new_obj, |
1037 | | QPDFWriter::ObjTable const& obj); |
1038 | | void |
1039 | | calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); |
1040 | | void |
1041 | | calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); |
1042 | | void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); |
1043 | | void writeHPageOffset(BitWriter&); |
1044 | | void writeHSharedObject(BitWriter&); |
1045 | | void writeHGeneric(BitWriter&, HGeneric&); |
1046 | | |
1047 | | // Methods to support optimization |
1048 | | |
1049 | | void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); |
1050 | | void pushInheritedAttributesToPageInternal( |
1051 | | QPDFObjectHandle, |
1052 | | std::map<std::string, std::vector<QPDFObjectHandle>>&, |
1053 | | bool allow_changes, |
1054 | | bool warn_skipped_keys); |
1055 | | void updateObjectMaps( |
1056 | | ObjUser const& ou, |
1057 | | QPDFObjectHandle oh, |
1058 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters); |
1059 | | void filterCompressedObjects(std::map<int, int> const& object_stream_data); |
1060 | | void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); |
1061 | | |
1062 | | // JSON import |
1063 | | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); |
1064 | | |
1065 | | // Type conversion helper methods |
// toO: forwards `i` to QIntC::to_offset and returns the result as qpdf_offset_t.
// NOTE(review): the QIntC helpers are presumably range-checked conversions -- confirm in qpdf/QIntC.hh.
1066 | | template <typename T> |
1067 | | static qpdf_offset_t |
1068 | | toO(T const& i) |
1069 | 2.54M | { |
1070 | 2.54M | return QIntC::to_offset(i); |
1071 | 2.54M | } |
// toS: forwards `i` to QIntC::to_size and returns the result as size_t.
// The per-instantiation rows in this listing show it instantiated for int, unsigned long long and long long.
// NOTE(review): the QIntC helpers are presumably range-checked conversions -- confirm in qpdf/QIntC.hh.
1072 | | template <typename T> |
1073 | | static size_t |
1074 | | toS(T const& i) |
1075 | 58.4k | { |
1076 | 58.4k | return QIntC::to_size(i); |
1077 | 58.4k | } unsigned long QPDF::toS<int>(int const&) Line | Count | Source | 1075 | 32.5k | { | 1076 | 32.5k | return QIntC::to_size(i); | 1077 | 32.5k | } |
unsigned long QPDF::toS<unsigned long long>(unsigned long long const&) Line | Count | Source | 1075 | 10.3k | { | 1076 | 10.3k | return QIntC::to_size(i); | 1077 | 10.3k | } |
unsigned long QPDF::toS<long long>(long long const&) Line | Count | Source | 1075 | 15.5k | { | 1076 | 15.5k | return QIntC::to_size(i); | 1077 | 15.5k | } |
|
// toI: forwards `i` to QIntC::to_int and returns the result as int.
// The per-instantiation rows in this listing show it instantiated for unsigned long, long and long long.
// NOTE(review): the QIntC helpers are presumably range-checked conversions -- confirm in qpdf/QIntC.hh.
1078 | | template <typename T> |
1079 | | static int |
1080 | | toI(T const& i) |
1081 | 934k | { |
1082 | 934k | return QIntC::to_int(i); |
1083 | 934k | } int QPDF::toI<unsigned long>(unsigned long const&) Line | Count | Source | 1081 | 46.3k | { | 1082 | 46.3k | return QIntC::to_int(i); | 1083 | 46.3k | } |
int QPDF::toI<long>(long const&) Line | Count | Source | 1081 | 6.40k | { | 1082 | 6.40k | return QIntC::to_int(i); | 1083 | 6.40k | } |
int QPDF::toI<long long>(long long const&) Line | Count | Source | 1081 | 882k | { | 1082 | 882k | return QIntC::to_int(i); | 1083 | 882k | } |
|
// toULL: forwards `i` to QIntC::to_ulonglong and returns the result as unsigned long long.
// No execution counts are recorded for this helper in this listing (the count column is empty).
// NOTE(review): the QIntC helpers are presumably range-checked conversions -- confirm in qpdf/QIntC.hh.
1084 | | template <typename T> |
1085 | | static unsigned long long |
1086 | | toULL(T const& i) |
1087 | | { |
1088 | | return QIntC::to_ulonglong(i); |
1089 | | } |
1090 | | |
1091 | | class Members; |
1092 | | |
1093 | | // Keep all member variables inside the Members object, which we dynamically allocate. This |
1094 | | // makes it possible to add new private members without breaking binary compatibility. |
1095 | | std::unique_ptr<Members> m; |
1096 | | }; |
1097 | | |
1098 | | #endif // QPDF_HH |