/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/AcroForm.hh> |
15 | | #include <qpdf/FileInputSource.hh> |
16 | | #include <qpdf/InputSource_private.hh> |
17 | | #include <qpdf/OffsetInputSource.hh> |
18 | | #include <qpdf/Pipeline.hh> |
19 | | #include <qpdf/QPDFExc.hh> |
20 | | #include <qpdf/QPDFLogger.hh> |
21 | | #include <qpdf/QPDFObjectHandle_private.hh> |
22 | | #include <qpdf/QPDFObject_private.hh> |
23 | | #include <qpdf/QPDFParser.hh> |
24 | | #include <qpdf/QTC.hh> |
25 | | #include <qpdf/QUtil.hh> |
26 | | #include <qpdf/Util.hh> |
27 | | |
28 | | using namespace qpdf; |
29 | | using namespace std::literals; |
30 | | |
31 | | using Common = impl::Doc::Common; |
32 | | using Objects = impl::Doc::Objects; |
33 | | using Foreign = Objects::Foreign; |
34 | | using Streams = Objects::Streams; |
35 | | |
36 | | // This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
37 | | // being static as well. |
38 | | std::string const QPDF::qpdf_version(QPDF_VERSION); |
39 | | |
// Minimal, self-consistent PDF with a catalog and an empty page tree. The xref offsets (9, 58)
// and the startxref value (110) are byte offsets into this exact text, so any edit to the text
// must be matched by updated offsets. Both the characters and the pointer are const so the
// template cannot be re-pointed at run time.
static char const* const EMPTY_PDF = (
    // force line break
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
58 | | |
59 | | namespace |
60 | | { |
61 | | class InvalidInputSource: public InputSource |
62 | | { |
63 | | public: |
64 | | ~InvalidInputSource() override = default; |
65 | | qpdf_offset_t |
66 | | findAndSkipNextEOL() override |
67 | 0 | { |
68 | 0 | throwException(); |
69 | 0 | return 0; |
70 | 0 | } |
71 | | std::string const& |
72 | | getName() const override |
73 | 0 | { |
74 | 0 | static std::string name("closed input source"); |
75 | 0 | return name; |
76 | 0 | } |
77 | | qpdf_offset_t |
78 | | tell() override |
79 | 0 | { |
80 | 0 | throwException(); |
81 | 0 | return 0; |
82 | 0 | } |
83 | | void |
84 | | seek(qpdf_offset_t offset, int whence) override |
85 | 0 | { |
86 | 0 | throwException(); |
87 | 0 | } |
88 | | void |
89 | | rewind() override |
90 | 0 | { |
91 | 0 | throwException(); |
92 | 0 | } |
93 | | size_t |
94 | | read(char* buffer, size_t length) override |
95 | 0 | { |
96 | 0 | throwException(); |
97 | 0 | return 0; |
98 | 0 | } |
99 | | void |
100 | | unreadCh(char ch) override |
101 | 0 | { |
102 | 0 | throwException(); |
103 | 0 | } |
104 | | |
105 | | private: |
106 | | void |
107 | | throwException() |
108 | 0 | { |
109 | 0 | throw std::logic_error( |
110 | 0 | "QPDF operation attempted on a QPDF object with no input " |
111 | 0 | "source. QPDF operations are invalid before processFile (or " |
112 | 0 | "another process method) or after closeInputSource"); |
113 | 0 | } |
114 | | }; |
115 | | } // namespace |
116 | | |
117 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
118 | 0 | qpdf(qpdf), |
119 | 0 | og(og) |
120 | 0 | { |
121 | 0 | } |
122 | | |
123 | | std::string const& |
124 | | QPDF::QPDFVersion() |
125 | 0 | { |
126 | | // The C API relies on this being a static value. |
127 | 0 | return QPDF::qpdf_version; |
128 | 0 | } |
129 | | |
130 | | QPDF::Members::Members(QPDF& qpdf) : |
131 | 7.94k | Doc(qpdf, this), |
132 | 7.94k | c(qpdf, this), |
133 | 7.94k | lin(*this), |
134 | 7.94k | objects(*this), |
135 | 7.94k | pages(*this), |
136 | 7.94k | file(std::make_shared<InvalidInputSource>()), |
137 | 7.94k | encp(std::make_shared<EncryptionParameters>()) |
138 | 7.94k | { |
139 | 7.94k | } |
140 | | |
141 | | QPDF::QPDF() : |
142 | 7.94k | m(std::make_unique<Members>(*this)) |
143 | 7.94k | { |
144 | 7.94k | m->tokenizer.allowEOF(); |
145 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
146 | | // the lifetime of this running application. |
147 | 7.94k | static std::atomic<unsigned long long> unique_id{0}; |
148 | 7.94k | m->unique_id = unique_id.fetch_add(1ULL); |
149 | 7.94k | } |
150 | | |
151 | | /// @brief Initializes the AcroForm functionality for the document. |
152 | | /// @par |
153 | | /// This method creates a unique instance of QPDFAcroFormDocumentHelper and associates it |
154 | | /// with the document. It also updates the `acroform_` pointer to reference the AcroForm |
155 | | /// instance managed by the helper. |
156 | | /// |
157 | | /// The method has been separated out from `acroform` to avoid it being inlined |
158 | | /// unnecessarily. |
159 | | void |
160 | | QPDF::Doc::init_acroform() |
161 | 0 | { |
162 | 0 | acroform_dh_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf); |
163 | 0 | acroform_ = acroform_dh_->m.get(); |
164 | 0 | } |
165 | | |
166 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
167 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
168 | | class Disconnect: BaseHandle |
169 | | { |
170 | | public: |
171 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
172 | 12.2k | BaseHandle(obj) |
173 | 12.2k | { |
174 | 12.2k | } |
175 | | void |
176 | | disconnect() |
177 | 12.2k | { |
178 | 12.2k | BaseHandle::disconnect(false); |
179 | 12.2k | if (raw_type_code() != ::ot_null) { |
180 | 6.91k | obj->value = QPDF_Destroyed(); |
181 | 6.91k | } |
182 | 12.2k | } |
183 | | }; |
184 | | |
185 | | QPDF::~QPDF() |
186 | 7.94k | { |
187 | | // If two objects are mutually referential (through each object having an array or dictionary |
188 | | // that contains an indirect reference to the other), the circular references in the |
189 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
190 | | // in the object cache, which is those objects that we read from the file, and break all |
191 | | // resolved indirect references by replacing them with an internal object type representing that |
192 | | // they have been destroyed. Note that we can't break references like this at any time when the |
193 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
194 | | // are reachable from this object to release their association with this QPDF. Direct objects |
195 | | // are not destroyed since they can be moved to other QPDF objects safely. |
196 | | |
197 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
198 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
199 | 7.94k | m->xref_table.clear(); |
200 | 12.2k | for (auto const& iter: m->obj_cache) { |
201 | 12.2k | Disconnect(iter.second.object).disconnect(); |
202 | 12.2k | } |
203 | 7.94k | } |
204 | | |
205 | | std::shared_ptr<QPDF> |
206 | | QPDF::create() |
207 | 0 | { |
208 | 0 | return std::make_shared<QPDF>(); |
209 | 0 | } |
210 | | |
211 | | void |
212 | | QPDF::processFile(char const* filename, char const* password) |
213 | 0 | { |
214 | 0 | auto* fi = new FileInputSource(filename); |
215 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
216 | 0 | } |
217 | | |
218 | | void |
219 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
220 | 0 | { |
221 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
222 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
223 | 0 | } |
224 | | |
225 | | void |
226 | | QPDF::processMemoryFile( |
227 | | char const* description, char const* buf, size_t length, char const* password) |
228 | 7.94k | { |
229 | 7.94k | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
230 | 7.94k | processInputSource(is, password); |
231 | 7.94k | } |
232 | | |
233 | | void |
234 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
235 | 7.94k | { |
236 | 7.94k | m->file = source; |
237 | 7.94k | m->objects.parse(password); |
238 | 7.94k | } |
239 | | |
240 | | void |
241 | | QPDF::closeInputSource() |
242 | 0 | { |
243 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
244 | 0 | } |
245 | | |
246 | | void |
247 | | QPDF::setPasswordIsHexKey(bool val) |
248 | 0 | { |
249 | 0 | m->cf.password_is_hex_key(val); |
250 | 0 | } |
251 | | |
252 | | void |
253 | | QPDF::emptyPDF() |
254 | 0 | { |
255 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
256 | 0 | } |
257 | | |
258 | | void |
259 | | QPDF::registerStreamFilter( |
260 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
261 | 0 | { |
262 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
263 | 0 | } |
264 | | |
265 | | void |
266 | | QPDF::setIgnoreXRefStreams(bool val) |
267 | 0 | { |
268 | 0 | (void)m->cf.ignore_xref_streams(val); |
269 | 0 | } |
270 | | |
271 | | std::shared_ptr<QPDFLogger> |
272 | | QPDF::getLogger() |
273 | 0 | { |
274 | 0 | return m->cf.log(); |
275 | 0 | } |
276 | | |
277 | | void |
278 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
279 | 0 | { |
280 | 0 | m->cf.log(l); |
281 | 0 | } |
282 | | |
283 | | void |
284 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
285 | 0 | { |
286 | 0 | setLogger(QPDFLogger::create()); |
287 | 0 | m->cf.log()->setOutputStreams(out, err); |
288 | 0 | } |
289 | | |
290 | | void |
291 | | QPDF::setSuppressWarnings(bool val) |
292 | 0 | { |
293 | 0 | (void)m->cf.suppress_warnings(val); |
294 | 0 | } |
295 | | |
296 | | void |
297 | | QPDF::setMaxWarnings(size_t val) |
298 | 7.94k | { |
299 | 7.94k | (void)m->cf.max_warnings(val); |
300 | 7.94k | } |
301 | | |
302 | | void |
303 | | QPDF::setAttemptRecovery(bool val) |
304 | 0 | { |
305 | 0 | (void)m->cf.surpress_recovery(!val); |
306 | 0 | } |
307 | | |
308 | | void |
309 | | QPDF::setImmediateCopyFrom(bool val) |
310 | 0 | { |
311 | 0 | (void)m->cf.immediate_copy_from(val); |
312 | 0 | } |
313 | | |
314 | | std::vector<QPDFExc> |
315 | | QPDF::getWarnings() |
316 | 0 | { |
317 | 0 | std::vector<QPDFExc> result = std::move(m->warnings); |
318 | 0 | m->warnings.clear(); |
319 | 0 | return result; |
320 | 0 | } |
321 | | |
322 | | bool |
323 | | QPDF::anyWarnings() const |
324 | 0 | { |
325 | 0 | return !m->warnings.empty(); |
326 | 0 | } |
327 | | |
328 | | size_t |
329 | | QPDF::numWarnings() const |
330 | 0 | { |
331 | 0 | return m->warnings.size(); |
332 | 0 | } |
333 | | |
334 | | void |
335 | | QPDF::warn(QPDFExc const& e) |
336 | 0 | { |
337 | 0 | m->c.warn(e); |
338 | 0 | } |
339 | | |
340 | | void |
341 | | Common::warn(QPDFExc const& e) |
342 | 82.3k | { |
343 | 82.3k | if (cf.max_warnings() > 0 && m->warnings.size() >= cf.max_warnings()) { |
344 | 26 | stopOnError("Too many warnings - file is too badly damaged"); |
345 | 26 | } |
346 | 82.3k | m->warnings.emplace_back(e); |
347 | 82.3k | if (!cf.suppress_warnings()) { |
348 | 82.2k | *cf.log()->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
349 | 82.2k | } |
350 | 82.3k | } |
351 | | |
352 | | void |
353 | | QPDF::warn( |
354 | | qpdf_error_code_e error_code, |
355 | | std::string const& object, |
356 | | qpdf_offset_t offset, |
357 | | std::string const& message) |
358 | 82.3k | { |
359 | 82.3k | m->c.warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
360 | 82.3k | } |
361 | | |
362 | | void |
363 | | Common::warn( |
364 | | qpdf_error_code_e error_code, |
365 | | std::string const& object, |
366 | | qpdf_offset_t offset, |
367 | | std::string const& message) |
368 | 0 | { |
369 | 0 | warn(QPDFExc(error_code, qpdf.getFilename(), object, offset, message)); |
370 | 0 | } |
371 | | |
372 | | QPDFObjectHandle |
373 | | QPDF::newReserved() |
374 | 0 | { |
375 | 0 | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
376 | 0 | } |
377 | | |
378 | | QPDFObjectHandle |
379 | | QPDF::newIndirectNull() |
380 | 0 | { |
381 | 0 | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
382 | 0 | } |
383 | | |
384 | | QPDFObjectHandle |
385 | | QPDF::newStream() |
386 | 0 | { |
387 | 0 | return makeIndirectObject( |
388 | 0 | qpdf::Stream(*this, m->objects.nextObjGen(), Dictionary::empty(), 0, 0)); |
389 | 0 | } |
390 | | |
391 | | QPDFObjectHandle |
392 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
393 | 0 | { |
394 | 0 | auto result = newStream(); |
395 | 0 | result.replaceStreamData(data, {}, {}); |
396 | 0 | return result; |
397 | 0 | } |
398 | | |
399 | | QPDFObjectHandle |
400 | | QPDF::newStream(std::string const& data) |
401 | 0 | { |
402 | 0 | auto result = newStream(); |
403 | 0 | result.replaceStreamData(data, {}, {}); |
404 | 0 | return result; |
405 | 0 | } |
406 | | |
407 | | QPDFObjectHandle |
408 | | QPDF::getObject(int objid, int generation) |
409 | 0 | { |
410 | 0 | return getObject({objid, generation}); |
411 | 0 | } |
412 | | |
413 | | QPDFObjectHandle |
414 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
415 | 0 | { |
416 | 0 | return getObject(og); |
417 | 0 | } |
418 | | |
419 | | QPDFObjectHandle |
420 | | QPDF::getObjectByID(int objid, int generation) |
421 | 0 | { |
422 | 0 | return getObject(QPDFObjGen(objid, generation)); |
423 | 0 | } |
424 | | |
425 | | QPDFObjectHandle |
426 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
427 | 0 | { |
428 | 0 | return m->objects.foreign().copied(foreign); |
429 | 0 | } |
430 | | |
431 | | Objects ::Foreign::Copier& |
432 | | Objects::Foreign::copier(QPDFObjectHandle const& foreign) |
433 | 0 | { |
434 | 0 | if (!foreign.isIndirect()) { |
435 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
436 | 0 | } |
437 | 0 | QPDF& other = *foreign.qpdf(); |
438 | 0 | if (&other == &qpdf) { |
439 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
440 | 0 | } |
441 | 0 | return copiers.insert({other.getUniqueId(), {qpdf}}).first->second; |
442 | 0 | } |
443 | | |
444 | | QPDFObjectHandle |
445 | | Objects::Foreign::Copier::copied(QPDFObjectHandle const& foreign) |
446 | 0 | { |
447 | | // Here's an explanation of what's going on here. |
448 | | // |
449 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
450 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
451 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
452 | | // references to the corresponding object in the local file. |
453 | | // |
454 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
455 | | // QPDF that we are copying from. The mapping is stored in an Foreign::Copier, which contains a |
456 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
457 | | // |
458 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
459 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
460 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
461 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
462 | | // mapping from the foreign object ID to the new object. While we |
463 | | // do this, we keep a list of objects to copy. |
464 | | // |
465 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
466 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
467 | | // need to replace them with references to objects in the local file. This is what |
468 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
469 | | // all the indirect references replaced with new ones in the local context, we can replace the |
470 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
471 | | // references in any order. |
472 | | |
473 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
474 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
475 | | // require the original QPDF object but may require the original source of the stream data with |
476 | | // special handling for immediate_copy_from. This logic is also in |
477 | | // replaceForeignIndirectObjects. |
478 | | |
479 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
480 | | // use case to copy pages this way if the intention is to not update the pages tree. |
481 | |
|
482 | 0 | util::assertion( |
483 | 0 | visiting.empty(), "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
484 | | |
485 | | // Make sure we have an object in this file for every referenced object in the old file. |
486 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
487 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
488 | | // the local object will already be a stream. |
489 | 0 | reserve_objects(foreign, true); |
490 | |
|
491 | 0 | util::assertion(visiting.empty(), "obj_copier.visiting is not empty after reserving objects"); |
492 | | |
493 | | // Copy any new objects and replace the reservations. |
494 | 0 | for (auto& oh: to_copy) { |
495 | 0 | auto copy = replace_indirect_object(oh, true); |
496 | 0 | if (!oh.isStream()) { |
497 | 0 | qpdf.replaceReserved(object_map[oh], copy); |
498 | 0 | } |
499 | 0 | } |
500 | 0 | to_copy.clear(); |
501 | |
|
502 | 0 | auto og = foreign.getObjGen(); |
503 | 0 | if (!object_map.contains(og)) { |
504 | 0 | warn(damagedPDF( |
505 | 0 | foreign.qpdf()->getFilename() + " object " + og.unparse(' '), |
506 | 0 | foreign.offset(), |
507 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
508 | 0 | "null")); |
509 | 0 | return QPDFObjectHandle::newNull(); |
510 | 0 | } |
511 | 0 | return object_map[foreign]; |
512 | 0 | } |
513 | | |
514 | | void |
515 | | Objects::Foreign::Copier::reserve_objects(QPDFObjectHandle const& foreign, bool top) |
516 | 0 | { |
517 | 0 | auto foreign_tc = foreign.type_code(); |
518 | 0 | util::assertion( |
519 | 0 | foreign_tc != ::ot_reserved, "QPDF: attempting to copy a foreign reserved object"); |
520 | |
|
521 | 0 | if (foreign.isPagesObject()) { |
522 | 0 | return; |
523 | 0 | } |
524 | | |
525 | 0 | if (foreign.indirect()) { |
526 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
527 | 0 | if (!visiting.add(foreign_og)) { |
528 | 0 | return; |
529 | 0 | } |
530 | 0 | if (object_map.contains(foreign_og)) { |
531 | 0 | if (!(top && foreign.isPageObject() && object_map[foreign_og].null())) { |
532 | 0 | visiting.erase(foreign); |
533 | 0 | return; |
534 | 0 | } |
535 | 0 | } else { |
536 | 0 | object_map[foreign_og] = foreign.isStream() ? qpdf.newStream() : qpdf.newIndirectNull(); |
537 | 0 | if (!top && foreign.isPageObject()) { |
538 | 0 | visiting.erase(foreign_og); |
539 | 0 | return; |
540 | 0 | } |
541 | 0 | } |
542 | 0 | to_copy.emplace_back(foreign); |
543 | 0 | } |
544 | | |
545 | 0 | if (foreign_tc == ::ot_array) { |
546 | 0 | for (auto const& item: Array(foreign)) { |
547 | 0 | reserve_objects(item); |
548 | 0 | } |
549 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
550 | 0 | for (auto const& item: Dictionary(foreign)) { |
551 | 0 | if (!item.second.null()) { |
552 | 0 | reserve_objects(item.second); |
553 | 0 | } |
554 | 0 | } |
555 | 0 | } else if (foreign_tc == ::ot_stream) { |
556 | 0 | reserve_objects(foreign.getDict()); |
557 | 0 | } |
558 | |
|
559 | 0 | visiting.erase(foreign); |
560 | 0 | } |
561 | | |
562 | | QPDFObjectHandle |
563 | | Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreign, bool top) |
564 | 0 | { |
565 | 0 | auto foreign_tc = foreign.type_code(); |
566 | |
|
567 | 0 | if (!top && foreign.indirect()) { |
568 | 0 | auto mapping = object_map.find(foreign.id_gen()); |
569 | 0 | if (mapping == object_map.end()) { |
570 | | // This case would occur if this is a reference to a Pages object that we didn't |
571 | | // traverse into. |
572 | 0 | return QPDFObjectHandle::newNull(); |
573 | 0 | } |
574 | 0 | return mapping->second; |
575 | 0 | } |
576 | | |
577 | 0 | if (foreign_tc == ::ot_array) { |
578 | 0 | Array array = foreign; |
579 | 0 | std::vector<QPDFObjectHandle> result; |
580 | 0 | result.reserve(array.size()); |
581 | 0 | for (auto const& item: array) { |
582 | 0 | result.emplace_back(replace_indirect_object(item)); |
583 | 0 | } |
584 | 0 | return Array(std::move(result)); |
585 | 0 | } |
586 | | |
587 | 0 | if (foreign_tc == ::ot_dictionary) { |
588 | 0 | auto result = Dictionary::empty(); |
589 | 0 | for (auto const& [key, value]: Dictionary(foreign)) { |
590 | 0 | if (!value.null()) { |
591 | 0 | result.replace(key, replace_indirect_object(value)); |
592 | 0 | } |
593 | 0 | } |
594 | 0 | return result; |
595 | 0 | } |
596 | | |
597 | 0 | if (foreign_tc == ::ot_stream) { |
598 | 0 | Stream stream = foreign; |
599 | 0 | Stream result = object_map[foreign]; |
600 | 0 | auto dict = result.getDict(); |
601 | 0 | for (auto const& [key, value]: stream.getDict()) { |
602 | 0 | if (!value.null()) { |
603 | 0 | dict.replace(key, replace_indirect_object(value)); |
604 | 0 | } |
605 | 0 | } |
606 | 0 | stream.copy_data_to(result); |
607 | 0 | return result; |
608 | 0 | } |
609 | | |
610 | 0 | foreign.assertScalar(); |
611 | 0 | auto result = foreign; |
612 | 0 | result.makeDirect(); |
613 | 0 | return result; |
614 | 0 | } |
615 | | |
616 | | unsigned long long |
617 | | QPDF::getUniqueId() const |
618 | 0 | { |
619 | 0 | return m->unique_id; |
620 | 0 | } |
621 | | |
622 | | std::string |
623 | | QPDF::getFilename() const |
624 | 168k | { |
625 | 168k | return m->file->getName(); |
626 | 168k | } |
627 | | |
628 | | PDFVersion |
629 | | QPDF::getVersionAsPDFVersion() |
630 | 0 | { |
631 | 0 | int major = 1; |
632 | 0 | int minor = 3; |
633 | 0 | int extension_level = getExtensionLevel(); |
634 | |
|
635 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
636 | 0 | std::smatch match; |
637 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
638 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
639 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
640 | 0 | } |
641 | |
|
642 | 0 | return {major, minor, extension_level}; |
643 | 0 | } |
644 | | |
645 | | std::string |
646 | | QPDF::getPDFVersion() const |
647 | 0 | { |
648 | 0 | return m->pdf_version; |
649 | 0 | } |
650 | | |
651 | | int |
652 | | QPDF::getExtensionLevel() |
653 | 0 | { |
654 | 0 | if (Integer ExtensionLevel = getRoot()["/Extensions"]["/ADBE"]["/ExtensionLevel"]) { |
655 | 0 | return ExtensionLevel.value<int>(); |
656 | 0 | } |
657 | 0 | return 0; |
658 | 0 | } |
659 | | |
660 | | QPDFObjectHandle |
661 | | QPDF::getTrailer() |
662 | 0 | { |
663 | 0 | return m->trailer; |
664 | 0 | } |
665 | | |
666 | | QPDFObjectHandle |
667 | | QPDF::getRoot() |
668 | 0 | { |
669 | 0 | Dictionary Root = m->trailer["/Root"]; |
670 | 0 | if (!Root) { |
671 | 0 | throw m->c.damagedPDF("", -1, "unable to find /Root dictionary"); |
672 | 0 | } |
673 | 0 | if (!m->objects.root_checked()) { |
674 | 0 | m->objects.root_checked(true); |
675 | 0 | if (Name(Root["/Type"]) != "/Catalog") { |
676 | 0 | warn(m->c.damagedPDF( |
677 | 0 | "", -1, "Catalog: setting missing or invalid /Type entry to /Catalog")); |
678 | 0 | if (!global::Options::inspection_mode()) { |
679 | 0 | Root.replace("/Type", Name("/Catalog")); |
680 | 0 | } |
681 | 0 | } |
682 | 0 | } |
683 | 0 | return Root.oh(); |
684 | 0 | } |
685 | | |
686 | | std::map<QPDFObjGen, QPDFXRefEntry> |
687 | | QPDF::getXRefTable() |
688 | 0 | { |
689 | 0 | return m->objects.xref_table(); |
690 | 0 | } |
691 | | |
692 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
693 | | Objects::xref_table() |
694 | 0 | { |
695 | 0 | util::assertion(m->parsed, "QPDF::getXRefTable called before parsing"); |
696 | 0 | return m->xref_table; |
697 | 0 | } |
698 | | |
699 | | bool |
700 | | QPDF::pipeStreamData( |
701 | | std::shared_ptr<EncryptionParameters> encp, |
702 | | std::shared_ptr<InputSource> file, |
703 | | QPDF& qpdf_for_warning, |
704 | | QPDFObjGen og, |
705 | | qpdf_offset_t offset, |
706 | | size_t length, |
707 | | QPDFObjectHandle stream_dict, |
708 | | bool is_root_metadata, |
709 | | Pipeline* pipeline, |
710 | | bool suppress_warnings, |
711 | | bool will_retry) |
712 | 0 | { |
713 | 0 | std::unique_ptr<Pipeline> to_delete; |
714 | 0 | if (encp->encrypted) { |
715 | 0 | decryptStream( |
716 | 0 | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
717 | 0 | } |
718 | |
|
719 | 0 | bool attempted_finish = false; |
720 | 0 | try { |
721 | 0 | auto buf = file->read(length, offset); |
722 | 0 | if (buf.size() != length) { |
723 | 0 | throw qpdf_for_warning.m->c.damagedPDF( |
724 | 0 | *file, |
725 | 0 | "", |
726 | 0 | offset + QIntC::to_offset(buf.size()), |
727 | 0 | "unexpected EOF reading stream data"); |
728 | 0 | } |
729 | 0 | pipeline->write(buf.data(), length); |
730 | 0 | attempted_finish = true; |
731 | 0 | pipeline->finish(); |
732 | 0 | return true; |
733 | 0 | } catch (QPDFExc& e) { |
734 | 0 | if (!suppress_warnings) { |
735 | 0 | qpdf_for_warning.warn(e); |
736 | 0 | } |
737 | 0 | } catch (std::exception& e) { |
738 | 0 | if (!suppress_warnings) { |
739 | 0 | QTC::TC("qpdf", "QPDF decoding error warning"); |
740 | 0 | qpdf_for_warning.warn( |
741 | | // line-break |
742 | 0 | qpdf_for_warning.m->c.damagedPDF( |
743 | 0 | *file, |
744 | 0 | "", |
745 | 0 | file->getLastOffset(), |
746 | 0 | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
747 | 0 | e.what()))); |
748 | 0 | if (will_retry) { |
749 | 0 | qpdf_for_warning.warn( |
750 | | // line-break |
751 | 0 | qpdf_for_warning.m->c.damagedPDF( |
752 | 0 | *file, |
753 | 0 | "", |
754 | 0 | file->getLastOffset(), |
755 | 0 | "stream will be re-processed without filtering to avoid data loss")); |
756 | 0 | } |
757 | 0 | } |
758 | 0 | } |
759 | 0 | if (!attempted_finish) { |
760 | 0 | try { |
761 | 0 | pipeline->finish(); |
762 | 0 | } catch (std::exception&) { |
763 | | // ignore |
764 | 0 | } |
765 | 0 | } |
766 | 0 | return false; |
767 | 0 | } |
768 | | |
769 | | bool |
770 | | QPDF::pipeStreamData( |
771 | | QPDFObjGen og, |
772 | | qpdf_offset_t offset, |
773 | | size_t length, |
774 | | QPDFObjectHandle stream_dict, |
775 | | bool is_root_metadata, |
776 | | Pipeline* pipeline, |
777 | | bool suppress_warnings, |
778 | | bool will_retry) |
779 | 0 | { |
780 | 0 | return pipeStreamData( |
781 | 0 | m->encp, |
782 | 0 | m->file, |
783 | 0 | *this, |
784 | 0 | og, |
785 | 0 | offset, |
786 | 0 | length, |
787 | 0 | stream_dict, |
788 | 0 | is_root_metadata, |
789 | 0 | pipeline, |
790 | 0 | suppress_warnings, |
791 | 0 | will_retry); |
792 | 0 | } |
793 | | |
794 | | // Throw a generic exception when we lack context for something more specific. New code should not |
795 | | // use this. |
796 | | void |
797 | | Common::stopOnError(std::string const& message) |
798 | 26 | { |
799 | 26 | throw damagedPDF("", message); |
800 | 26 | } |
801 | | |
802 | | // Return an exception of type qpdf_e_damaged_pdf. |
803 | | QPDFExc |
804 | | Common::damagedPDF( |
805 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
806 | 0 | { |
807 | 0 | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
808 | 0 | } |
809 | | |
810 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
811 | | // m->last_object_description. |
812 | | QPDFExc |
813 | | Common::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) const |
814 | 0 | { |
815 | 0 | return damagedPDF(input, m->last_object_description, offset, message); |
816 | 0 | } |
817 | | |
818 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
819 | | QPDFExc |
820 | | Common::damagedPDF( |
821 | | std::string const& object, qpdf_offset_t offset, std::string const& message) const |
822 | 26 | { |
823 | 26 | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
824 | 26 | } |
825 | | |
826 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
827 | | // offset from .m->file->getLastOffset(). |
828 | | QPDFExc |
829 | | Common::damagedPDF(std::string const& object, std::string const& message) const |
830 | 26 | { |
831 | 26 | return damagedPDF(object, m->file->getLastOffset(), message); |
832 | 26 | } |
833 | | |
834 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
835 | | // from .m->last_object_description. |
836 | | QPDFExc |
837 | | Common::damagedPDF(qpdf_offset_t offset, std::string const& message) const |
838 | 0 | { |
839 | 0 | return damagedPDF(m->last_object_description, offset, message); |
840 | 0 | } |
841 | | |
842 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
843 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
844 | | QPDFExc |
845 | | Common::damagedPDF(std::string const& message) const |
846 | 0 | { |
847 | 0 | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
848 | 0 | } |
849 | | |
850 | | bool |
851 | | QPDF::everCalledGetAllPages() const |
852 | 0 | { |
853 | 0 | return m->pages.ever_called_get_all_pages(); |
854 | 0 | } |
855 | | |
856 | | bool |
857 | | QPDF::everPushedInheritedAttributesToPages() const |
858 | 0 | { |
859 | 0 | return m->pages.ever_pushed_inherited_attributes_to_pages(); |
860 | 0 | } |
861 | | |
862 | | void |
863 | | QPDF::removeSecurityRestrictions() |
864 | 0 | { |
865 | 0 | auto root = getRoot(); |
866 | 0 | root.removeKey("/Perms"); |
867 | 0 | auto acroform = root.getKey("/AcroForm"); |
868 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
869 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
870 | 0 | } |
871 | 0 | } |