/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/AcroForm.hh> |
15 | | #include <qpdf/FileInputSource.hh> |
16 | | #include <qpdf/InputSource_private.hh> |
17 | | #include <qpdf/OffsetInputSource.hh> |
18 | | #include <qpdf/Pipeline.hh> |
19 | | #include <qpdf/QPDFExc.hh> |
20 | | #include <qpdf/QPDFLogger.hh> |
21 | | #include <qpdf/QPDFObjectHandle_private.hh> |
22 | | #include <qpdf/QPDFObject_private.hh> |
23 | | #include <qpdf/QPDFParser.hh> |
24 | | #include <qpdf/QTC.hh> |
25 | | #include <qpdf/QUtil.hh> |
26 | | #include <qpdf/Util.hh> |
27 | | |
28 | | using namespace qpdf; |
29 | | using namespace std::literals; |
30 | | |
31 | | using Common = impl::Doc::Common; |
32 | | using Objects = impl::Doc::Objects; |
33 | | using Foreign = Objects::Foreign; |
34 | | using Streams = Objects::Streams; |
35 | | |
// This must be a fixed value. QPDF::QPDFVersion() returns a const reference to it, and the C API
// relies on its being static as well.
std::string const QPDF::qpdf_version(QPDF_VERSION);
39 | | |
// Minimal, well-formed PDF used by QPDF::emptyPDF(): a catalog (object 1) and an empty page tree
// (object 2). The xref entries (9 and 58) and the startxref value (110) are byte offsets into this
// exact text, so any edit to the lines above them must be mirrored there. The pointer itself is
// const so the template cannot be reseated at run time.
static char const* const EMPTY_PDF = (
    // force line break
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
58 | | |
59 | | namespace |
60 | | { |
61 | | class InvalidInputSource: public InputSource |
62 | | { |
63 | | public: |
64 | | ~InvalidInputSource() override = default; |
65 | | qpdf_offset_t |
66 | | findAndSkipNextEOL() override |
67 | 0 | { |
68 | 0 | throwException(); |
69 | 0 | return 0; |
70 | 0 | } |
71 | | std::string const& |
72 | | getName() const override |
73 | 0 | { |
74 | 0 | static std::string name("closed input source"); |
75 | 0 | return name; |
76 | 0 | } |
77 | | qpdf_offset_t |
78 | | tell() override |
79 | 0 | { |
80 | 0 | throwException(); |
81 | 0 | return 0; |
82 | 0 | } |
83 | | void |
84 | | seek(qpdf_offset_t offset, int whence) override |
85 | 0 | { |
86 | 0 | throwException(); |
87 | 0 | } |
88 | | void |
89 | | rewind() override |
90 | 0 | { |
91 | 0 | throwException(); |
92 | 0 | } |
93 | | size_t |
94 | | read(char* buffer, size_t length) override |
95 | 0 | { |
96 | 0 | throwException(); |
97 | 0 | return 0; |
98 | 0 | } |
99 | | void |
100 | | unreadCh(char ch) override |
101 | 0 | { |
102 | 0 | throwException(); |
103 | 0 | } |
104 | | |
105 | | private: |
106 | | void |
107 | | throwException() |
108 | 0 | { |
109 | 0 | throw std::logic_error( |
110 | 0 | "QPDF operation attempted on a QPDF object with no input " |
111 | 0 | "source. QPDF operations are invalid before processFile (or " |
112 | 0 | "another process method) or after closeInputSource"); |
113 | 0 | } |
114 | | }; |
115 | | } // namespace |
116 | | |
117 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
118 | 130k | qpdf(qpdf), |
119 | 130k | og(og) |
120 | 130k | { |
121 | 130k | } |
122 | | |
123 | | std::string const& |
124 | | QPDF::QPDFVersion() |
125 | 0 | { |
126 | | // The C API relies on this being a static value. |
127 | 0 | return QPDF::qpdf_version; |
128 | 0 | } |
129 | | |
// Build the private implementation state for `qpdf`. Until processInputSource() installs a real
// input source, `file` is an InvalidInputSource, so any attempt to read from it throws
// std::logic_error. `encp` starts out as a default-constructed EncryptionParameters.
QPDF::Members::Members(QPDF& qpdf) :
    Doc(qpdf, this),
    c(qpdf, this),
    lin(*this),
    objects(*this),
    pages(*this),
    file(std::make_shared<InvalidInputSource>()),
    encp(std::make_shared<EncryptionParameters>())
{
}
140 | | |
141 | | QPDF::QPDF() : |
142 | 17.9k | m(std::make_unique<Members>(*this)) |
143 | 17.9k | { |
144 | 17.9k | m->tokenizer.allowEOF(); |
145 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
146 | | // the lifetime of this running application. |
147 | 17.9k | static std::atomic<unsigned long long> unique_id{0}; |
148 | 17.9k | m->unique_id = unique_id.fetch_add(1ULL); |
149 | 17.9k | } |
150 | | |
/// @brief Initializes the AcroForm functionality for the document.
///
/// Creates a QPDFAcroFormDocumentHelper owned by `acroform_dh_` and then points `acroform_` at
/// the object held in the helper's `m` member. The order matters: `acroform_` is derived from the
/// helper that was just created.
///
/// The method has been separated out from `acroform` to avoid it being inlined
/// unnecessarily.
void
QPDF::Doc::init_acroform()
{
    acroform_dh_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
    acroform_ = acroform_dh_->m.get();
}
165 | | |
166 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
167 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
168 | | class Disconnect: BaseHandle |
169 | | { |
170 | | public: |
171 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
172 | 533k | BaseHandle(obj) |
173 | 533k | { |
174 | 533k | } |
175 | | void |
176 | | disconnect() |
177 | 533k | { |
178 | 533k | BaseHandle::disconnect(false); |
179 | 533k | if (raw_type_code() != ::ot_null) { |
180 | 164k | obj->value = QPDF_Destroyed(); |
181 | 164k | } |
182 | 533k | } |
183 | | }; |
184 | | |
185 | | QPDF::~QPDF() |
186 | 17.9k | { |
187 | | // If two objects are mutually referential (through each object having an array or dictionary |
188 | | // that contains an indirect reference to the other), the circular references in the |
189 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
190 | | // in the object cache, which is those objects that we read from the file, and break all |
191 | | // resolved indirect references by replacing them with an internal object type representing that |
192 | | // they have been destroyed. Note that we can't break references like this at any time when the |
193 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
194 | | // are reachable from this object to release their association with this QPDF. Direct objects |
195 | | // are not destroyed since they can be moved to other QPDF objects safely. |
196 | | |
197 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
198 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
199 | 17.9k | m->xref_table.clear(); |
200 | 533k | for (auto const& iter: m->obj_cache) { |
201 | 533k | Disconnect(iter.second.object).disconnect(); |
202 | 533k | } |
203 | 17.9k | } |
204 | | |
205 | | std::shared_ptr<QPDF> |
206 | | QPDF::create() |
207 | 0 | { |
208 | 0 | return std::make_shared<QPDF>(); |
209 | 0 | } |
210 | | |
211 | | void |
212 | | QPDF::processFile(char const* filename, char const* password) |
213 | 0 | { |
214 | 0 | auto* fi = new FileInputSource(filename); |
215 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
216 | 0 | } |
217 | | |
218 | | void |
219 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
220 | 0 | { |
221 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
222 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
223 | 0 | } |
224 | | |
225 | | void |
226 | | QPDF::processMemoryFile( |
227 | | char const* description, char const* buf, size_t length, char const* password) |
228 | 0 | { |
229 | 0 | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
230 | 0 | processInputSource(is, password); |
231 | 0 | } |
232 | | |
233 | | void |
234 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
235 | 17.9k | { |
236 | 17.9k | m->file = source; |
237 | 17.9k | m->objects.parse(password); |
238 | 17.9k | } |
239 | | |
240 | | void |
241 | | QPDF::closeInputSource() |
242 | 0 | { |
243 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
244 | 0 | } |
245 | | |
246 | | void |
247 | | QPDF::setPasswordIsHexKey(bool val) |
248 | 0 | { |
249 | 0 | m->cf.password_is_hex_key(val); |
250 | 0 | } |
251 | | |
252 | | void |
253 | | QPDF::emptyPDF() |
254 | 0 | { |
255 | 0 | (void)m->cf.max_warnings(0); |
256 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
257 | 0 | } |
258 | | |
// Register `factory` as the creator of stream filters for `filter_name`. This simply forwards to
// qpdf::Stream::registerStreamFilter.
void
QPDF::registerStreamFilter(
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
{
    qpdf::Stream::registerStreamFilter(filter_name, factory);
}
265 | | |
266 | | void |
267 | | QPDF::setIgnoreXRefStreams(bool val) |
268 | 0 | { |
269 | 0 | (void)m->cf.ignore_xref_streams(val); |
270 | 0 | } |
271 | | |
272 | | std::shared_ptr<QPDFLogger> |
273 | | QPDF::getLogger() |
274 | 0 | { |
275 | 0 | return m->cf.log(); |
276 | 0 | } |
277 | | |
278 | | void |
279 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
280 | 0 | { |
281 | 0 | m->cf.log(l); |
282 | 0 | } |
283 | | |
284 | | void |
285 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
286 | 0 | { |
287 | 0 | setLogger(QPDFLogger::create()); |
288 | 0 | m->cf.log()->setOutputStreams(out, err); |
289 | 0 | } |
290 | | |
291 | | void |
292 | | QPDF::setSuppressWarnings(bool val) |
293 | 0 | { |
294 | 0 | (void)m->cf.suppress_warnings(val); |
295 | 0 | } |
296 | | |
297 | | void |
298 | | QPDF::setMaxWarnings(size_t val) |
299 | 0 | { |
300 | 0 | (void)m->cf.max_warnings(val); |
301 | 0 | } |
302 | | |
303 | | void |
304 | | QPDF::setAttemptRecovery(bool val) |
305 | 0 | { |
306 | 0 | (void)m->cf.surpress_recovery(!val); |
307 | 0 | } |
308 | | |
309 | | void |
310 | | QPDF::setImmediateCopyFrom(bool val) |
311 | 0 | { |
312 | 0 | (void)m->cf.immediate_copy_from(val); |
313 | 0 | } |
314 | | |
315 | | std::vector<QPDFExc> |
316 | | QPDF::getWarnings() |
317 | 0 | { |
318 | 0 | std::vector<QPDFExc> result = std::move(m->warnings); |
319 | 0 | m->warnings.clear(); |
320 | 0 | return result; |
321 | 0 | } |
322 | | |
323 | | bool |
324 | | QPDF::anyWarnings() const |
325 | 0 | { |
326 | 0 | return !m->warnings.empty(); |
327 | 0 | } |
328 | | |
329 | | size_t |
330 | | QPDF::numWarnings() const |
331 | 0 | { |
332 | 0 | return m->warnings.size(); |
333 | 0 | } |
334 | | |
335 | | void |
336 | | QPDF::warn(QPDFExc const& e) |
337 | 397k | { |
338 | 397k | m->c.warn(e); |
339 | 397k | } |
340 | | |
341 | | void |
342 | | Common::warn(QPDFExc const& e) |
343 | 661k | { |
344 | 661k | if (cf.max_warnings() > 0 && m->warnings.size() >= cf.max_warnings()) { |
345 | 21.1k | stopOnError("Too many warnings - file is too badly damaged"); |
346 | 21.1k | } |
347 | 661k | m->warnings.emplace_back(e); |
348 | 661k | if (!cf.suppress_warnings()) { |
349 | 639k | *cf.log()->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
350 | 639k | } |
351 | 661k | } |
352 | | |
353 | | void |
354 | | QPDF::warn( |
355 | | qpdf_error_code_e error_code, |
356 | | std::string const& object, |
357 | | qpdf_offset_t offset, |
358 | | std::string const& message) |
359 | 3.18k | { |
360 | 3.18k | m->c.warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
361 | 3.18k | } |
362 | | |
363 | | void |
364 | | Common::warn( |
365 | | qpdf_error_code_e error_code, |
366 | | std::string const& object, |
367 | | qpdf_offset_t offset, |
368 | | std::string const& message) |
369 | 1.31k | { |
370 | 1.31k | warn(QPDFExc(error_code, qpdf.getFilename(), object, offset, message)); |
371 | 1.31k | } |
372 | | |
// Create a new indirect object whose value is a QPDF_Reserved placeholder and return a handle to
// it.
QPDFObjectHandle
QPDF::newReserved()
{
    return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
}
378 | | |
// Create a new indirect object whose value is a QPDF_Null and return a handle to it.
QPDFObjectHandle
QPDF::newIndirectNull()
{
    return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
}
384 | | |
// Create a new indirect stream object with an empty dictionary, using the next available object
// id/generation.
// NOTE(review): the two trailing zeros are presumably the stream's offset and length -- confirm
// against the qpdf::Stream constructor.
QPDFObjectHandle
QPDF::newStream()
{
    return makeIndirectObject(
        qpdf::Stream(*this, m->objects.nextObjGen(), Dictionary::empty(), 0, 0));
}
391 | | |
392 | | QPDFObjectHandle |
393 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
394 | 0 | { |
395 | 0 | auto result = newStream(); |
396 | 0 | result.replaceStreamData(data, {}, {}); |
397 | 0 | return result; |
398 | 0 | } |
399 | | |
400 | | QPDFObjectHandle |
401 | | QPDF::newStream(std::string const& data) |
402 | 0 | { |
403 | 0 | auto result = newStream(); |
404 | 0 | result.replaceStreamData(data, {}, {}); |
405 | 0 | return result; |
406 | 0 | } |
407 | | |
408 | | QPDFObjectHandle |
409 | | QPDF::getObject(int objid, int generation) |
410 | 27.8k | { |
411 | 27.8k | return getObject({objid, generation}); |
412 | 27.8k | } |
413 | | |
// Alternative spelling of getObject(QPDFObjGen); it simply forwards to that overload.
QPDFObjectHandle
QPDF::getObjectByObjGen(QPDFObjGen og)
{
    return getObject(og);
}
419 | | |
420 | | QPDFObjectHandle |
421 | | QPDF::getObjectByID(int objid, int generation) |
422 | 0 | { |
423 | 0 | return getObject(QPDFObjGen(objid, generation)); |
424 | 0 | } |
425 | | |
426 | | QPDFObjectHandle |
427 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
428 | 0 | { |
429 | 0 | return m->objects.foreign().copied(foreign); |
430 | 0 | } |
431 | | |
432 | | Objects ::Foreign::Copier& |
433 | | Objects::Foreign::copier(QPDFObjectHandle const& foreign) |
434 | 0 | { |
435 | 0 | if (!foreign.isIndirect()) { |
436 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
437 | 0 | } |
438 | 0 | QPDF& other = *foreign.qpdf(); |
439 | 0 | if (&other == &qpdf) { |
440 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
441 | 0 | } |
442 | 0 | return copiers.insert({other.getUniqueId(), {qpdf}}).first->second; |
443 | 0 | } |
444 | | |
445 | | QPDFObjectHandle |
446 | | Objects::Foreign::Copier::copied(QPDFObjectHandle const& foreign) |
447 | 0 | { |
448 | | // Here's an explanation of what's going on here. |
449 | | // |
450 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
451 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
452 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
453 | | // references to the corresponding object in the local file. |
454 | | // |
455 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
456 | | // QPDF that we are copying from. The mapping is stored in an Foreign::Copier, which contains a |
457 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
458 | | // |
459 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
460 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
461 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
462 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
463 | | // mapping from the foreign object ID to the new object. While we |
464 | | // do this, we keep a list of objects to copy. |
465 | | // |
466 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
467 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
468 | | // need to replace them with references to objects in the local file. This is what |
469 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
470 | | // all the indirect references replaced with new ones in the local context, we can replace the |
471 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
472 | | // references in any order. |
473 | | |
474 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
475 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
476 | | // require the original QPDF object but may require the original source of the stream data with |
477 | | // special handling for immediate_copy_from. This logic is also in |
478 | | // replaceForeignIndirectObjects. |
479 | | |
480 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
481 | | // use case to copy pages this way if the intention is to not update the pages tree. |
482 | |
|
483 | 0 | util::assertion( |
484 | 0 | visiting.empty(), "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
485 | | |
486 | | // Make sure we have an object in this file for every referenced object in the old file. |
487 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
488 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
489 | | // the local object will already be a stream. |
490 | 0 | reserve_objects(foreign, true); |
491 | |
|
492 | 0 | util::assertion(visiting.empty(), "obj_copier.visiting is not empty after reserving objects"); |
493 | | |
494 | | // Copy any new objects and replace the reservations. |
495 | 0 | for (auto& oh: to_copy) { |
496 | 0 | auto copy = replace_indirect_object(oh, true); |
497 | 0 | if (!oh.isStream()) { |
498 | 0 | qpdf.replaceReserved(object_map[oh], copy); |
499 | 0 | } |
500 | 0 | } |
501 | 0 | to_copy.clear(); |
502 | |
|
503 | 0 | auto og = foreign.getObjGen(); |
504 | 0 | if (!object_map.contains(og)) { |
505 | 0 | warn(damagedPDF( |
506 | 0 | foreign.qpdf()->getFilename() + " object " + og.unparse(' '), |
507 | 0 | foreign.offset(), |
508 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
509 | 0 | "null")); |
510 | 0 | return QPDFObjectHandle::newNull(); |
511 | 0 | } |
512 | 0 | return object_map[foreign]; |
513 | 0 | } |
514 | | |
515 | | void |
516 | | Objects::Foreign::Copier::reserve_objects(QPDFObjectHandle const& foreign, bool top) |
517 | 0 | { |
518 | 0 | auto foreign_tc = foreign.type_code(); |
519 | 0 | util::assertion( |
520 | 0 | foreign_tc != ::ot_reserved, "QPDF: attempting to copy a foreign reserved object"); |
521 | |
|
522 | 0 | if (foreign.isPagesObject()) { |
523 | 0 | return; |
524 | 0 | } |
525 | | |
526 | 0 | if (foreign.indirect()) { |
527 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
528 | 0 | if (!visiting.add(foreign_og)) { |
529 | 0 | return; |
530 | 0 | } |
531 | 0 | if (object_map.contains(foreign_og)) { |
532 | 0 | if (!(top && foreign.isPageObject() && object_map[foreign_og].null())) { |
533 | 0 | visiting.erase(foreign); |
534 | 0 | return; |
535 | 0 | } |
536 | 0 | } else { |
537 | 0 | object_map[foreign_og] = foreign.isStream() ? qpdf.newStream() : qpdf.newIndirectNull(); |
538 | 0 | if (!top && foreign.isPageObject()) { |
539 | 0 | visiting.erase(foreign_og); |
540 | 0 | return; |
541 | 0 | } |
542 | 0 | } |
543 | 0 | to_copy.emplace_back(foreign); |
544 | 0 | } |
545 | | |
546 | 0 | if (foreign_tc == ::ot_array) { |
547 | 0 | for (auto const& item: Array(foreign)) { |
548 | 0 | reserve_objects(item); |
549 | 0 | } |
550 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
551 | 0 | for (auto const& item: Dictionary(foreign)) { |
552 | 0 | if (!item.second.null()) { |
553 | 0 | reserve_objects(item.second); |
554 | 0 | } |
555 | 0 | } |
556 | 0 | } else if (foreign_tc == ::ot_stream) { |
557 | 0 | reserve_objects(foreign.getDict()); |
558 | 0 | } |
559 | |
|
560 | 0 | visiting.erase(foreign); |
561 | 0 | } |
562 | | |
563 | | QPDFObjectHandle |
564 | | Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreign, bool top) |
565 | 0 | { |
566 | 0 | auto foreign_tc = foreign.type_code(); |
567 | |
|
568 | 0 | if (!top && foreign.indirect()) { |
569 | 0 | auto mapping = object_map.find(foreign.id_gen()); |
570 | 0 | if (mapping == object_map.end()) { |
571 | | // This case would occur if this is a reference to a Pages object that we didn't |
572 | | // traverse into. |
573 | 0 | return QPDFObjectHandle::newNull(); |
574 | 0 | } |
575 | 0 | return mapping->second; |
576 | 0 | } |
577 | | |
578 | 0 | if (foreign_tc == ::ot_array) { |
579 | 0 | Array array = foreign; |
580 | 0 | std::vector<QPDFObjectHandle> result; |
581 | 0 | result.reserve(array.size()); |
582 | 0 | for (auto const& item: array) { |
583 | 0 | result.emplace_back(replace_indirect_object(item)); |
584 | 0 | } |
585 | 0 | return Array(std::move(result)); |
586 | 0 | } |
587 | | |
588 | 0 | if (foreign_tc == ::ot_dictionary) { |
589 | 0 | auto result = Dictionary::empty(); |
590 | 0 | for (auto const& [key, value]: Dictionary(foreign)) { |
591 | 0 | if (!value.null()) { |
592 | 0 | result.replace(key, replace_indirect_object(value)); |
593 | 0 | } |
594 | 0 | } |
595 | 0 | return result; |
596 | 0 | } |
597 | | |
598 | 0 | if (foreign_tc == ::ot_stream) { |
599 | 0 | Stream stream = foreign; |
600 | 0 | Stream result = object_map[foreign]; |
601 | 0 | auto dict = result.getDict(); |
602 | 0 | for (auto const& [key, value]: stream.getDict()) { |
603 | 0 | if (!value.null()) { |
604 | 0 | dict.replace(key, replace_indirect_object(value)); |
605 | 0 | } |
606 | 0 | } |
607 | 0 | stream.copy_data_to(result); |
608 | 0 | return result; |
609 | 0 | } |
610 | | |
611 | 0 | foreign.assertScalar(); |
612 | 0 | auto result = foreign; |
613 | 0 | result.makeDirect(); |
614 | 0 | return result; |
615 | 0 | } |
616 | | |
617 | | unsigned long long |
618 | | QPDF::getUniqueId() const |
619 | 0 | { |
620 | 0 | return m->unique_id; |
621 | 0 | } |
622 | | |
623 | | std::string |
624 | | QPDF::getFilename() const |
625 | 74.7k | { |
626 | 74.7k | return m->file->getName(); |
627 | 74.7k | } |
628 | | |
629 | | PDFVersion |
630 | | QPDF::getVersionAsPDFVersion() |
631 | 0 | { |
632 | 0 | int major = 1; |
633 | 0 | int minor = 3; |
634 | 0 | int extension_level = getExtensionLevel(); |
635 | |
|
636 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
637 | 0 | std::smatch match; |
638 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
639 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
640 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
641 | 0 | } |
642 | |
|
643 | 0 | return {major, minor, extension_level}; |
644 | 0 | } |
645 | | |
646 | | std::string |
647 | | QPDF::getPDFVersion() const |
648 | 7.75k | { |
649 | 7.75k | return m->pdf_version; |
650 | 7.75k | } |
651 | | |
652 | | int |
653 | | QPDF::getExtensionLevel() |
654 | 7.75k | { |
655 | 7.75k | if (Integer ExtensionLevel = getRoot()["/Extensions"]["/ADBE"]["/ExtensionLevel"]) { |
656 | 37 | return ExtensionLevel.value<int>(); |
657 | 37 | } |
658 | 7.71k | return 0; |
659 | 7.75k | } |
660 | | |
// Return a handle to the trailer stored for this document.
QPDFObjectHandle
QPDF::getTrailer()
{
    return m->trailer;
}
666 | | |
667 | | QPDFObjectHandle |
668 | | QPDF::getRoot() |
669 | 76.9k | { |
670 | 76.9k | Dictionary Root = m->trailer["/Root"]; |
671 | 76.9k | if (!Root) { |
672 | 3.29k | throw m->c.damagedPDF("", -1, "unable to find /Root dictionary"); |
673 | 3.29k | } |
674 | 73.6k | if (!m->objects.root_checked()) { |
675 | 8.95k | m->objects.root_checked(true); |
676 | 8.95k | if (Name(Root["/Type"]) != "/Catalog") { |
677 | 1.33k | warn(m->c.damagedPDF( |
678 | 1.33k | "", -1, "Catalog: setting missing or invalid /Type entry to /Catalog")); |
679 | 1.33k | if (!global::Options::inspection_mode()) { |
680 | 1.32k | Root.replace("/Type", Name("/Catalog")); |
681 | 1.32k | } |
682 | 1.33k | } |
683 | 8.95k | } |
684 | 73.6k | return Root.oh(); |
685 | 76.9k | } |
686 | | |
// Return a copy of the cross-reference table. Objects::xref_table() asserts that the document has
// been parsed.
std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::getXRefTable()
{
    return m->objects.xref_table();
}
692 | | |
// Return a reference to the cross-reference table. Asserts (via util::assertion) that the
// document has already been parsed.
std::map<QPDFObjGen, QPDFXRefEntry> const&
Objects::xref_table()
{
    util::assertion(m->parsed, "QPDF::getXRefTable called before parsing");
    return m->xref_table;
}
699 | | |
700 | | bool |
701 | | QPDF::pipeStreamData( |
702 | | std::shared_ptr<EncryptionParameters> encp, |
703 | | std::shared_ptr<InputSource> file, |
704 | | QPDF& qpdf_for_warning, |
705 | | QPDFObjGen og, |
706 | | qpdf_offset_t offset, |
707 | | size_t length, |
708 | | QPDFObjectHandle stream_dict, |
709 | | bool is_root_metadata, |
710 | | Pipeline* pipeline, |
711 | | bool suppress_warnings, |
712 | | bool will_retry) |
713 | 76.4k | { |
714 | 76.4k | std::unique_ptr<Pipeline> to_delete; |
715 | 76.4k | if (encp->encrypted) { |
716 | 7.78k | decryptStream( |
717 | 7.78k | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
718 | 7.78k | } |
719 | | |
720 | 76.4k | bool attempted_finish = false; |
721 | 76.4k | try { |
722 | 76.4k | auto buf = file->read(length, offset); |
723 | 76.4k | if (buf.size() != length) { |
724 | 0 | throw qpdf_for_warning.m->c.damagedPDF( |
725 | 0 | *file, |
726 | 0 | "", |
727 | 0 | offset + QIntC::to_offset(buf.size()), |
728 | 0 | "unexpected EOF reading stream data"); |
729 | 0 | } |
730 | 76.4k | pipeline->write(buf.data(), length); |
731 | 76.4k | attempted_finish = true; |
732 | 76.4k | pipeline->finish(); |
733 | 76.4k | return true; |
734 | 76.4k | } catch (QPDFExc& e) { |
735 | 0 | if (!suppress_warnings) { |
736 | 0 | qpdf_for_warning.warn(e); |
737 | 0 | } |
738 | 9.46k | } catch (std::exception& e) { |
739 | 9.46k | if (!suppress_warnings) { |
740 | 9.46k | QTC::TC("qpdf", "QPDF decoding error warning"); |
741 | 9.46k | qpdf_for_warning.warn( |
742 | | // line-break |
743 | 9.46k | qpdf_for_warning.m->c.damagedPDF( |
744 | 9.46k | *file, |
745 | 9.46k | "", |
746 | 9.46k | file->getLastOffset(), |
747 | 9.46k | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
748 | 9.46k | e.what()))); |
749 | 9.46k | if (will_retry) { |
750 | 8.43k | qpdf_for_warning.warn( |
751 | | // line-break |
752 | 8.43k | qpdf_for_warning.m->c.damagedPDF( |
753 | 8.43k | *file, |
754 | 8.43k | "", |
755 | 8.43k | file->getLastOffset(), |
756 | 8.43k | "stream will be re-processed without filtering to avoid data loss")); |
757 | 8.43k | } |
758 | 9.46k | } |
759 | 9.46k | } |
760 | 9.41k | if (!attempted_finish) { |
761 | 3.82k | try { |
762 | 3.82k | pipeline->finish(); |
763 | 3.82k | } catch (std::exception&) { |
764 | | // ignore |
765 | 2.22k | } |
766 | 3.82k | } |
767 | 9.41k | return false; |
768 | 9.41k | } |
769 | | |
// Member-function convenience wrapper: pipes stream data using this QPDF's own encryption
// parameters and input source, and reports warnings on this QPDF. All other arguments are
// forwarded unchanged to the static overload, whose result is returned.
bool
QPDF::pipeStreamData(
    QPDFObjGen og,
    qpdf_offset_t offset,
    size_t length,
    QPDFObjectHandle stream_dict,
    bool is_root_metadata,
    Pipeline* pipeline,
    bool suppress_warnings,
    bool will_retry)
{
    return pipeStreamData(
        m->encp,
        m->file,
        *this,
        og,
        offset,
        length,
        stream_dict,
        is_root_metadata,
        pipeline,
        suppress_warnings,
        will_retry);
}
794 | | |
// Throw a generic exception when we lack context for something more specific. New code should not
// use this. The exception is a damaged-PDF QPDFExc with an empty object description and the given
// message; this function never returns normally.
void
Common::stopOnError(std::string const& message)
{
    throw damagedPDF("", message);
}
802 | | |
803 | | // Return an exception of type qpdf_e_damaged_pdf. |
804 | | QPDFExc |
805 | | Common::damagedPDF( |
806 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
807 | 61.9k | { |
808 | 61.9k | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
809 | 61.9k | } |
810 | | |
811 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
812 | | // m->last_object_description. |
813 | | QPDFExc |
814 | | Common::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) const |
815 | 44.0k | { |
816 | 44.0k | return damagedPDF(input, m->last_object_description, offset, message); |
817 | 44.0k | } |
818 | | |
819 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
820 | | QPDFExc |
821 | | Common::damagedPDF( |
822 | | std::string const& object, qpdf_offset_t offset, std::string const& message) const |
823 | 219k | { |
824 | 219k | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
825 | 219k | } |
826 | | |
827 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
828 | | // offset from .m->file->getLastOffset(). |
829 | | QPDFExc |
830 | | Common::damagedPDF(std::string const& object, std::string const& message) const |
831 | 59.5k | { |
832 | 59.5k | return damagedPDF(object, m->file->getLastOffset(), message); |
833 | 59.5k | } |
834 | | |
835 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
836 | | // from .m->last_object_description. |
837 | | QPDFExc |
838 | | Common::damagedPDF(qpdf_offset_t offset, std::string const& message) const |
839 | 30.9k | { |
840 | 30.9k | return damagedPDF(m->last_object_description, offset, message); |
841 | 30.9k | } |
842 | | |
843 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
844 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
845 | | QPDFExc |
846 | | Common::damagedPDF(std::string const& message) const |
847 | 35.6k | { |
848 | 35.6k | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
849 | 35.6k | } |
850 | | |
851 | | bool |
852 | | QPDF::everCalledGetAllPages() const |
853 | 0 | { |
854 | 0 | return m->pages.ever_called_get_all_pages(); |
855 | 0 | } |
856 | | |
857 | | bool |
858 | | QPDF::everPushedInheritedAttributesToPages() const |
859 | 0 | { |
860 | 0 | return m->pages.ever_pushed_inherited_attributes_to_pages(); |
861 | 0 | } |
862 | | |
863 | | void |
864 | | QPDF::removeSecurityRestrictions() |
865 | 0 | { |
866 | 0 | auto root = getRoot(); |
867 | 0 | root.removeKey("/Perms"); |
868 | 0 | auto acroform = root.getKey("/AcroForm"); |
869 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
870 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
871 | 0 | } |
872 | 0 | } |