/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/FileInputSource.hh> |
15 | | #include <qpdf/InputSource_private.hh> |
16 | | #include <qpdf/OffsetInputSource.hh> |
17 | | #include <qpdf/Pipeline.hh> |
18 | | #include <qpdf/QPDFExc.hh> |
19 | | #include <qpdf/QPDFLogger.hh> |
20 | | #include <qpdf/QPDFObjectHandle_private.hh> |
21 | | #include <qpdf/QPDFObject_private.hh> |
22 | | #include <qpdf/QPDFParser.hh> |
23 | | #include <qpdf/QTC.hh> |
24 | | #include <qpdf/QUtil.hh> |
25 | | #include <qpdf/Util.hh> |
26 | | |
27 | | using namespace qpdf; |
28 | | using namespace std::literals; |
29 | | |
30 | | using Common = impl::Doc::Common; |
31 | | using Objects = impl::Doc::Objects; |
32 | | using Foreign = Objects::Foreign; |
33 | | using Streams = Objects::Streams; |
34 | | |
35 | | // This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
36 | | // being static as well. |
37 | | std::string const QPDF::qpdf_version(QPDF_VERSION); |
38 | | |
// Text of a minimal PDF (a catalog plus an empty page tree) that emptyPDF() feeds to
// processMemoryFile(). The xref entries (9 and 58) and the startxref value (110) are byte offsets
// into this exact text, so the literal must not be reflowed or edited.
static char const* EMPTY_PDF = (
    // keep one string piece per output line
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
57 | | |
58 | | namespace |
59 | | { |
60 | | class InvalidInputSource: public InputSource |
61 | | { |
62 | | public: |
63 | | ~InvalidInputSource() override = default; |
64 | | qpdf_offset_t |
65 | | findAndSkipNextEOL() override |
66 | 0 | { |
67 | 0 | throwException(); |
68 | 0 | return 0; |
69 | 0 | } |
70 | | std::string const& |
71 | | getName() const override |
72 | 0 | { |
73 | 0 | static std::string name("closed input source"); |
74 | 0 | return name; |
75 | 0 | } |
76 | | qpdf_offset_t |
77 | | tell() override |
78 | 0 | { |
79 | 0 | throwException(); |
80 | 0 | return 0; |
81 | 0 | } |
82 | | void |
83 | | seek(qpdf_offset_t offset, int whence) override |
84 | 0 | { |
85 | 0 | throwException(); |
86 | 0 | } |
87 | | void |
88 | | rewind() override |
89 | 0 | { |
90 | 0 | throwException(); |
91 | 0 | } |
92 | | size_t |
93 | | read(char* buffer, size_t length) override |
94 | 0 | { |
95 | 0 | throwException(); |
96 | 0 | return 0; |
97 | 0 | } |
98 | | void |
99 | | unreadCh(char ch) override |
100 | 0 | { |
101 | 0 | throwException(); |
102 | 0 | } |
103 | | |
104 | | private: |
105 | | void |
106 | | throwException() |
107 | 0 | { |
108 | 0 | throw std::logic_error( |
109 | 0 | "QPDF operation attempted on a QPDF object with no input " |
110 | 0 | "source. QPDF operations are invalid before processFile (or " |
111 | 0 | "another process method) or after closeInputSource"); |
112 | 0 | } |
113 | | }; |
114 | | } // namespace |
115 | | |
116 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
117 | 114k | qpdf(qpdf), |
118 | 114k | og(og) |
119 | 114k | { |
120 | 114k | } |
121 | | |
122 | | std::string const& |
123 | | QPDF::QPDFVersion() |
124 | 0 | { |
125 | | // The C API relies on this being a static value. |
126 | 0 | return QPDF::qpdf_version; |
127 | 0 | } |
128 | | |
129 | | QPDF::Members::Members(QPDF& qpdf) : |
130 | 23.3k | Doc(qpdf, this), |
131 | 23.3k | c(qpdf, this), |
132 | 23.3k | lin(*this), |
133 | 23.3k | objects(*this), |
134 | 23.3k | pages(*this), |
135 | 23.3k | file(std::make_shared<InvalidInputSource>()), |
136 | 23.3k | encp(std::make_shared<EncryptionParameters>()) |
137 | 23.3k | { |
138 | 23.3k | } |
139 | | |
140 | | QPDF::QPDF() : |
141 | 23.3k | m(std::make_unique<Members>(*this)) |
142 | 23.3k | { |
143 | 23.3k | m->tokenizer.allowEOF(); |
144 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
145 | | // the lifetime of this running application. |
146 | 23.3k | static std::atomic<unsigned long long> unique_id{0}; |
147 | 23.3k | m->unique_id = unique_id.fetch_add(1ULL); |
148 | 23.3k | } |
149 | | |
150 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
151 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
152 | | class Disconnect: BaseHandle |
153 | | { |
154 | | public: |
155 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
156 | 584k | BaseHandle(obj) |
157 | 584k | { |
158 | 584k | } |
159 | | void |
160 | | disconnect() |
161 | 584k | { |
162 | 584k | BaseHandle::disconnect(false); |
163 | 584k | if (raw_type_code() != ::ot_null) { |
164 | 148k | obj->value = QPDF_Destroyed(); |
165 | 148k | } |
166 | 584k | } |
167 | | }; |
168 | | |
169 | | QPDF::~QPDF() |
170 | 23.3k | { |
171 | | // If two objects are mutually referential (through each object having an array or dictionary |
172 | | // that contains an indirect reference to the other), the circular references in the |
173 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
174 | | // in the object cache, which is those objects that we read from the file, and break all |
175 | | // resolved indirect references by replacing them with an internal object type representing that |
176 | | // they have been destroyed. Note that we can't break references like this at any time when the |
177 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
178 | | // are reachable from this object to release their association with this QPDF. Direct objects |
179 | | // are not destroyed since they can be moved to other QPDF objects safely. |
180 | | |
181 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
182 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
183 | 23.3k | m->xref_table.clear(); |
184 | 584k | for (auto const& iter: m->obj_cache) { |
185 | 584k | Disconnect(iter.second.object).disconnect(); |
186 | 584k | } |
187 | 23.3k | } |
188 | | |
189 | | std::shared_ptr<QPDF> |
190 | | QPDF::create() |
191 | 23.3k | { |
192 | 23.3k | return std::make_shared<QPDF>(); |
193 | 23.3k | } |
194 | | |
195 | | void |
196 | | QPDF::processFile(char const* filename, char const* password) |
197 | 0 | { |
198 | 0 | auto* fi = new FileInputSource(filename); |
199 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
200 | 0 | } |
201 | | |
202 | | void |
203 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
204 | 0 | { |
205 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
206 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
207 | 0 | } |
208 | | |
209 | | void |
210 | | QPDF::processMemoryFile( |
211 | | char const* description, char const* buf, size_t length, char const* password) |
212 | 0 | { |
213 | 0 | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
214 | 0 | processInputSource(is, password); |
215 | 0 | } |
216 | | |
217 | | void |
218 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
219 | 23.3k | { |
220 | 23.3k | m->file = source; |
221 | 23.3k | m->objects.parse(password); |
222 | 23.3k | } |
223 | | |
224 | | void |
225 | | QPDF::closeInputSource() |
226 | 0 | { |
227 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
228 | 0 | } |
229 | | |
230 | | void |
231 | | QPDF::setPasswordIsHexKey(bool val) |
232 | 0 | { |
233 | 0 | m->cf.password_is_hex_key(val); |
234 | 0 | } |
235 | | |
236 | | void |
237 | | QPDF::emptyPDF() |
238 | 0 | { |
239 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
240 | 0 | } |
241 | | |
242 | | void |
243 | | QPDF::registerStreamFilter( |
244 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
245 | 0 | { |
246 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
247 | 0 | } |
248 | | |
249 | | void |
250 | | QPDF::setIgnoreXRefStreams(bool val) |
251 | 0 | { |
252 | 0 | (void)m->cf.ignore_xref_streams(val); |
253 | 0 | } |
254 | | |
255 | | std::shared_ptr<QPDFLogger> |
256 | | QPDF::getLogger() |
257 | 0 | { |
258 | 0 | return m->cf.log(); |
259 | 0 | } |
260 | | |
261 | | void |
262 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
263 | 0 | { |
264 | 0 | m->cf.log(l); |
265 | 0 | } |
266 | | |
267 | | void |
268 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
269 | 0 | { |
270 | 0 | setLogger(QPDFLogger::create()); |
271 | 0 | m->cf.log()->setOutputStreams(out, err); |
272 | 0 | } |
273 | | |
274 | | void |
275 | | QPDF::setSuppressWarnings(bool val) |
276 | 0 | { |
277 | 0 | (void)m->cf.suppress_warnings(val); |
278 | 0 | } |
279 | | |
280 | | void |
281 | | QPDF::setMaxWarnings(size_t val) |
282 | 23.3k | { |
283 | 23.3k | (void)m->cf.max_warnings(val); |
284 | 23.3k | } |
285 | | |
286 | | void |
287 | | QPDF::setAttemptRecovery(bool val) |
288 | 0 | { |
289 | 0 | (void)m->cf.surpress_recovery(!val); |
290 | 0 | } |
291 | | |
292 | | void |
293 | | QPDF::setImmediateCopyFrom(bool val) |
294 | 0 | { |
295 | 0 | (void)m->cf.immediate_copy_from(val); |
296 | 0 | } |
297 | | |
298 | | std::vector<QPDFExc> |
299 | | QPDF::getWarnings() |
300 | 0 | { |
301 | 0 | std::vector<QPDFExc> result = std::move(m->warnings); |
302 | 0 | m->warnings.clear(); |
303 | 0 | return result; |
304 | 0 | } |
305 | | |
306 | | bool |
307 | | QPDF::anyWarnings() const |
308 | 0 | { |
309 | 0 | return !m->warnings.empty(); |
310 | 0 | } |
311 | | |
312 | | size_t |
313 | | QPDF::numWarnings() const |
314 | 0 | { |
315 | 0 | return m->warnings.size(); |
316 | 0 | } |
317 | | |
318 | | void |
319 | | QPDF::warn(QPDFExc const& e) |
320 | 509k | { |
321 | 509k | m->c.warn(e); |
322 | 509k | } |
323 | | |
324 | | void |
325 | | Common::warn(QPDFExc const& e) |
326 | 797k | { |
327 | 797k | if (cf.max_warnings() > 0 && m->warnings.size() >= cf.max_warnings()) { |
328 | 15.4k | stopOnError("Too many warnings - file is too badly damaged"); |
329 | 15.4k | } |
330 | 797k | m->warnings.emplace_back(e); |
331 | 797k | if (!cf.suppress_warnings()) { |
332 | 781k | *cf.log()->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
333 | 781k | } |
334 | 797k | } |
335 | | |
336 | | void |
337 | | QPDF::warn( |
338 | | qpdf_error_code_e error_code, |
339 | | std::string const& object, |
340 | | qpdf_offset_t offset, |
341 | | std::string const& message) |
342 | 3.16k | { |
343 | 3.16k | m->c.warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
344 | 3.16k | } |
345 | | |
346 | | void |
347 | | Common::warn( |
348 | | qpdf_error_code_e error_code, |
349 | | std::string const& object, |
350 | | qpdf_offset_t offset, |
351 | | std::string const& message) |
352 | 985 | { |
353 | 985 | warn(QPDFExc(error_code, qpdf.getFilename(), object, offset, message)); |
354 | 985 | } |
355 | | |
356 | | QPDFObjectHandle |
357 | | QPDF::newReserved() |
358 | 0 | { |
359 | 0 | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
360 | 0 | } |
361 | | |
362 | | QPDFObjectHandle |
363 | | QPDF::newIndirectNull() |
364 | 10.1k | { |
365 | 10.1k | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
366 | 10.1k | } |
367 | | |
368 | | QPDFObjectHandle |
369 | | QPDF::newStream() |
370 | 0 | { |
371 | 0 | return makeIndirectObject( |
372 | 0 | qpdf::Stream(*this, m->objects.nextObjGen(), Dictionary::empty(), 0, 0)); |
373 | 0 | } |
374 | | |
375 | | QPDFObjectHandle |
376 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
377 | 0 | { |
378 | 0 | auto result = newStream(); |
379 | 0 | result.replaceStreamData(data, {}, {}); |
380 | 0 | return result; |
381 | 0 | } |
382 | | |
383 | | QPDFObjectHandle |
384 | | QPDF::newStream(std::string const& data) |
385 | 0 | { |
386 | 0 | auto result = newStream(); |
387 | 0 | result.replaceStreamData(data, {}, {}); |
388 | 0 | return result; |
389 | 0 | } |
390 | | |
391 | | QPDFObjectHandle |
392 | | QPDF::getObject(int objid, int generation) |
393 | 14.6k | { |
394 | 14.6k | return getObject({objid, generation}); |
395 | 14.6k | } |
396 | | |
397 | | QPDFObjectHandle |
398 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
399 | 0 | { |
400 | 0 | return getObject(og); |
401 | 0 | } |
402 | | |
403 | | QPDFObjectHandle |
404 | | QPDF::getObjectByID(int objid, int generation) |
405 | 0 | { |
406 | 0 | return getObject(QPDFObjGen(objid, generation)); |
407 | 0 | } |
408 | | |
409 | | QPDFObjectHandle |
410 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
411 | 0 | { |
412 | 0 | return m->objects.foreign().copied(foreign); |
413 | 0 | } |
414 | | |
415 | | Objects ::Foreign::Copier& |
416 | | Objects::Foreign::copier(QPDFObjectHandle const& foreign) |
417 | 0 | { |
418 | 0 | if (!foreign.isIndirect()) { |
419 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
420 | 0 | } |
421 | 0 | QPDF& other = *foreign.qpdf(); |
422 | 0 | if (&other == &qpdf) { |
423 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
424 | 0 | } |
425 | 0 | return copiers.insert({other.getUniqueId(), {qpdf}}).first->second; |
426 | 0 | } |
427 | | |
428 | | QPDFObjectHandle |
429 | | Objects::Foreign::Copier::copied(QPDFObjectHandle const& foreign) |
430 | 0 | { |
431 | | // Here's an explanation of what's going on here. |
432 | | // |
433 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
434 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
435 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
436 | | // references to the corresponding object in the local file. |
437 | | // |
438 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
439 | | // QPDF that we are copying from. The mapping is stored in an Foreign::Copier, which contains a |
440 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
441 | | // |
442 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
443 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
444 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
445 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
446 | | // mapping from the foreign object ID to the new object. While we |
447 | | // do this, we keep a list of objects to copy. |
448 | | // |
449 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
450 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
451 | | // need to replace them with references to objects in the local file. This is what |
452 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
453 | | // all the indirect references replaced with new ones in the local context, we can replace the |
454 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
455 | | // references in any order. |
456 | | |
457 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
458 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
459 | | // require the original QPDF object but may require the original source of the stream data with |
460 | | // special handling for immediate_copy_from. This logic is also in |
461 | | // replaceForeignIndirectObjects. |
462 | | |
463 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
464 | | // use case to copy pages this way if the intention is to not update the pages tree. |
465 | |
|
466 | 0 | util::assertion( |
467 | 0 | visiting.empty(), "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
468 | | |
469 | | // Make sure we have an object in this file for every referenced object in the old file. |
470 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
471 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
472 | | // the local object will already be a stream. |
473 | 0 | reserve_objects(foreign, true); |
474 | |
|
475 | 0 | util::assertion(visiting.empty(), "obj_copier.visiting is not empty after reserving objects"); |
476 | | |
477 | | // Copy any new objects and replace the reservations. |
478 | 0 | for (auto& oh: to_copy) { |
479 | 0 | auto copy = replace_indirect_object(oh, true); |
480 | 0 | if (!oh.isStream()) { |
481 | 0 | qpdf.replaceReserved(object_map[oh], copy); |
482 | 0 | } |
483 | 0 | } |
484 | 0 | to_copy.clear(); |
485 | |
|
486 | 0 | auto og = foreign.getObjGen(); |
487 | 0 | if (!object_map.contains(og)) { |
488 | 0 | warn(damagedPDF( |
489 | 0 | foreign.qpdf()->getFilename() + " object " + og.unparse(' '), |
490 | 0 | foreign.offset(), |
491 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
492 | 0 | "null")); |
493 | 0 | return QPDFObjectHandle::newNull(); |
494 | 0 | } |
495 | 0 | return object_map[foreign]; |
496 | 0 | } |
497 | | |
498 | | void |
499 | | Objects::Foreign::Copier::reserve_objects(QPDFObjectHandle const& foreign, bool top) |
500 | 0 | { |
501 | 0 | auto foreign_tc = foreign.type_code(); |
502 | 0 | util::assertion( |
503 | 0 | foreign_tc != ::ot_reserved, "QPDF: attempting to copy a foreign reserved object"); |
504 | |
|
505 | 0 | if (foreign.isPagesObject()) { |
506 | 0 | return; |
507 | 0 | } |
508 | | |
509 | 0 | if (foreign.indirect()) { |
510 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
511 | 0 | if (!visiting.add(foreign_og)) { |
512 | 0 | return; |
513 | 0 | } |
514 | 0 | if (object_map.contains(foreign_og)) { |
515 | 0 | if (!(top && foreign.isPageObject() && object_map[foreign_og].null())) { |
516 | 0 | visiting.erase(foreign); |
517 | 0 | return; |
518 | 0 | } |
519 | 0 | } else { |
520 | 0 | object_map[foreign_og] = foreign.isStream() ? qpdf.newStream() : qpdf.newIndirectNull(); |
521 | 0 | if (!top && foreign.isPageObject()) { |
522 | 0 | visiting.erase(foreign_og); |
523 | 0 | return; |
524 | 0 | } |
525 | 0 | } |
526 | 0 | to_copy.emplace_back(foreign); |
527 | 0 | } |
528 | | |
529 | 0 | if (foreign_tc == ::ot_array) { |
530 | 0 | for (auto const& item: Array(foreign)) { |
531 | 0 | reserve_objects(item); |
532 | 0 | } |
533 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
534 | 0 | for (auto const& item: Dictionary(foreign)) { |
535 | 0 | if (!item.second.null()) { |
536 | 0 | reserve_objects(item.second); |
537 | 0 | } |
538 | 0 | } |
539 | 0 | } else if (foreign_tc == ::ot_stream) { |
540 | 0 | reserve_objects(foreign.getDict()); |
541 | 0 | } |
542 | |
|
543 | 0 | visiting.erase(foreign); |
544 | 0 | } |
545 | | |
546 | | QPDFObjectHandle |
547 | | Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreign, bool top) |
548 | 0 | { |
549 | 0 | auto foreign_tc = foreign.type_code(); |
550 | |
|
551 | 0 | if (!top && foreign.indirect()) { |
552 | 0 | auto mapping = object_map.find(foreign.id_gen()); |
553 | 0 | if (mapping == object_map.end()) { |
554 | | // This case would occur if this is a reference to a Pages object that we didn't |
555 | | // traverse into. |
556 | 0 | return QPDFObjectHandle::newNull(); |
557 | 0 | } |
558 | 0 | return mapping->second; |
559 | 0 | } |
560 | | |
561 | 0 | if (foreign_tc == ::ot_array) { |
562 | 0 | Array array = foreign; |
563 | 0 | std::vector<QPDFObjectHandle> result; |
564 | 0 | result.reserve(array.size()); |
565 | 0 | for (auto const& item: array) { |
566 | 0 | result.emplace_back(replace_indirect_object(item)); |
567 | 0 | } |
568 | 0 | return Array(std::move(result)); |
569 | 0 | } |
570 | | |
571 | 0 | if (foreign_tc == ::ot_dictionary) { |
572 | 0 | auto result = Dictionary::empty(); |
573 | 0 | for (auto const& [key, value]: Dictionary(foreign)) { |
574 | 0 | if (!value.null()) { |
575 | 0 | result.replaceKey(key, replace_indirect_object(value)); |
576 | 0 | } |
577 | 0 | } |
578 | 0 | return result; |
579 | 0 | } |
580 | | |
581 | 0 | if (foreign_tc == ::ot_stream) { |
582 | 0 | Stream stream = foreign; |
583 | 0 | Stream result = object_map[foreign]; |
584 | 0 | auto dict = result.getDict(); |
585 | 0 | for (auto const& [key, value]: stream.getDict()) { |
586 | 0 | if (!value.null()) { |
587 | 0 | dict.replaceKey(key, replace_indirect_object(value)); |
588 | 0 | } |
589 | 0 | } |
590 | 0 | stream.copy_data_to(result); |
591 | 0 | return result; |
592 | 0 | } |
593 | | |
594 | 0 | foreign.assertScalar(); |
595 | 0 | auto result = foreign; |
596 | 0 | result.makeDirect(); |
597 | 0 | return result; |
598 | 0 | } |
599 | | |
600 | | unsigned long long |
601 | | QPDF::getUniqueId() const |
602 | 0 | { |
603 | 0 | return m->unique_id; |
604 | 0 | } |
605 | | |
606 | | std::string |
607 | | QPDF::getFilename() const |
608 | 68.6k | { |
609 | 68.6k | return m->file->getName(); |
610 | 68.6k | } |
611 | | |
612 | | PDFVersion |
613 | | QPDF::getVersionAsPDFVersion() |
614 | 0 | { |
615 | 0 | int major = 1; |
616 | 0 | int minor = 3; |
617 | 0 | int extension_level = getExtensionLevel(); |
618 | |
|
619 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
620 | 0 | std::smatch match; |
621 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
622 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
623 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
624 | 0 | } |
625 | |
|
626 | 0 | return {major, minor, extension_level}; |
627 | 0 | } |
628 | | |
629 | | std::string |
630 | | QPDF::getPDFVersion() const |
631 | 9.52k | { |
632 | 9.52k | return m->pdf_version; |
633 | 9.52k | } |
634 | | |
635 | | int |
636 | | QPDF::getExtensionLevel() |
637 | 9.52k | { |
638 | 9.52k | if (Integer ExtensionLevel = getRoot()["/Extensions"]["/ADBE"]["/ExtensionLevel"]) { |
639 | 6 | return ExtensionLevel.value<int>(); |
640 | 6 | } |
641 | 9.51k | return 0; |
642 | 9.52k | } |
643 | | |
644 | | QPDFObjectHandle |
645 | | QPDF::getTrailer() |
646 | 64.2k | { |
647 | 64.2k | return m->trailer; |
648 | 64.2k | } |
649 | | |
650 | | QPDFObjectHandle |
651 | | QPDF::getRoot() |
652 | 96.9k | { |
653 | 96.9k | Dictionary Root = m->trailer["/Root"]; |
654 | 96.9k | if (!Root) { |
655 | 4.58k | throw m->c.damagedPDF("", -1, "unable to find /Root dictionary"); |
656 | 4.58k | } |
657 | | // Check_mode is an interim solution to request #810 pending a more comprehensive review of the |
658 | | // approach to more extensive checks and warning levels. |
659 | 92.3k | if (m->cf.check_mode() && Name(Root["/Type"]) != "/Catalog") { |
660 | 0 | warn(m->c.damagedPDF("", -1, "catalog /Type entry missing or invalid")); |
661 | 0 | Root.replaceKey("/Type", Name("/Catalog")); |
662 | 0 | } |
663 | 92.3k | return Root.oh(); |
664 | 96.9k | } |
665 | | |
666 | | std::map<QPDFObjGen, QPDFXRefEntry> |
667 | | QPDF::getXRefTable() |
668 | 0 | { |
669 | 0 | return m->objects.xref_table(); |
670 | 0 | } |
671 | | |
672 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
673 | | Objects::xref_table() |
674 | 0 | { |
675 | 0 | if (!m->parsed) { |
676 | 0 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
677 | 0 | } |
678 | | |
679 | 0 | return m->xref_table; |
680 | 0 | } |
681 | | |
682 | | bool |
683 | | QPDF::pipeStreamData( |
684 | | std::shared_ptr<EncryptionParameters> encp, |
685 | | std::shared_ptr<InputSource> file, |
686 | | QPDF& qpdf_for_warning, |
687 | | QPDFObjGen og, |
688 | | qpdf_offset_t offset, |
689 | | size_t length, |
690 | | QPDFObjectHandle stream_dict, |
691 | | bool is_root_metadata, |
692 | | Pipeline* pipeline, |
693 | | bool suppress_warnings, |
694 | | bool will_retry) |
695 | 79.7k | { |
696 | 79.7k | std::unique_ptr<Pipeline> to_delete; |
697 | 79.7k | if (encp->encrypted) { |
698 | 51 | decryptStream( |
699 | 51 | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
700 | 51 | } |
701 | | |
702 | 79.7k | bool attempted_finish = false; |
703 | 79.7k | try { |
704 | 79.7k | auto buf = file->read(length, offset); |
705 | 79.7k | if (buf.size() != length) { |
706 | 0 | throw qpdf_for_warning.m->c.damagedPDF( |
707 | 0 | *file, |
708 | 0 | "", |
709 | 0 | offset + QIntC::to_offset(buf.size()), |
710 | 0 | "unexpected EOF reading stream data"); |
711 | 0 | } |
712 | 79.7k | pipeline->write(buf.data(), length); |
713 | 79.7k | attempted_finish = true; |
714 | 79.7k | pipeline->finish(); |
715 | 79.7k | return true; |
716 | 79.7k | } catch (QPDFExc& e) { |
717 | 0 | if (!suppress_warnings) { |
718 | 0 | qpdf_for_warning.warn(e); |
719 | 0 | } |
720 | 9.63k | } catch (std::exception& e) { |
721 | 9.63k | if (!suppress_warnings) { |
722 | 9.63k | QTC::TC("qpdf", "QPDF decoding error warning"); |
723 | 9.63k | qpdf_for_warning.warn( |
724 | | // line-break |
725 | 9.63k | qpdf_for_warning.m->c.damagedPDF( |
726 | 9.63k | *file, |
727 | 9.63k | "", |
728 | 9.63k | file->getLastOffset(), |
729 | 9.63k | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
730 | 9.63k | e.what()))); |
731 | 9.63k | if (will_retry) { |
732 | 8.51k | qpdf_for_warning.warn( |
733 | | // line-break |
734 | 8.51k | qpdf_for_warning.m->c.damagedPDF( |
735 | 8.51k | *file, |
736 | 8.51k | "", |
737 | 8.51k | file->getLastOffset(), |
738 | 8.51k | "stream will be re-processed without filtering to avoid data loss")); |
739 | 8.51k | } |
740 | 9.63k | } |
741 | 9.63k | } |
742 | 9.58k | if (!attempted_finish) { |
743 | 3.10k | try { |
744 | 3.10k | pipeline->finish(); |
745 | 3.10k | } catch (std::exception&) { |
746 | | // ignore |
747 | 1.48k | } |
748 | 3.10k | } |
749 | 9.58k | return false; |
750 | 9.58k | } |
751 | | |
752 | | bool |
753 | | QPDF::pipeStreamData( |
754 | | QPDFObjGen og, |
755 | | qpdf_offset_t offset, |
756 | | size_t length, |
757 | | QPDFObjectHandle stream_dict, |
758 | | bool is_root_metadata, |
759 | | Pipeline* pipeline, |
760 | | bool suppress_warnings, |
761 | | bool will_retry) |
762 | 79.7k | { |
763 | 79.7k | return pipeStreamData( |
764 | 79.7k | m->encp, |
765 | 79.7k | m->file, |
766 | 79.7k | *this, |
767 | 79.7k | og, |
768 | 79.7k | offset, |
769 | 79.7k | length, |
770 | 79.7k | stream_dict, |
771 | 79.7k | is_root_metadata, |
772 | 79.7k | pipeline, |
773 | 79.7k | suppress_warnings, |
774 | 79.7k | will_retry); |
775 | 79.7k | } |
776 | | |
777 | | // Throw a generic exception when we lack context for something more specific. New code should not |
778 | | // use this. |
779 | | void |
780 | | Common::stopOnError(std::string const& message) |
781 | 15.4k | { |
782 | 15.4k | throw damagedPDF("", message); |
783 | 15.4k | } |
784 | | |
785 | | // Return an exception of type qpdf_e_damaged_pdf. |
786 | | QPDFExc |
787 | | Common::damagedPDF( |
788 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
789 | 66.4k | { |
790 | 66.4k | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
791 | 66.4k | } |
792 | | |
793 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
794 | | // m->last_object_description. |
795 | | QPDFExc |
796 | | Common::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) const |
797 | 48.2k | { |
798 | 48.2k | return damagedPDF(input, m->last_object_description, offset, message); |
799 | 48.2k | } |
800 | | |
801 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
802 | | QPDFExc |
803 | | Common::damagedPDF( |
804 | | std::string const& object, qpdf_offset_t offset, std::string const& message) const |
805 | 235k | { |
806 | 235k | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
807 | 235k | } |
808 | | |
809 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
810 | | // offset from .m->file->getLastOffset(). |
811 | | QPDFExc |
812 | | Common::damagedPDF(std::string const& object, std::string const& message) const |
813 | 42.0k | { |
814 | 42.0k | return damagedPDF(object, m->file->getLastOffset(), message); |
815 | 42.0k | } |
816 | | |
817 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
818 | | // from .m->last_object_description. |
819 | | QPDFExc |
820 | | Common::damagedPDF(qpdf_offset_t offset, std::string const& message) const |
821 | 34.7k | { |
822 | 34.7k | return damagedPDF(m->last_object_description, offset, message); |
823 | 34.7k | } |
824 | | |
825 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
826 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
827 | | QPDFExc |
828 | | Common::damagedPDF(std::string const& message) const |
829 | 38.0k | { |
830 | 38.0k | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
831 | 38.0k | } |
832 | | |
833 | | bool |
834 | | QPDF::everCalledGetAllPages() const |
835 | 0 | { |
836 | 0 | return m->pages.ever_called_get_all_pages(); |
837 | 0 | } |
838 | | |
839 | | bool |
840 | | QPDF::everPushedInheritedAttributesToPages() const |
841 | 0 | { |
842 | 0 | return m->pages.ever_pushed_inherited_attributes_to_pages(); |
843 | 0 | } |
844 | | |
845 | | void |
846 | | QPDF::removeSecurityRestrictions() |
847 | 0 | { |
848 | 0 | auto root = getRoot(); |
849 | 0 | root.removeKey("/Perms"); |
850 | 0 | auto acroform = root.getKey("/AcroForm"); |
851 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
852 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
853 | 0 | } |
854 | 0 | } |