/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/FileInputSource.hh> |
15 | | #include <qpdf/InputSource_private.hh> |
16 | | #include <qpdf/OffsetInputSource.hh> |
17 | | #include <qpdf/Pipeline.hh> |
18 | | #include <qpdf/QPDFExc.hh> |
19 | | #include <qpdf/QPDFLogger.hh> |
20 | | #include <qpdf/QPDFObjectHandle_private.hh> |
21 | | #include <qpdf/QPDFObject_private.hh> |
22 | | #include <qpdf/QPDFParser.hh> |
23 | | #include <qpdf/QTC.hh> |
24 | | #include <qpdf/QUtil.hh> |
25 | | #include <qpdf/Util.hh> |
26 | | |
27 | | using namespace qpdf; |
28 | | using namespace std::literals; |
29 | | |
30 | | using Objects = QPDF::Doc::Objects; |
31 | | using Foreign = Objects::Foreign; |
32 | | using Streams = Objects::Streams; |
33 | | |
// The library version string. This must be a fixed value: QPDFVersion() returns a const reference
// to it, and the C API relies on its being static as well.
std::string const QPDF::qpdf_version(QPDF_VERSION);
37 | | |
// Minimal PDF text (a catalog pointing at a page tree with no kids) that emptyPDF() passes to
// processMemoryFile(). The xref entries (offsets 9 and 58) and the startxref value (110) are byte
// offsets into this exact text, so any change to the lines preceding them must be matched by
// updating those numbers.
static char const* EMPTY_PDF = (
    // force line break
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
56 | | |
57 | | namespace |
58 | | { |
59 | | class InvalidInputSource: public InputSource |
60 | | { |
61 | | public: |
62 | | ~InvalidInputSource() override = default; |
63 | | qpdf_offset_t |
64 | | findAndSkipNextEOL() override |
65 | 0 | { |
66 | 0 | throwException(); |
67 | 0 | return 0; |
68 | 0 | } |
69 | | std::string const& |
70 | | getName() const override |
71 | 0 | { |
72 | 0 | static std::string name("closed input source"); |
73 | 0 | return name; |
74 | 0 | } |
75 | | qpdf_offset_t |
76 | | tell() override |
77 | 0 | { |
78 | 0 | throwException(); |
79 | 0 | return 0; |
80 | 0 | } |
81 | | void |
82 | | seek(qpdf_offset_t offset, int whence) override |
83 | 0 | { |
84 | 0 | throwException(); |
85 | 0 | } |
86 | | void |
87 | | rewind() override |
88 | 0 | { |
89 | 0 | throwException(); |
90 | 0 | } |
91 | | size_t |
92 | | read(char* buffer, size_t length) override |
93 | 0 | { |
94 | 0 | throwException(); |
95 | 0 | return 0; |
96 | 0 | } |
97 | | void |
98 | | unreadCh(char ch) override |
99 | 0 | { |
100 | 0 | throwException(); |
101 | 0 | } |
102 | | |
103 | | private: |
104 | | void |
105 | | throwException() |
106 | 0 | { |
107 | 0 | throw std::logic_error( |
108 | 0 | "QPDF operation attempted on a QPDF object with no input " |
109 | 0 | "source. QPDF operations are invalid before processFile (or " |
110 | 0 | "another process method) or after closeInputSource"); |
111 | 0 | } |
112 | | }; |
113 | | } // namespace |
114 | | |
115 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
116 | 113k | qpdf(qpdf), |
117 | 113k | og(og) |
118 | 113k | { |
119 | 113k | } |
120 | | |
121 | | std::string const& |
122 | | QPDF::QPDFVersion() |
123 | 0 | { |
124 | | // The C API relies on this being a static value. |
125 | 0 | return QPDF::qpdf_version; |
126 | 0 | } |
127 | | |
// Build the private state for a QPDF object. lin, objects and pages are obtained from doc, so they
// rely on doc being initialized first (NOTE(review): the real initialization order follows the
// member declaration order in the class, which is not visible here -- confirm it matches). The
// input source starts out as an InvalidInputSource, so any operation that touches the file before
// a real source is installed throws std::logic_error.
QPDF::Members::Members(QPDF& qpdf) :
    doc(qpdf, *this),
    lin(doc.linearization()),
    objects(doc.objects()),
    pages(doc.pages()),
    log(QPDFLogger::defaultLogger()),
    file(std::make_shared<InvalidInputSource>()),
    encp(std::make_shared<EncryptionParameters>())
{
}
138 | | |
139 | | QPDF::QPDF() : |
140 | 24.5k | m(std::make_unique<Members>(*this)) |
141 | 24.5k | { |
142 | 24.5k | m->tokenizer.allowEOF(); |
143 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
144 | | // the lifetime of this running application. |
145 | 24.5k | static std::atomic<unsigned long long> unique_id{0}; |
146 | 24.5k | m->unique_id = unique_id.fetch_add(1ULL); |
147 | 24.5k | } |
148 | | |
149 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
150 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
151 | | class Disconnect: BaseHandle |
152 | | { |
153 | | public: |
154 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
155 | 339k | BaseHandle(obj) |
156 | 339k | { |
157 | 339k | } |
158 | | void |
159 | | disconnect() |
160 | 339k | { |
161 | 339k | BaseHandle::disconnect(false); |
162 | 339k | if (raw_type_code() != ::ot_null) { |
163 | 126k | obj->value = QPDF_Destroyed(); |
164 | 126k | } |
165 | 339k | } |
166 | | }; |
167 | | |
168 | | QPDF::~QPDF() |
169 | 24.5k | { |
170 | | // If two objects are mutually referential (through each object having an array or dictionary |
171 | | // that contains an indirect reference to the other), the circular references in the |
172 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
173 | | // in the object cache, which is those objects that we read from the file, and break all |
174 | | // resolved indirect references by replacing them with an internal object type representing that |
175 | | // they have been destroyed. Note that we can't break references like this at any time when the |
176 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
177 | | // are reachable from this object to release their association with this QPDF. Direct objects |
178 | | // are not destroyed since they can be moved to other QPDF objects safely. |
179 | | |
180 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
181 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
182 | 24.5k | m->xref_table.clear(); |
183 | 339k | for (auto const& iter: m->obj_cache) { |
184 | 339k | Disconnect(iter.second.object).disconnect(); |
185 | 339k | } |
186 | 24.5k | } |
187 | | |
188 | | std::shared_ptr<QPDF> |
189 | | QPDF::create() |
190 | 24.5k | { |
191 | 24.5k | return std::make_shared<QPDF>(); |
192 | 24.5k | } |
193 | | |
194 | | void |
195 | | QPDF::processFile(char const* filename, char const* password) |
196 | 0 | { |
197 | 0 | auto* fi = new FileInputSource(filename); |
198 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
199 | 0 | } |
200 | | |
201 | | void |
202 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
203 | 0 | { |
204 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
205 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
206 | 0 | } |
207 | | |
208 | | void |
209 | | QPDF::processMemoryFile( |
210 | | char const* description, char const* buf, size_t length, char const* password) |
211 | 0 | { |
212 | 0 | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
213 | 0 | processInputSource(is, password); |
214 | 0 | } |
215 | | |
216 | | void |
217 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
218 | 24.5k | { |
219 | 24.5k | m->file = source; |
220 | 24.5k | m->objects.parse(password); |
221 | 24.5k | } |
222 | | |
223 | | void |
224 | | QPDF::closeInputSource() |
225 | 0 | { |
226 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
227 | 0 | } |
228 | | |
229 | | void |
230 | | QPDF::setPasswordIsHexKey(bool val) |
231 | 0 | { |
232 | 0 | m->provided_password_is_hex_key = val; |
233 | 0 | } |
234 | | |
235 | | void |
236 | | QPDF::emptyPDF() |
237 | 0 | { |
238 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
239 | 0 | } |
240 | | |
241 | | void |
242 | | QPDF::registerStreamFilter( |
243 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
244 | 0 | { |
245 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
246 | 0 | } |
247 | | |
248 | | void |
249 | | QPDF::setIgnoreXRefStreams(bool val) |
250 | 0 | { |
251 | 0 | m->ignore_xref_streams = val; |
252 | 0 | } |
253 | | |
254 | | std::shared_ptr<QPDFLogger> |
255 | | QPDF::getLogger() |
256 | 0 | { |
257 | 0 | return m->log; |
258 | 0 | } |
259 | | |
260 | | void |
261 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
262 | 0 | { |
263 | 0 | m->log = l; |
264 | 0 | } |
265 | | |
266 | | void |
267 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
268 | 0 | { |
269 | 0 | setLogger(QPDFLogger::create()); |
270 | 0 | m->log->setOutputStreams(out, err); |
271 | 0 | } |
272 | | |
273 | | void |
274 | | QPDF::setSuppressWarnings(bool val) |
275 | 0 | { |
276 | 0 | m->suppress_warnings = val; |
277 | 0 | } |
278 | | |
279 | | void |
280 | | QPDF::setMaxWarnings(size_t val) |
281 | 24.5k | { |
282 | 24.5k | m->max_warnings = val; |
283 | 24.5k | } |
284 | | |
285 | | void |
286 | | QPDF::setAttemptRecovery(bool val) |
287 | 0 | { |
288 | 0 | m->attempt_recovery = val; |
289 | 0 | } |
290 | | |
291 | | void |
292 | | QPDF::setImmediateCopyFrom(bool val) |
293 | 0 | { |
294 | 0 | m->immediate_copy_from = val; |
295 | 0 | } |
296 | | |
297 | | std::vector<QPDFExc> |
298 | | QPDF::getWarnings() |
299 | 0 | { |
300 | 0 | std::vector<QPDFExc> result = m->warnings; |
301 | 0 | m->warnings.clear(); |
302 | 0 | return result; |
303 | 0 | } |
304 | | |
305 | | bool |
306 | | QPDF::anyWarnings() const |
307 | 0 | { |
308 | 0 | return !m->warnings.empty(); |
309 | 0 | } |
310 | | |
311 | | size_t |
312 | | QPDF::numWarnings() const |
313 | 0 | { |
314 | 0 | return m->warnings.size(); |
315 | 0 | } |
316 | | |
317 | | bool |
318 | | QPDF::validatePDFVersion(char const*& p, std::string& version) |
319 | 6.51k | { |
320 | 6.51k | if (!util::is_digit(*p)) { |
321 | 3.09k | return false; |
322 | 3.09k | } |
323 | 9.14k | while (util::is_digit(*p)) { |
324 | 5.72k | version.append(1, *p++); |
325 | 5.72k | } |
326 | 3.41k | if (!(*p == '.' && util::is_digit(*(p + 1)))) { |
327 | 1.03k | return false; |
328 | 1.03k | } |
329 | 2.38k | version.append(1, *p++); |
330 | 10.4k | while (util::is_digit(*p)) { |
331 | 8.02k | version.append(1, *p++); |
332 | 8.02k | } |
333 | 2.38k | return true; |
334 | 3.41k | } |
335 | | |
336 | | bool |
337 | | QPDF::findHeader() |
338 | 6.52k | { |
339 | 6.52k | qpdf_offset_t global_offset = m->file->tell(); |
340 | 6.52k | std::string line = m->file->readLine(1024); |
341 | 6.52k | char const* p = line.data(); |
342 | 6.52k | if (strncmp(p, "%PDF-", 5) != 0) { |
343 | 0 | throw std::logic_error("findHeader is not looking at %PDF-"); |
344 | 0 | } |
345 | 6.52k | p += 5; |
346 | 6.52k | std::string version; |
347 | | // Note: The string returned by line.data() is always null-terminated. The code below never |
348 | | // overruns the buffer because a null character always short-circuits further advancement. |
349 | 6.52k | if (!validatePDFVersion(p, version)) { |
350 | 4.13k | return false; |
351 | 4.13k | } |
352 | 2.38k | m->pdf_version = version; |
353 | 2.38k | if (global_offset != 0) { |
354 | | // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is |
355 | | // leading material prior to the PDF header, all explicit offsets in the file are such that |
356 | | // 0 points to the beginning of the header. |
357 | 1.33k | QTC::TC("qpdf", "QPDF global offset"); |
358 | 1.33k | m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); |
359 | 1.33k | } |
360 | 2.38k | return true; |
361 | 6.52k | } |
362 | | |
363 | | void |
364 | | QPDF::warn(QPDFExc const& e) |
365 | 794k | { |
366 | 794k | if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { |
367 | 25.9k | stopOnError("Too many warnings - file is too badly damaged"); |
368 | 25.9k | } |
369 | 794k | m->warnings.push_back(e); |
370 | 794k | if (!m->suppress_warnings) { |
371 | 769k | *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
372 | 769k | } |
373 | 794k | } |
374 | | |
375 | | void |
376 | | QPDF::warn( |
377 | | qpdf_error_code_e error_code, |
378 | | std::string const& object, |
379 | | qpdf_offset_t offset, |
380 | | std::string const& message) |
381 | 6.14k | { |
382 | 6.14k | warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
383 | 6.14k | } |
384 | | |
385 | | QPDFObjectHandle |
386 | | QPDF::newReserved() |
387 | 0 | { |
388 | 0 | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
389 | 0 | } |
390 | | |
391 | | QPDFObjectHandle |
392 | | QPDF::newIndirectNull() |
393 | 0 | { |
394 | 0 | return m->objects.makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
395 | 0 | } |
396 | | |
397 | | QPDFObjectHandle |
398 | | QPDF::newStream() |
399 | 0 | { |
400 | 0 | return makeIndirectObject( |
401 | 0 | qpdf::Stream(*this, m->objects.nextObjGen(), Dictionary::empty(), 0, 0)); |
402 | 0 | } |
403 | | |
404 | | QPDFObjectHandle |
405 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
406 | 0 | { |
407 | 0 | auto result = newStream(); |
408 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
409 | 0 | return result; |
410 | 0 | } |
411 | | |
412 | | QPDFObjectHandle |
413 | | QPDF::newStream(std::string const& data) |
414 | 0 | { |
415 | 0 | auto result = newStream(); |
416 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
417 | 0 | return result; |
418 | 0 | } |
419 | | |
420 | | QPDFObjectHandle |
421 | | QPDF::getObject(int objid, int generation) |
422 | 27.6k | { |
423 | 27.6k | return getObject(QPDFObjGen(objid, generation)); |
424 | 27.6k | } |
425 | | |
426 | | QPDFObjectHandle |
427 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
428 | 0 | { |
429 | 0 | return getObject(og); |
430 | 0 | } |
431 | | |
432 | | QPDFObjectHandle |
433 | | QPDF::getObjectByID(int objid, int generation) |
434 | 0 | { |
435 | 0 | return getObject(QPDFObjGen(objid, generation)); |
436 | 0 | } |
437 | | |
438 | | QPDFObjectHandle |
439 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
440 | 0 | { |
441 | 0 | return m->objects.foreign().copied(foreign); |
442 | 0 | } |
443 | | |
444 | | Objects ::Foreign::Copier& |
445 | | Objects::Foreign::copier(QPDFObjectHandle const& foreign) |
446 | 0 | { |
447 | 0 | if (!foreign.isIndirect()) { |
448 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
449 | 0 | } |
450 | 0 | QPDF& other = *foreign.qpdf(); |
451 | 0 | if (&other == &qpdf) { |
452 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
453 | 0 | } |
454 | 0 | return copiers.insert({other.getUniqueId(), {qpdf}}).first->second; |
455 | 0 | } |
456 | | |
457 | | QPDFObjectHandle |
458 | | Objects::Foreign::Copier::copied(QPDFObjectHandle const& foreign) |
459 | 0 | { |
460 | | // Here's an explanation of what's going on here. |
461 | | // |
462 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
463 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
464 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
465 | | // references to the corresponding object in the local file. |
466 | | // |
467 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
468 | | // QPDF that we are copying from. The mapping is stored in an Foreign::Copier, which contains a |
469 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
470 | | // |
471 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
472 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
473 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
474 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
475 | | // mapping from the foreign object ID to the new object. While we |
476 | | // do this, we keep a list of objects to copy. |
477 | | // |
478 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
479 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
480 | | // need to replace them with references to objects in the local file. This is what |
481 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
482 | | // all the indirect references replaced with new ones in the local context, we can replace the |
483 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
484 | | // references in any order. |
485 | | |
486 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
487 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
488 | | // require the original QPDF object but may require the original source of the stream data with |
489 | | // special handling for immediate_copy_from. This logic is also in |
490 | | // replaceForeignIndirectObjects. |
491 | | |
492 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
493 | | // use case to copy pages this way if the intention is to not update the pages tree. |
494 | |
|
495 | 0 | util::assertion( |
496 | 0 | visiting.empty(), "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
497 | | |
498 | | // Make sure we have an object in this file for every referenced object in the old file. |
499 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
500 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
501 | | // the local object will already be a stream. |
502 | 0 | reserve_objects(foreign, true); |
503 | |
|
504 | 0 | util::assertion(visiting.empty(), "obj_copier.visiting is not empty after reserving objects"); |
505 | | |
506 | | // Copy any new objects and replace the reservations. |
507 | 0 | for (auto& oh: to_copy) { |
508 | 0 | auto copy = replace_indirect_object(oh, true); |
509 | 0 | if (!oh.isStream()) { |
510 | 0 | qpdf.replaceReserved(object_map[oh], copy); |
511 | 0 | } |
512 | 0 | } |
513 | 0 | to_copy.clear(); |
514 | |
|
515 | 0 | auto og = foreign.getObjGen(); |
516 | 0 | if (!object_map.contains(og)) { |
517 | 0 | qpdf.warn(qpdf.damagedPDF( |
518 | 0 | foreign.qpdf()->getFilename() + " object " + og.unparse(' '), |
519 | 0 | foreign.offset(), |
520 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
521 | 0 | "null")); |
522 | 0 | return QPDFObjectHandle::newNull(); |
523 | 0 | } |
524 | 0 | return object_map[foreign]; |
525 | 0 | } |
526 | | |
527 | | void |
528 | | Objects::Foreign::Copier::reserve_objects(QPDFObjectHandle const& foreign, bool top) |
529 | 0 | { |
530 | 0 | auto foreign_tc = foreign.type_code(); |
531 | 0 | util::assertion( |
532 | 0 | foreign_tc != ::ot_reserved, "QPDF: attempting to copy a foreign reserved object"); |
533 | |
|
534 | 0 | if (foreign.isPagesObject()) { |
535 | 0 | return; |
536 | 0 | } |
537 | | |
538 | 0 | if (foreign.indirect()) { |
539 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
540 | 0 | if (!visiting.add(foreign_og)) { |
541 | 0 | return; |
542 | 0 | } |
543 | 0 | if (object_map.contains(foreign_og)) { |
544 | 0 | if (!(top && foreign.isPageObject() && object_map[foreign_og].null())) { |
545 | 0 | visiting.erase(foreign); |
546 | 0 | return; |
547 | 0 | } |
548 | 0 | } else { |
549 | 0 | object_map[foreign_og] = foreign.isStream() ? qpdf.newStream() : qpdf.newIndirectNull(); |
550 | 0 | if (!top && foreign.isPageObject()) { |
551 | 0 | visiting.erase(foreign_og); |
552 | 0 | return; |
553 | 0 | } |
554 | 0 | } |
555 | 0 | to_copy.emplace_back(foreign); |
556 | 0 | } |
557 | | |
558 | 0 | if (foreign_tc == ::ot_array) { |
559 | 0 | for (auto const& item: Array(foreign)) { |
560 | 0 | reserve_objects(item); |
561 | 0 | } |
562 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
563 | 0 | for (auto const& item: Dictionary(foreign)) { |
564 | 0 | if (!item.second.null()) { |
565 | 0 | reserve_objects(item.second); |
566 | 0 | } |
567 | 0 | } |
568 | 0 | } else if (foreign_tc == ::ot_stream) { |
569 | 0 | reserve_objects(foreign.getDict()); |
570 | 0 | } |
571 | |
|
572 | 0 | visiting.erase(foreign); |
573 | 0 | } |
574 | | |
575 | | QPDFObjectHandle |
576 | | Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreign, bool top) |
577 | 0 | { |
578 | 0 | auto foreign_tc = foreign.type_code(); |
579 | |
|
580 | 0 | if (!top && foreign.indirect()) { |
581 | 0 | auto mapping = object_map.find(foreign.id_gen()); |
582 | 0 | if (mapping == object_map.end()) { |
583 | | // This case would occur if this is a reference to a Pages object that we didn't |
584 | | // traverse into. |
585 | 0 | return QPDFObjectHandle::newNull(); |
586 | 0 | } |
587 | 0 | return mapping->second; |
588 | 0 | } |
589 | | |
590 | 0 | if (foreign_tc == ::ot_array) { |
591 | 0 | Array array = foreign; |
592 | 0 | std::vector<QPDFObjectHandle> result; |
593 | 0 | result.reserve(array.size()); |
594 | 0 | for (auto const& item: array) { |
595 | 0 | result.emplace_back(replace_indirect_object(item)); |
596 | 0 | } |
597 | 0 | return Array(std::move(result)); |
598 | 0 | } |
599 | | |
600 | 0 | if (foreign_tc == ::ot_dictionary) { |
601 | 0 | auto result = Dictionary::empty(); |
602 | 0 | for (auto const& [key, value]: Dictionary(foreign)) { |
603 | 0 | if (!value.null()) { |
604 | 0 | result.replaceKey(key, replace_indirect_object(value)); |
605 | 0 | } |
606 | 0 | } |
607 | 0 | return result; |
608 | 0 | } |
609 | | |
610 | 0 | if (foreign_tc == ::ot_stream) { |
611 | 0 | Stream stream = foreign; |
612 | 0 | Stream result = object_map[foreign]; |
613 | 0 | auto dict = result.getDict(); |
614 | 0 | for (auto const& [key, value]: stream.getDict()) { |
615 | 0 | if (!value.null()) { |
616 | 0 | dict.replaceKey(key, replace_indirect_object(value)); |
617 | 0 | } |
618 | 0 | } |
619 | 0 | stream.copy_data_to(result); |
620 | 0 | return result; |
621 | 0 | } |
622 | | |
623 | 0 | foreign.assertScalar(); |
624 | 0 | auto result = foreign; |
625 | 0 | result.makeDirect(); |
626 | 0 | return result; |
627 | 0 | } |
628 | | |
629 | | unsigned long long |
630 | | QPDF::getUniqueId() const |
631 | 0 | { |
632 | 0 | return m->unique_id; |
633 | 0 | } |
634 | | |
635 | | std::string |
636 | | QPDF::getFilename() const |
637 | 67.0k | { |
638 | 67.0k | return m->file->getName(); |
639 | 67.0k | } |
640 | | |
641 | | PDFVersion |
642 | | QPDF::getVersionAsPDFVersion() |
643 | 0 | { |
644 | 0 | int major = 1; |
645 | 0 | int minor = 3; |
646 | 0 | int extension_level = getExtensionLevel(); |
647 | |
|
648 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
649 | 0 | std::smatch match; |
650 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
651 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
652 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
653 | 0 | } |
654 | |
|
655 | 0 | return {major, minor, extension_level}; |
656 | 0 | } |
657 | | |
658 | | std::string |
659 | | QPDF::getPDFVersion() const |
660 | 9.17k | { |
661 | 9.17k | return m->pdf_version; |
662 | 9.17k | } |
663 | | |
664 | | int |
665 | | QPDF::getExtensionLevel() |
666 | 9.17k | { |
667 | 9.17k | int result = 0; |
668 | 9.17k | QPDFObjectHandle obj = getRoot(); |
669 | 9.17k | if (obj.hasKey("/Extensions")) { |
670 | 383 | obj = obj.getKey("/Extensions"); |
671 | 383 | if (obj.isDictionary() && obj.hasKey("/ADBE")) { |
672 | 275 | obj = obj.getKey("/ADBE"); |
673 | 275 | if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) { |
674 | 10 | obj = obj.getKey("/ExtensionLevel"); |
675 | 10 | if (obj.isInteger()) { |
676 | 9 | result = obj.getIntValueAsInt(); |
677 | 9 | } |
678 | 10 | } |
679 | 275 | } |
680 | 383 | } |
681 | 9.17k | return result; |
682 | 9.17k | } |
683 | | |
684 | | QPDFObjectHandle |
685 | | QPDF::getTrailer() |
686 | 35.8k | { |
687 | 35.8k | return m->trailer; |
688 | 35.8k | } |
689 | | |
690 | | QPDFObjectHandle |
691 | | QPDF::getRoot() |
692 | 65.0k | { |
693 | 65.0k | QPDFObjectHandle root = m->trailer.getKey("/Root"); |
694 | 65.0k | if (!root.isDictionary()) { |
695 | 5.06k | throw damagedPDF("", -1, "unable to find /Root dictionary"); |
696 | 60.0k | } else if ( |
697 | | // Check_mode is an interim solution to request #810 pending a more comprehensive review of |
698 | | // the approach to more extensive checks and warning levels. |
699 | 60.0k | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { |
700 | 0 | warn(damagedPDF("", -1, "catalog /Type entry missing or invalid")); |
701 | 0 | root.replaceKey("/Type", "/Catalog"_qpdf); |
702 | 0 | } |
703 | 60.0k | return root; |
704 | 65.0k | } |
705 | | |
706 | | std::map<QPDFObjGen, QPDFXRefEntry> |
707 | | QPDF::getXRefTable() |
708 | 0 | { |
709 | 0 | return m->objects.getXRefTableInternal(); |
710 | 0 | } |
711 | | |
712 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
713 | | Objects::getXRefTableInternal() |
714 | 9.19k | { |
715 | 9.19k | if (!m->parsed) { |
716 | 0 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
717 | 0 | } |
718 | | |
719 | 9.19k | return m->xref_table; |
720 | 9.19k | } |
721 | | |
722 | | bool |
723 | | QPDF::pipeStreamData( |
724 | | std::shared_ptr<EncryptionParameters> encp, |
725 | | std::shared_ptr<InputSource> file, |
726 | | QPDF& qpdf_for_warning, |
727 | | QPDFObjGen og, |
728 | | qpdf_offset_t offset, |
729 | | size_t length, |
730 | | QPDFObjectHandle stream_dict, |
731 | | bool is_root_metadata, |
732 | | Pipeline* pipeline, |
733 | | bool suppress_warnings, |
734 | | bool will_retry) |
735 | 37.8k | { |
736 | 37.8k | std::unique_ptr<Pipeline> to_delete; |
737 | 37.8k | if (encp->encrypted) { |
738 | 5.22k | decryptStream( |
739 | 5.22k | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
740 | 5.22k | } |
741 | | |
742 | 37.8k | bool attempted_finish = false; |
743 | 37.8k | try { |
744 | 37.8k | auto buf = file->read(length, offset); |
745 | 37.8k | if (buf.size() != length) { |
746 | 0 | throw damagedPDF( |
747 | 0 | *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); |
748 | 0 | } |
749 | 37.8k | pipeline->write(buf.data(), length); |
750 | 37.8k | attempted_finish = true; |
751 | 37.8k | pipeline->finish(); |
752 | 37.8k | return true; |
753 | 37.8k | } catch (QPDFExc& e) { |
754 | 1 | if (!suppress_warnings) { |
755 | 1 | qpdf_for_warning.warn(e); |
756 | 1 | } |
757 | 10.3k | } catch (std::exception& e) { |
758 | 10.3k | if (!suppress_warnings) { |
759 | 10.3k | QTC::TC("qpdf", "QPDF decoding error warning"); |
760 | 10.3k | qpdf_for_warning.warn( |
761 | | // line-break |
762 | 10.3k | damagedPDF( |
763 | 10.3k | *file, |
764 | 10.3k | "", |
765 | 10.3k | file->getLastOffset(), |
766 | 10.3k | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
767 | 10.3k | e.what()))); |
768 | 10.3k | if (will_retry) { |
769 | 8.86k | qpdf_for_warning.warn( |
770 | | // line-break |
771 | 8.86k | damagedPDF( |
772 | 8.86k | *file, |
773 | 8.86k | "", |
774 | 8.86k | file->getLastOffset(), |
775 | 8.86k | "stream will be re-processed without filtering to avoid data loss")); |
776 | 8.86k | } |
777 | 10.3k | } |
778 | 10.3k | } |
779 | 10.3k | if (!attempted_finish) { |
780 | 4.97k | try { |
781 | 4.97k | pipeline->finish(); |
782 | 4.97k | } catch (std::exception&) { |
783 | | // ignore |
784 | 3.29k | } |
785 | 4.97k | } |
786 | 10.3k | return false; |
787 | 10.3k | } |
788 | | |
789 | | bool |
790 | | QPDF::pipeStreamData( |
791 | | QPDFObjGen og, |
792 | | qpdf_offset_t offset, |
793 | | size_t length, |
794 | | QPDFObjectHandle stream_dict, |
795 | | bool is_root_metadata, |
796 | | Pipeline* pipeline, |
797 | | bool suppress_warnings, |
798 | | bool will_retry) |
799 | 37.8k | { |
800 | 37.8k | return pipeStreamData( |
801 | 37.8k | m->encp, |
802 | 37.8k | m->file, |
803 | 37.8k | *this, |
804 | 37.8k | og, |
805 | 37.8k | offset, |
806 | 37.8k | length, |
807 | 37.8k | stream_dict, |
808 | 37.8k | is_root_metadata, |
809 | 37.8k | pipeline, |
810 | 37.8k | suppress_warnings, |
811 | 37.8k | will_retry); |
812 | 37.8k | } |
813 | | |
814 | | // Throw a generic exception when we lack context for something more specific. New code should not |
815 | | // use this. |
816 | | void |
817 | | QPDF::stopOnError(std::string const& message) |
818 | 25.9k | { |
819 | 25.9k | throw damagedPDF("", message); |
820 | 25.9k | } |
821 | | |
822 | | // Return an exception of type qpdf_e_damaged_pdf. |
823 | | QPDFExc |
824 | | QPDF::damagedPDF( |
825 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
826 | 73.6k | { |
827 | 73.6k | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
828 | 73.6k | } |
829 | | |
830 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
831 | | // m->last_object_description. |
832 | | QPDFExc |
833 | | QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) |
834 | 54.4k | { |
835 | 54.4k | return damagedPDF(input, m->last_object_description, offset, message); |
836 | 54.4k | } |
837 | | |
838 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
839 | | QPDFExc |
840 | | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) |
841 | 287k | { |
842 | 287k | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
843 | 287k | } |
844 | | |
845 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
846 | | // offset from .m->file->getLastOffset(). |
847 | | QPDFExc |
848 | | QPDF::damagedPDF(std::string const& object, std::string const& message) |
849 | 68.8k | { |
850 | 68.8k | return damagedPDF(object, m->file->getLastOffset(), message); |
851 | 68.8k | } |
852 | | |
853 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
854 | | // from .m->last_object_description. |
855 | | QPDFExc |
856 | | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) |
857 | 45.6k | { |
858 | 45.6k | return damagedPDF(m->last_object_description, offset, message); |
859 | 45.6k | } |
860 | | |
861 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
862 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
863 | | QPDFExc |
864 | | QPDF::damagedPDF(std::string const& message) |
865 | 46.3k | { |
866 | 46.3k | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
867 | 46.3k | } |
868 | | |
869 | | bool |
870 | | QPDF::everCalledGetAllPages() const |
871 | 0 | { |
872 | 0 | return m->ever_called_get_all_pages; |
873 | 0 | } |
874 | | |
875 | | bool |
876 | | QPDF::everPushedInheritedAttributesToPages() const |
877 | 0 | { |
878 | 0 | return m->ever_pushed_inherited_attributes_to_pages; |
879 | 0 | } |
880 | | |
881 | | void |
882 | | QPDF::removeSecurityRestrictions() |
883 | 0 | { |
884 | 0 | auto root = getRoot(); |
885 | 0 | root.removeKey("/Perms"); |
886 | 0 | auto acroform = root.getKey("/AcroForm"); |
887 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
888 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
889 | 0 | } |
890 | 0 | } |