/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/BufferInputSource.hh> |
15 | | #include <qpdf/FileInputSource.hh> |
16 | | #include <qpdf/InputSource_private.hh> |
17 | | #include <qpdf/OffsetInputSource.hh> |
18 | | #include <qpdf/Pipeline.hh> |
19 | | #include <qpdf/QPDFExc.hh> |
20 | | #include <qpdf/QPDFLogger.hh> |
21 | | #include <qpdf/QPDFObjectHandle_private.hh> |
22 | | #include <qpdf/QPDFObject_private.hh> |
23 | | #include <qpdf/QPDFParser.hh> |
24 | | #include <qpdf/QTC.hh> |
25 | | #include <qpdf/QUtil.hh> |
26 | | #include <qpdf/Util.hh> |
27 | | |
28 | | using namespace qpdf; |
29 | | using namespace std::literals; |
30 | | |
31 | | // This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
32 | | // being static as well. |
33 | | std::string const QPDF::qpdf_version(QPDF_VERSION); |
34 | | |
// Text of a minimal PDF: a catalog (object 1) that points at an empty page tree (object 2),
// followed by a cross-reference table and trailer. It is the input used by emptyPDF(). The
// offsets recorded in the xref entries (9 and 58) and after startxref (110) are byte positions
// within this text, so any change to the text requires recomputing them.
static char const* EMPTY_PDF =
    // clang-format off
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n";
// clang-format on
53 | | |
54 | | namespace |
55 | | { |
56 | | class InvalidInputSource: public InputSource |
57 | | { |
58 | | public: |
59 | | ~InvalidInputSource() override = default; |
60 | | qpdf_offset_t |
61 | | findAndSkipNextEOL() override |
62 | 0 | { |
63 | 0 | throwException(); |
64 | 0 | return 0; |
65 | 0 | } |
66 | | std::string const& |
67 | | getName() const override |
68 | 0 | { |
69 | 0 | static std::string name("closed input source"); |
70 | 0 | return name; |
71 | 0 | } |
72 | | qpdf_offset_t |
73 | | tell() override |
74 | 0 | { |
75 | 0 | throwException(); |
76 | 0 | return 0; |
77 | 0 | } |
78 | | void |
79 | | seek(qpdf_offset_t offset, int whence) override |
80 | 0 | { |
81 | 0 | throwException(); |
82 | 0 | } |
83 | | void |
84 | | rewind() override |
85 | 0 | { |
86 | 0 | throwException(); |
87 | 0 | } |
88 | | size_t |
89 | | read(char* buffer, size_t length) override |
90 | 0 | { |
91 | 0 | throwException(); |
92 | 0 | return 0; |
93 | 0 | } |
94 | | void |
95 | | unreadCh(char ch) override |
96 | 0 | { |
97 | 0 | throwException(); |
98 | 0 | } |
99 | | |
100 | | private: |
101 | | void |
102 | | throwException() |
103 | 0 | { |
104 | 0 | throw std::logic_error( |
105 | 0 | "QPDF operation attempted on a QPDF object with no input " |
106 | 0 | "source. QPDF operations are invalid before processFile (or " |
107 | 0 | "another process method) or after closeInputSource"); |
108 | 0 | } |
109 | | }; |
110 | | } // namespace |
111 | | |
112 | | QPDF::ForeignStreamData::ForeignStreamData( |
113 | | std::shared_ptr<EncryptionParameters> encp, |
114 | | std::shared_ptr<InputSource> file, |
115 | | QPDFObjGen foreign_og, |
116 | | qpdf_offset_t offset, |
117 | | size_t length, |
118 | | QPDFObjectHandle local_dict, |
119 | | bool is_root_metadata) : |
120 | 0 | encp(encp), |
121 | 0 | file(file), |
122 | 0 | foreign_og(foreign_og), |
123 | 0 | offset(offset), |
124 | 0 | length(length), |
125 | 0 | local_dict(local_dict), |
126 | 0 | is_root_metadata(is_root_metadata) |
127 | 0 | { |
128 | 0 | } |
129 | | |
130 | | QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) : |
131 | 0 | QPDFObjectHandle::StreamDataProvider(true), |
132 | 0 | destination_qpdf(destination_qpdf) |
133 | 0 | { |
134 | 0 | } |
135 | | |
136 | | bool |
137 | | QPDF::CopiedStreamDataProvider::provideStreamData( |
138 | | QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) |
139 | 0 | { |
140 | 0 | std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og]; |
141 | 0 | bool result = false; |
142 | 0 | if (foreign_data.get()) { |
143 | 0 | result = destination_qpdf.pipeForeignStreamData( |
144 | 0 | foreign_data, pipeline, suppress_warnings, will_retry); |
145 | 0 | QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1); |
146 | 0 | } else { |
147 | 0 | auto foreign_stream = foreign_streams[og]; |
148 | 0 | result = foreign_stream.pipeStreamData( |
149 | 0 | pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry); |
150 | 0 | QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1); |
151 | 0 | } |
152 | 0 | return result; |
153 | 0 | } |
154 | | |
155 | | void |
156 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
157 | | QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream) |
158 | 0 | { |
159 | 0 | this->foreign_streams[local_og] = foreign_stream; |
160 | 0 | } |
161 | | |
162 | | void |
163 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
164 | | QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream) |
165 | 0 | { |
166 | 0 | this->foreign_stream_data[local_og] = foreign_stream; |
167 | 0 | } |
168 | | |
169 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
170 | 0 | qpdf(qpdf), |
171 | 0 | og(og) |
172 | 0 | { |
173 | 0 | } |
174 | | |
175 | | std::string const& |
176 | | QPDF::QPDFVersion() |
177 | 0 | { |
178 | | // The C API relies on this being a static value. |
179 | 0 | return QPDF::qpdf_version; |
180 | 0 | } |
181 | | |
182 | | QPDF::Members::Members() : |
183 | 0 | log(QPDFLogger::defaultLogger()), |
184 | 0 | file(new InvalidInputSource()), |
185 | 0 | encp(new EncryptionParameters) |
186 | 0 | { |
187 | 0 | } |
188 | | |
189 | | QPDF::QPDF() : |
190 | 0 | m(std::make_unique<Members>()) |
191 | 0 | { |
192 | 0 | m->tokenizer.allowEOF(); |
193 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
194 | | // the lifetime of this running application. |
195 | 0 | static std::atomic<unsigned long long> unique_id{0}; |
196 | 0 | m->unique_id = unique_id.fetch_add(1ULL); |
197 | 0 | } |
198 | | |
199 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
200 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
201 | | class Disconnect: BaseHandle |
202 | | { |
203 | | public: |
204 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
205 | 0 | BaseHandle(obj) |
206 | 0 | { |
207 | 0 | } |
208 | | void |
209 | | disconnect() |
210 | 0 | { |
211 | 0 | BaseHandle::disconnect(false); |
212 | 0 | if (raw_type_code() != ::ot_null) { |
213 | 0 | obj->value = QPDF_Destroyed(); |
214 | 0 | } |
215 | 0 | } |
216 | | }; |
217 | | |
218 | | QPDF::~QPDF() |
219 | 0 | { |
220 | | // If two objects are mutually referential (through each object having an array or dictionary |
221 | | // that contains an indirect reference to the other), the circular references in the |
222 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
223 | | // in the object cache, which is those objects that we read from the file, and break all |
224 | | // resolved indirect references by replacing them with an internal object type representing that |
225 | | // they have been destroyed. Note that we can't break references like this at any time when the |
226 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
227 | | // are reachable from this object to release their association with this QPDF. Direct objects |
228 | | // are not destroyed since they can be moved to other QPDF objects safely. |
229 | | |
230 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
231 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
232 | 0 | m->xref_table.clear(); |
233 | 0 | for (auto const& iter: m->obj_cache) { |
234 | 0 | Disconnect(iter.second.object).disconnect(); |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | | std::shared_ptr<QPDF> |
239 | | QPDF::create() |
240 | 0 | { |
241 | 0 | return std::make_shared<QPDF>(); |
242 | 0 | } |
243 | | |
244 | | void |
245 | | QPDF::processFile(char const* filename, char const* password) |
246 | 0 | { |
247 | 0 | auto* fi = new FileInputSource(filename); |
248 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
249 | 0 | } |
250 | | |
251 | | void |
252 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
253 | 0 | { |
254 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
255 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
256 | 0 | } |
257 | | |
258 | | void |
259 | | QPDF::processMemoryFile( |
260 | | char const* description, char const* buf, size_t length, char const* password) |
261 | 0 | { |
262 | 0 | processInputSource( |
263 | 0 | std::shared_ptr<InputSource>( |
264 | | // line-break |
265 | 0 | new BufferInputSource( |
266 | 0 | description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)), |
267 | 0 | password); |
268 | 0 | } |
269 | | |
270 | | void |
271 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
272 | 0 | { |
273 | 0 | m->file = source; |
274 | 0 | parse(password); |
275 | 0 | } |
276 | | |
277 | | void |
278 | | QPDF::closeInputSource() |
279 | 0 | { |
280 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
281 | 0 | } |
282 | | |
283 | | void |
284 | | QPDF::setPasswordIsHexKey(bool val) |
285 | 0 | { |
286 | 0 | m->provided_password_is_hex_key = val; |
287 | 0 | } |
288 | | |
289 | | void |
290 | | QPDF::emptyPDF() |
291 | 0 | { |
292 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
293 | 0 | } |
294 | | |
295 | | void |
296 | | QPDF::registerStreamFilter( |
297 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
298 | 0 | { |
299 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
300 | 0 | } |
301 | | |
302 | | void |
303 | | QPDF::setIgnoreXRefStreams(bool val) |
304 | 0 | { |
305 | 0 | m->ignore_xref_streams = val; |
306 | 0 | } |
307 | | |
308 | | std::shared_ptr<QPDFLogger> |
309 | | QPDF::getLogger() |
310 | 0 | { |
311 | 0 | return m->log; |
312 | 0 | } |
313 | | |
314 | | void |
315 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
316 | 0 | { |
317 | 0 | m->log = l; |
318 | 0 | } |
319 | | |
320 | | void |
321 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
322 | 0 | { |
323 | 0 | setLogger(QPDFLogger::create()); |
324 | 0 | m->log->setOutputStreams(out, err); |
325 | 0 | } |
326 | | |
327 | | void |
328 | | QPDF::setSuppressWarnings(bool val) |
329 | 0 | { |
330 | 0 | m->suppress_warnings = val; |
331 | 0 | } |
332 | | |
333 | | void |
334 | | QPDF::setMaxWarnings(size_t val) |
335 | 0 | { |
336 | 0 | m->max_warnings = val; |
337 | 0 | } |
338 | | |
339 | | void |
340 | | QPDF::setAttemptRecovery(bool val) |
341 | 0 | { |
342 | 0 | m->attempt_recovery = val; |
343 | 0 | } |
344 | | |
345 | | void |
346 | | QPDF::setImmediateCopyFrom(bool val) |
347 | 0 | { |
348 | 0 | m->immediate_copy_from = val; |
349 | 0 | } |
350 | | |
351 | | std::vector<QPDFExc> |
352 | | QPDF::getWarnings() |
353 | 0 | { |
354 | 0 | std::vector<QPDFExc> result = m->warnings; |
355 | 0 | m->warnings.clear(); |
356 | 0 | return result; |
357 | 0 | } |
358 | | |
359 | | bool |
360 | | QPDF::anyWarnings() const |
361 | 0 | { |
362 | 0 | return !m->warnings.empty(); |
363 | 0 | } |
364 | | |
365 | | size_t |
366 | | QPDF::numWarnings() const |
367 | 0 | { |
368 | 0 | return m->warnings.size(); |
369 | 0 | } |
370 | | |
371 | | bool |
372 | | QPDF::validatePDFVersion(char const*& p, std::string& version) |
373 | 0 | { |
374 | 0 | bool valid = util::is_digit(*p); |
375 | 0 | if (valid) { |
376 | 0 | while (util::is_digit(*p)) { |
377 | 0 | version.append(1, *p++); |
378 | 0 | } |
379 | 0 | if ((*p == '.') && util::is_digit(*(p + 1))) { |
380 | 0 | version.append(1, *p++); |
381 | 0 | while (util::is_digit(*p)) { |
382 | 0 | version.append(1, *p++); |
383 | 0 | } |
384 | 0 | } else { |
385 | 0 | valid = false; |
386 | 0 | } |
387 | 0 | } |
388 | 0 | return valid; |
389 | 0 | } |
390 | | |
391 | | bool |
392 | | QPDF::findHeader() |
393 | 0 | { |
394 | 0 | qpdf_offset_t global_offset = m->file->tell(); |
395 | 0 | std::string line = m->file->readLine(1024); |
396 | 0 | char const* p = line.c_str(); |
397 | 0 | if (strncmp(p, "%PDF-", 5) != 0) { |
398 | 0 | throw std::logic_error("findHeader is not looking at %PDF-"); |
399 | 0 | } |
400 | 0 | p += 5; |
401 | 0 | std::string version; |
402 | | // Note: The string returned by line.c_str() is always null-terminated. The code below never |
403 | | // overruns the buffer because a null character always short-circuits further advancement. |
404 | 0 | bool valid = validatePDFVersion(p, version); |
405 | 0 | if (valid) { |
406 | 0 | m->pdf_version = version; |
407 | 0 | if (global_offset != 0) { |
408 | | // Empirical evidence strongly suggests that when there is leading material prior to the |
409 | | // PDF header, all explicit offsets in the file are such that 0 points to the beginning |
410 | | // of the header. |
411 | 0 | QTC::TC("qpdf", "QPDF global offset"); |
412 | 0 | m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); |
413 | 0 | } |
414 | 0 | } |
415 | 0 | return valid; |
416 | 0 | } |
417 | | |
418 | | void |
419 | | QPDF::warn(QPDFExc const& e) |
420 | 0 | { |
421 | 0 | if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { |
422 | 0 | stopOnError("Too many warnings - file is too badly damaged"); |
423 | 0 | } |
424 | 0 | m->warnings.push_back(e); |
425 | 0 | if (!m->suppress_warnings) { |
426 | 0 | *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
427 | 0 | } |
428 | 0 | } |
429 | | |
430 | | void |
431 | | QPDF::warn( |
432 | | qpdf_error_code_e error_code, |
433 | | std::string const& object, |
434 | | qpdf_offset_t offset, |
435 | | std::string const& message) |
436 | 0 | { |
437 | 0 | warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
438 | 0 | } |
439 | | |
440 | | QPDFObjectHandle |
441 | | QPDF::newReserved() |
442 | 0 | { |
443 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
444 | 0 | } |
445 | | |
446 | | QPDFObjectHandle |
447 | | QPDF::newIndirectNull() |
448 | 0 | { |
449 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
450 | 0 | } |
451 | | |
452 | | QPDFObjectHandle |
453 | | QPDF::newStream() |
454 | 0 | { |
455 | 0 | return makeIndirectObject( |
456 | 0 | qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0)); |
457 | 0 | } |
458 | | |
459 | | QPDFObjectHandle |
460 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
461 | 0 | { |
462 | 0 | auto result = newStream(); |
463 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
464 | 0 | return result; |
465 | 0 | } |
466 | | |
467 | | QPDFObjectHandle |
468 | | QPDF::newStream(std::string const& data) |
469 | 0 | { |
470 | 0 | auto result = newStream(); |
471 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
472 | 0 | return result; |
473 | 0 | } |
474 | | |
475 | | QPDFObjectHandle |
476 | | QPDF::getObject(int objid, int generation) |
477 | 0 | { |
478 | 0 | return getObject(QPDFObjGen(objid, generation)); |
479 | 0 | } |
480 | | |
481 | | QPDFObjectHandle |
482 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
483 | 0 | { |
484 | 0 | return getObject(og); |
485 | 0 | } |
486 | | |
487 | | QPDFObjectHandle |
488 | | QPDF::getObjectByID(int objid, int generation) |
489 | 0 | { |
490 | 0 | return getObject(QPDFObjGen(objid, generation)); |
491 | 0 | } |
492 | | |
493 | | QPDFObjectHandle |
494 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
495 | 0 | { |
496 | | // Here's an explanation of what's going on here. |
497 | | // |
498 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
499 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
500 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
501 | | // references to the corresponding object in the local file. |
502 | | // |
503 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
504 | | // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a |
505 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
506 | | // |
507 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
508 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
509 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
510 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
511 | | // mapping from the foreign object ID to the new object. While we |
512 | | // do this, we keep a list of objects to copy. |
513 | | // |
514 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
515 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
516 | | // need to replace them with references to objects in the local file. This is what |
517 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
518 | | // all the indirect references replaced with new ones in the local context, we can replace the |
519 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
520 | | // references in any order. |
521 | | |
522 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
523 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
524 | | // require the original QPDF object but may require the original source of the stream data with |
525 | | // special handling for immediate_copy_from. This logic is also in |
526 | | // replaceForeignIndirectObjects. |
527 | | |
528 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
529 | | // use case to copy pages this way if the intention is to not update the pages tree. |
530 | 0 | if (!foreign.isIndirect()) { |
531 | 0 | QTC::TC("qpdf", "QPDF copyForeign direct"); |
532 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
533 | 0 | } |
534 | 0 | QPDF& other = foreign.getQPDF(); |
535 | 0 | if (&other == this) { |
536 | 0 | QTC::TC("qpdf", "QPDF copyForeign not foreign"); |
537 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
538 | 0 | } |
539 | | |
540 | 0 | ObjCopier& obj_copier = m->object_copiers[other.m->unique_id]; |
541 | 0 | if (!obj_copier.visiting.empty()) { |
542 | 0 | throw std::logic_error( |
543 | 0 | "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
544 | 0 | } |
545 | | |
546 | | // Make sure we have an object in this file for every referenced object in the old file. |
547 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
548 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
549 | | // the local object will already be a stream. |
550 | 0 | reserveObjects(foreign, obj_copier, true); |
551 | |
|
552 | 0 | if (!obj_copier.visiting.empty()) { |
553 | 0 | throw std::logic_error("obj_copier.visiting is not empty after reserving objects"); |
554 | 0 | } |
555 | | |
556 | | // Copy any new objects and replace the reservations. |
557 | 0 | for (auto& to_copy: obj_copier.to_copy) { |
558 | 0 | QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true); |
559 | 0 | if (!to_copy.isStream()) { |
560 | 0 | QPDFObjGen og(to_copy.getObjGen()); |
561 | 0 | replaceReserved(obj_copier.object_map[og], copy); |
562 | 0 | } |
563 | 0 | } |
564 | 0 | obj_copier.to_copy.clear(); |
565 | |
|
566 | 0 | auto og = foreign.getObjGen(); |
567 | 0 | if (!obj_copier.object_map.contains(og)) { |
568 | 0 | warn(damagedPDF( |
569 | 0 | other.getFilename() + " object " + og.unparse(' '), |
570 | 0 | foreign.getParsedOffset(), |
571 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
572 | 0 | "null")); |
573 | 0 | return QPDFObjectHandle::newNull(); |
574 | 0 | } |
575 | 0 | return obj_copier.object_map[foreign.getObjGen()]; |
576 | 0 | } |
577 | | |
578 | | void |
579 | | QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
580 | 0 | { |
581 | 0 | auto foreign_tc = foreign.getTypeCode(); |
582 | 0 | if (foreign_tc == ::ot_reserved) { |
583 | 0 | throw std::logic_error("QPDF: attempting to copy a foreign reserved object"); |
584 | 0 | } |
585 | | |
586 | 0 | if (foreign.isPagesObject()) { |
587 | 0 | QTC::TC("qpdf", "QPDF not copying pages object"); |
588 | 0 | return; |
589 | 0 | } |
590 | | |
591 | 0 | if (foreign.isIndirect()) { |
592 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
593 | 0 | if (!obj_copier.visiting.add(foreign_og)) { |
594 | 0 | QTC::TC("qpdf", "QPDF loop reserving objects"); |
595 | 0 | return; |
596 | 0 | } |
597 | 0 | if (obj_copier.object_map.contains(foreign_og)) { |
598 | 0 | QTC::TC("qpdf", "QPDF already reserved object"); |
599 | 0 | if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) { |
600 | 0 | obj_copier.visiting.erase(foreign); |
601 | 0 | return; |
602 | 0 | } |
603 | 0 | } else { |
604 | 0 | QTC::TC("qpdf", "QPDF copy indirect"); |
605 | 0 | obj_copier.object_map[foreign_og] = |
606 | 0 | foreign.isStream() ? newStream() : newIndirectNull(); |
607 | 0 | if ((!top) && foreign.isPageObject()) { |
608 | 0 | QTC::TC("qpdf", "QPDF not crossing page boundary"); |
609 | 0 | obj_copier.visiting.erase(foreign_og); |
610 | 0 | return; |
611 | 0 | } |
612 | 0 | } |
613 | 0 | obj_copier.to_copy.push_back(foreign); |
614 | 0 | } |
615 | | |
616 | 0 | if (foreign_tc == ::ot_array) { |
617 | 0 | QTC::TC("qpdf", "QPDF reserve array"); |
618 | 0 | for (auto const& item: foreign.as_array()) { |
619 | 0 | reserveObjects(item, obj_copier, false); |
620 | 0 | } |
621 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
622 | 0 | QTC::TC("qpdf", "QPDF reserve dictionary"); |
623 | 0 | for (auto const& item: foreign.as_dictionary()) { |
624 | 0 | if (!item.second.null()) { |
625 | 0 | reserveObjects(item.second, obj_copier, false); |
626 | 0 | } |
627 | 0 | } |
628 | 0 | } else if (foreign_tc == ::ot_stream) { |
629 | 0 | QTC::TC("qpdf", "QPDF reserve stream"); |
630 | 0 | reserveObjects(foreign.getDict(), obj_copier, false); |
631 | 0 | } |
632 | |
|
633 | 0 | obj_copier.visiting.erase(foreign); |
634 | 0 | } |
635 | | |
636 | | QPDFObjectHandle |
637 | | QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
638 | 0 | { |
639 | 0 | auto foreign_tc = foreign.getTypeCode(); |
640 | 0 | QPDFObjectHandle result; |
641 | 0 | if ((!top) && foreign.isIndirect()) { |
642 | 0 | QTC::TC("qpdf", "QPDF replace indirect"); |
643 | 0 | auto mapping = obj_copier.object_map.find(foreign.getObjGen()); |
644 | 0 | if (mapping == obj_copier.object_map.end()) { |
645 | | // This case would occur if this is a reference to a Pages object that we didn't |
646 | | // traverse into. |
647 | 0 | QTC::TC("qpdf", "QPDF replace foreign indirect with null"); |
648 | 0 | result = QPDFObjectHandle::newNull(); |
649 | 0 | } else { |
650 | 0 | result = mapping->second; |
651 | 0 | } |
652 | 0 | } else if (foreign_tc == ::ot_array) { |
653 | 0 | QTC::TC("qpdf", "QPDF replace array"); |
654 | 0 | result = QPDFObjectHandle::newArray(); |
655 | 0 | for (auto const& item: foreign.as_array()) { |
656 | 0 | result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false)); |
657 | 0 | } |
658 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
659 | 0 | QTC::TC("qpdf", "QPDF replace dictionary"); |
660 | 0 | result = QPDFObjectHandle::newDictionary(); |
661 | 0 | for (auto const& [key, value]: foreign.as_dictionary()) { |
662 | 0 | if (!value.null()) { |
663 | 0 | result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
664 | 0 | } |
665 | 0 | } |
666 | 0 | } else if (foreign_tc == ::ot_stream) { |
667 | 0 | QTC::TC("qpdf", "QPDF replace stream"); |
668 | 0 | result = obj_copier.object_map[foreign.getObjGen()]; |
669 | 0 | QPDFObjectHandle dict = result.getDict(); |
670 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
671 | 0 | for (auto const& [key, value]: old_dict.as_dictionary()) { |
672 | 0 | if (!value.null()) { |
673 | 0 | dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
674 | 0 | } |
675 | 0 | } |
676 | 0 | copyStreamData(result, foreign); |
677 | 0 | } else { |
678 | 0 | foreign.assertScalar(); |
679 | 0 | result = foreign; |
680 | 0 | result.makeDirect(); |
681 | 0 | } |
682 | |
|
683 | 0 | if (top && (!result.isStream()) && result.isIndirect()) { |
684 | 0 | throw std::logic_error("replacement for foreign object is indirect"); |
685 | 0 | } |
686 | | |
687 | 0 | return result; |
688 | 0 | } |
689 | | |
690 | | void |
691 | | QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) |
692 | 0 | { |
693 | | // This method was originally written for copying foreign streams, but it is used by |
694 | | // QPDFObjectHandle to copy streams from the same QPDF object as well. |
695 | |
|
696 | 0 | QPDFObjectHandle dict = result.getDict(); |
697 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
698 | 0 | if (m->copied_stream_data_provider == nullptr) { |
699 | 0 | m->copied_stream_data_provider = new CopiedStreamDataProvider(*this); |
700 | 0 | m->copied_streams = |
701 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); |
702 | 0 | } |
703 | 0 | QPDFObjGen local_og(result.getObjGen()); |
704 | | // Copy information from the foreign stream so we can pipe its data later without keeping the |
705 | | // original QPDF object around. |
706 | |
|
707 | 0 | QPDF& foreign_stream_qpdf = |
708 | 0 | foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); |
709 | |
|
710 | 0 | auto stream = foreign.as_stream(); |
711 | 0 | if (!stream) { |
712 | 0 | throw std::logic_error("unable to retrieve underlying stream object from foreign stream"); |
713 | 0 | } |
714 | 0 | std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer(); |
715 | 0 | if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { |
716 | | // Pull the stream data into a buffer before attempting the copy operation. Do it on the |
717 | | // source stream so that if the source stream is copied multiple times, we don't have to |
718 | | // keep duplicating the memory. |
719 | 0 | QTC::TC("qpdf", "QPDF immediate copy stream data"); |
720 | 0 | foreign.replaceStreamData( |
721 | 0 | foreign.getRawStreamData(), |
722 | 0 | old_dict.getKey("/Filter"), |
723 | 0 | old_dict.getKey("/DecodeParms")); |
724 | 0 | stream_buffer = stream.getStreamDataBuffer(); |
725 | 0 | } |
726 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider = |
727 | 0 | stream.getStreamDataProvider(); |
728 | 0 | if (stream_buffer.get()) { |
729 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with buffer"); |
730 | 0 | result.replaceStreamData( |
731 | 0 | stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
732 | 0 | } else if (stream_provider.get()) { |
733 | | // In this case, the remote stream's QPDF must stay in scope. |
734 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with provider"); |
735 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign); |
736 | 0 | result.replaceStreamData( |
737 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
738 | 0 | } else { |
739 | 0 | auto foreign_stream_data = std::make_shared<ForeignStreamData>( |
740 | 0 | foreign_stream_qpdf.m->encp, |
741 | 0 | foreign_stream_qpdf.m->file, |
742 | 0 | foreign, |
743 | 0 | foreign.getParsedOffset(), |
744 | 0 | stream.getLength(), |
745 | 0 | dict, |
746 | 0 | stream.isRootMetadata()); |
747 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data); |
748 | 0 | result.replaceStreamData( |
749 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
750 | 0 | } |
751 | 0 | } |
752 | | |
753 | | unsigned long long |
754 | | QPDF::getUniqueId() const |
755 | 0 | { |
756 | 0 | return m->unique_id; |
757 | 0 | } |
758 | | |
759 | | std::string |
760 | | QPDF::getFilename() const |
761 | 0 | { |
762 | 0 | return m->file->getName(); |
763 | 0 | } |
764 | | |
765 | | PDFVersion |
766 | | QPDF::getVersionAsPDFVersion() |
767 | 0 | { |
768 | 0 | int major = 1; |
769 | 0 | int minor = 3; |
770 | 0 | int extension_level = getExtensionLevel(); |
771 | |
|
772 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
773 | 0 | std::smatch match; |
774 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
775 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
776 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
777 | 0 | } |
778 | |
|
779 | 0 | return {major, minor, extension_level}; |
780 | 0 | } |
781 | | |
782 | | std::string |
783 | | QPDF::getPDFVersion() const |
784 | 0 | { |
785 | 0 | return m->pdf_version; |
786 | 0 | } |
787 | | |
788 | | int |
789 | | QPDF::getExtensionLevel() |
790 | 0 | { |
791 | 0 | int result = 0; |
792 | 0 | QPDFObjectHandle obj = getRoot(); |
793 | 0 | if (obj.hasKey("/Extensions")) { |
794 | 0 | obj = obj.getKey("/Extensions"); |
795 | 0 | if (obj.isDictionary() && obj.hasKey("/ADBE")) { |
796 | 0 | obj = obj.getKey("/ADBE"); |
797 | 0 | if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) { |
798 | 0 | obj = obj.getKey("/ExtensionLevel"); |
799 | 0 | if (obj.isInteger()) { |
800 | 0 | result = obj.getIntValueAsInt(); |
801 | 0 | } |
802 | 0 | } |
803 | 0 | } |
804 | 0 | } |
805 | 0 | return result; |
806 | 0 | } |
807 | | |
808 | | QPDFObjectHandle |
809 | | QPDF::getTrailer() |
810 | 0 | { |
811 | 0 | return m->trailer; |
812 | 0 | } |
813 | | |
814 | | QPDFObjectHandle |
815 | | QPDF::getRoot() |
816 | 0 | { |
817 | 0 | QPDFObjectHandle root = m->trailer.getKey("/Root"); |
818 | 0 | if (!root.isDictionary()) { |
819 | 0 | throw damagedPDF("", -1, "unable to find /Root dictionary"); |
820 | 0 | } else if ( |
821 | | // Check_mode is an interim solution to request #810 pending a more comprehensive review of |
822 | | // the approach to more extensive checks and warning levels. |
823 | 0 | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { |
824 | 0 | warn(damagedPDF("", -1, "catalog /Type entry missing or invalid")); |
825 | 0 | root.replaceKey("/Type", "/Catalog"_qpdf); |
826 | 0 | } |
827 | 0 | return root; |
828 | 0 | } |
829 | | |
830 | | std::map<QPDFObjGen, QPDFXRefEntry> |
831 | | QPDF::getXRefTable() |
832 | 0 | { |
833 | 0 | return getXRefTableInternal(); |
834 | 0 | } |
835 | | |
836 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
837 | | QPDF::getXRefTableInternal() |
838 | 0 | { |
839 | 0 | if (!m->parsed) { |
840 | 0 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
841 | 0 | } |
842 | | |
843 | 0 | return m->xref_table; |
844 | 0 | } |
845 | | |
846 | | bool |
847 | | QPDF::pipeStreamData( |
848 | | std::shared_ptr<EncryptionParameters> encp, |
849 | | std::shared_ptr<InputSource> file, |
850 | | QPDF& qpdf_for_warning, |
851 | | QPDFObjGen og, |
852 | | qpdf_offset_t offset, |
853 | | size_t length, |
854 | | QPDFObjectHandle stream_dict, |
855 | | bool is_root_metadata, |
856 | | Pipeline* pipeline, |
857 | | bool suppress_warnings, |
858 | | bool will_retry) |
859 | 0 | { |
860 | 0 | std::unique_ptr<Pipeline> to_delete; |
861 | 0 | if (encp->encrypted) { |
862 | 0 | decryptStream( |
863 | 0 | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
864 | 0 | } |
865 | |
|
866 | 0 | bool attempted_finish = false; |
867 | 0 | try { |
868 | 0 | auto buf = file->read(length, offset); |
869 | 0 | if (buf.size() != length) { |
870 | 0 | throw damagedPDF( |
871 | 0 | *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); |
872 | 0 | } |
873 | 0 | pipeline->write(buf.data(), length); |
874 | 0 | attempted_finish = true; |
875 | 0 | pipeline->finish(); |
876 | 0 | return true; |
877 | 0 | } catch (QPDFExc& e) { |
878 | 0 | if (!suppress_warnings) { |
879 | 0 | qpdf_for_warning.warn(e); |
880 | 0 | } |
881 | 0 | } catch (std::exception& e) { |
882 | 0 | if (!suppress_warnings) { |
883 | 0 | QTC::TC("qpdf", "QPDF decoding error warning"); |
884 | 0 | qpdf_for_warning.warn( |
885 | | // line-break |
886 | 0 | damagedPDF( |
887 | 0 | *file, |
888 | 0 | "", |
889 | 0 | file->getLastOffset(), |
890 | 0 | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
891 | 0 | e.what()))); |
892 | 0 | if (will_retry) { |
893 | 0 | qpdf_for_warning.warn( |
894 | | // line-break |
895 | 0 | damagedPDF( |
896 | 0 | *file, |
897 | 0 | "", |
898 | 0 | file->getLastOffset(), |
899 | 0 | "stream will be re-processed without filtering to avoid data loss")); |
900 | 0 | } |
901 | 0 | } |
902 | 0 | } |
903 | 0 | if (!attempted_finish) { |
904 | 0 | try { |
905 | 0 | pipeline->finish(); |
906 | 0 | } catch (std::exception&) { |
907 | | // ignore |
908 | 0 | } |
909 | 0 | } |
910 | 0 | return false; |
911 | 0 | } |
912 | | |
913 | | bool |
914 | | QPDF::pipeStreamData( |
915 | | QPDFObjGen og, |
916 | | qpdf_offset_t offset, |
917 | | size_t length, |
918 | | QPDFObjectHandle stream_dict, |
919 | | bool is_root_metadata, |
920 | | Pipeline* pipeline, |
921 | | bool suppress_warnings, |
922 | | bool will_retry) |
923 | 0 | { |
924 | 0 | return pipeStreamData( |
925 | 0 | m->encp, |
926 | 0 | m->file, |
927 | 0 | *this, |
928 | 0 | og, |
929 | 0 | offset, |
930 | 0 | length, |
931 | 0 | stream_dict, |
932 | 0 | is_root_metadata, |
933 | 0 | pipeline, |
934 | 0 | suppress_warnings, |
935 | 0 | will_retry); |
936 | 0 | } |
937 | | |
938 | | bool |
939 | | QPDF::pipeForeignStreamData( |
940 | | std::shared_ptr<ForeignStreamData> foreign, |
941 | | Pipeline* pipeline, |
942 | | bool suppress_warnings, |
943 | | bool will_retry) |
944 | 0 | { |
945 | 0 | if (foreign->encp->encrypted) { |
946 | 0 | QTC::TC("qpdf", "QPDF pipe foreign encrypted stream"); |
947 | 0 | } |
948 | 0 | return pipeStreamData( |
949 | 0 | foreign->encp, |
950 | 0 | foreign->file, |
951 | 0 | *this, |
952 | 0 | foreign->foreign_og, |
953 | 0 | foreign->offset, |
954 | 0 | foreign->length, |
955 | 0 | foreign->local_dict, |
956 | 0 | foreign->is_root_metadata, |
957 | 0 | pipeline, |
958 | 0 | suppress_warnings, |
959 | 0 | will_retry); |
960 | 0 | } |
961 | | |
962 | | // Throw a generic exception when we lack context for something more specific. New code should not |
963 | | // use this. This method exists to improve somewhat from calling assert in very old code. |
964 | | void |
965 | | QPDF::stopOnError(std::string const& message) |
966 | 0 | { |
967 | 0 | throw damagedPDF("", message); |
968 | 0 | } |
969 | | |
970 | | // Return an exception of type qpdf_e_damaged_pdf. |
971 | | QPDFExc |
972 | | QPDF::damagedPDF( |
973 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
974 | 0 | { |
975 | 0 | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
976 | 0 | } |
977 | | |
978 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
979 | | // m->last_object_description. |
980 | | QPDFExc |
981 | | QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) |
982 | 0 | { |
983 | 0 | return damagedPDF(input, m->last_object_description, offset, message); |
984 | 0 | } |
985 | | |
986 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
987 | | QPDFExc |
988 | | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) |
989 | 0 | { |
990 | 0 | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
991 | 0 | } |
992 | | |
993 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
994 | | // offset from .m->file->getLastOffset(). |
995 | | QPDFExc |
996 | | QPDF::damagedPDF(std::string const& object, std::string const& message) |
997 | 0 | { |
998 | 0 | return damagedPDF(object, m->file->getLastOffset(), message); |
999 | 0 | } |
1000 | | |
1001 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
1002 | | // from .m->last_object_description. |
1003 | | QPDFExc |
1004 | | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) |
1005 | 0 | { |
1006 | 0 | return damagedPDF(m->last_object_description, offset, message); |
1007 | 0 | } |
1008 | | |
1009 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
1010 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
1011 | | QPDFExc |
1012 | | QPDF::damagedPDF(std::string const& message) |
1013 | 0 | { |
1014 | 0 | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
1015 | 0 | } |
1016 | | |
1017 | | bool |
1018 | | QPDF::everCalledGetAllPages() const |
1019 | 0 | { |
1020 | 0 | return m->ever_called_get_all_pages; |
1021 | 0 | } |
1022 | | |
1023 | | bool |
1024 | | QPDF::everPushedInheritedAttributesToPages() const |
1025 | 0 | { |
1026 | 0 | return m->ever_pushed_inherited_attributes_to_pages; |
1027 | 0 | } |
1028 | | |
1029 | | void |
1030 | | QPDF::removeSecurityRestrictions() |
1031 | 0 | { |
1032 | 0 | auto root = getRoot(); |
1033 | 0 | root.removeKey("/Perms"); |
1034 | 0 | auto acroform = root.getKey("/AcroForm"); |
1035 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
1036 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
1037 | 0 | } |
1038 | 0 | } |