/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source (jump to first uncovered line) |
#include <qpdf/qpdf-config.h> // include first for large file support

#include <qpdf/QPDF_private.hh>

#include <array>
#include <atomic>
#include <cstring>
#include <limits>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <utility>
#include <vector>

#include <qpdf/FileInputSource.hh>
#include <qpdf/InputSource_private.hh>
#include <qpdf/OffsetInputSource.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFLogger.hh>
#include <qpdf/QPDFObjectHandle_private.hh>
#include <qpdf/QPDFObject_private.hh>
#include <qpdf/QPDFParser.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/Util.hh>
26 | | |
27 | | using namespace qpdf; |
28 | | using namespace std::literals; |
29 | | |
30 | | // This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
31 | | // being static as well. |
32 | | std::string const QPDF::qpdf_version(QPDF_VERSION); |
33 | | |
// Minimal, complete PDF used by emptyPDF(): a catalog (object 1) and an empty page tree
// (object 2). The byte offsets in the xref table and after startxref are tied to the exact text
// of the preceding lines (object 1 at 9, object 2 at 58, xref at 110), so any change to the text
// must be matched by a change to those numbers.
static char const* EMPTY_PDF = (
    // keep one fragment per line
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
52 | | |
53 | | namespace |
54 | | { |
55 | | class InvalidInputSource: public InputSource |
56 | | { |
57 | | public: |
58 | | ~InvalidInputSource() override = default; |
59 | | qpdf_offset_t |
60 | | findAndSkipNextEOL() override |
61 | 0 | { |
62 | 0 | throwException(); |
63 | 0 | return 0; |
64 | 0 | } |
65 | | std::string const& |
66 | | getName() const override |
67 | 0 | { |
68 | 0 | static std::string name("closed input source"); |
69 | 0 | return name; |
70 | 0 | } |
71 | | qpdf_offset_t |
72 | | tell() override |
73 | 0 | { |
74 | 0 | throwException(); |
75 | 0 | return 0; |
76 | 0 | } |
77 | | void |
78 | | seek(qpdf_offset_t offset, int whence) override |
79 | 0 | { |
80 | 0 | throwException(); |
81 | 0 | } |
82 | | void |
83 | | rewind() override |
84 | 0 | { |
85 | 0 | throwException(); |
86 | 0 | } |
87 | | size_t |
88 | | read(char* buffer, size_t length) override |
89 | 0 | { |
90 | 0 | throwException(); |
91 | 0 | return 0; |
92 | 0 | } |
93 | | void |
94 | | unreadCh(char ch) override |
95 | 0 | { |
96 | 0 | throwException(); |
97 | 0 | } |
98 | | |
99 | | private: |
100 | | void |
101 | | throwException() |
102 | 0 | { |
103 | 0 | throw std::logic_error( |
104 | 0 | "QPDF operation attempted on a QPDF object with no input " |
105 | 0 | "source. QPDF operations are invalid before processFile (or " |
106 | 0 | "another process method) or after closeInputSource"); |
107 | 0 | } |
108 | | }; |
109 | | } // namespace |
110 | | |
111 | | QPDF::ForeignStreamData::ForeignStreamData( |
112 | | std::shared_ptr<EncryptionParameters> encp, |
113 | | std::shared_ptr<InputSource> file, |
114 | | QPDFObjGen foreign_og, |
115 | | qpdf_offset_t offset, |
116 | | size_t length, |
117 | | QPDFObjectHandle local_dict, |
118 | | bool is_root_metadata) : |
119 | 0 | encp(encp), |
120 | 0 | file(file), |
121 | 0 | foreign_og(foreign_og), |
122 | 0 | offset(offset), |
123 | 0 | length(length), |
124 | 0 | local_dict(local_dict), |
125 | 0 | is_root_metadata(is_root_metadata) |
126 | 0 | { |
127 | 0 | } |
128 | | |
129 | | QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) : |
130 | 0 | QPDFObjectHandle::StreamDataProvider(true), |
131 | 0 | destination_qpdf(destination_qpdf) |
132 | 0 | { |
133 | 0 | } |
134 | | |
135 | | bool |
136 | | QPDF::CopiedStreamDataProvider::provideStreamData( |
137 | | QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) |
138 | 0 | { |
139 | 0 | std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og]; |
140 | 0 | bool result = false; |
141 | 0 | if (foreign_data.get()) { |
142 | 0 | result = destination_qpdf.pipeForeignStreamData( |
143 | 0 | foreign_data, pipeline, suppress_warnings, will_retry); |
144 | 0 | QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1); |
145 | 0 | } else { |
146 | 0 | auto foreign_stream = foreign_streams[og]; |
147 | 0 | result = foreign_stream.pipeStreamData( |
148 | 0 | pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry); |
149 | 0 | QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1); |
150 | 0 | } |
151 | 0 | return result; |
152 | 0 | } |
153 | | |
154 | | void |
155 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
156 | | QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream) |
157 | 0 | { |
158 | 0 | this->foreign_streams[local_og] = foreign_stream; |
159 | 0 | } |
160 | | |
161 | | void |
162 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
163 | | QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream) |
164 | 0 | { |
165 | 0 | this->foreign_stream_data[local_og] = foreign_stream; |
166 | 0 | } |
167 | | |
168 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
169 | 113k | qpdf(qpdf), |
170 | 113k | og(og) |
171 | 113k | { |
172 | 113k | } |
173 | | |
174 | | std::string const& |
175 | | QPDF::QPDFVersion() |
176 | 0 | { |
177 | | // The C API relies on this being a static value. |
178 | 0 | return QPDF::qpdf_version; |
179 | 0 | } |
180 | | |
181 | | QPDF::Members::Members() : |
182 | 23.4k | log(QPDFLogger::defaultLogger()), |
183 | 23.4k | file(new InvalidInputSource()), |
184 | 23.4k | encp(new EncryptionParameters) |
185 | 23.4k | { |
186 | 23.4k | } |
187 | | |
188 | | QPDF::QPDF() : |
189 | 23.4k | m(std::make_unique<Members>()) |
190 | 23.4k | { |
191 | 23.4k | m->tokenizer.allowEOF(); |
192 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
193 | | // the lifetime of this running application. |
194 | 23.4k | static std::atomic<unsigned long long> unique_id{0}; |
195 | 23.4k | m->unique_id = unique_id.fetch_add(1ULL); |
196 | 23.4k | } |
197 | | |
198 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
199 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
200 | | class Disconnect: BaseHandle |
201 | | { |
202 | | public: |
203 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
204 | 290k | BaseHandle(obj) |
205 | 290k | { |
206 | 290k | } |
207 | | void |
208 | | disconnect() |
209 | 290k | { |
210 | 290k | BaseHandle::disconnect(false); |
211 | 290k | if (raw_type_code() != ::ot_null) { |
212 | 124k | obj->value = QPDF_Destroyed(); |
213 | 124k | } |
214 | 290k | } |
215 | | }; |
216 | | |
217 | | QPDF::~QPDF() |
218 | 23.4k | { |
219 | | // If two objects are mutually referential (through each object having an array or dictionary |
220 | | // that contains an indirect reference to the other), the circular references in the |
221 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
222 | | // in the object cache, which is those objects that we read from the file, and break all |
223 | | // resolved indirect references by replacing them with an internal object type representing that |
224 | | // they have been destroyed. Note that we can't break references like this at any time when the |
225 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
226 | | // are reachable from this object to release their association with this QPDF. Direct objects |
227 | | // are not destroyed since they can be moved to other QPDF objects safely. |
228 | | |
229 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
230 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
231 | 23.4k | m->xref_table.clear(); |
232 | 290k | for (auto const& iter: m->obj_cache) { |
233 | 290k | Disconnect(iter.second.object).disconnect(); |
234 | 290k | } |
235 | 23.4k | } |
236 | | |
237 | | std::shared_ptr<QPDF> |
238 | | QPDF::create() |
239 | 23.4k | { |
240 | 23.4k | return std::make_shared<QPDF>(); |
241 | 23.4k | } |
242 | | |
243 | | void |
244 | | QPDF::processFile(char const* filename, char const* password) |
245 | 0 | { |
246 | 0 | auto* fi = new FileInputSource(filename); |
247 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
248 | 0 | } |
249 | | |
250 | | void |
251 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
252 | 0 | { |
253 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
254 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
255 | 0 | } |
256 | | |
257 | | void |
258 | | QPDF::processMemoryFile( |
259 | | char const* description, char const* buf, size_t length, char const* password) |
260 | 0 | { |
261 | 0 | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
262 | 0 | processInputSource(is, password); |
263 | 0 | } |
264 | | |
265 | | void |
266 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
267 | 23.4k | { |
268 | 23.4k | m->file = source; |
269 | 23.4k | parse(password); |
270 | 23.4k | } |
271 | | |
272 | | void |
273 | | QPDF::closeInputSource() |
274 | 0 | { |
275 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
276 | 0 | } |
277 | | |
278 | | void |
279 | | QPDF::setPasswordIsHexKey(bool val) |
280 | 0 | { |
281 | 0 | m->provided_password_is_hex_key = val; |
282 | 0 | } |
283 | | |
284 | | void |
285 | | QPDF::emptyPDF() |
286 | 0 | { |
287 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
288 | 0 | } |
289 | | |
290 | | void |
291 | | QPDF::registerStreamFilter( |
292 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
293 | 0 | { |
294 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
295 | 0 | } |
296 | | |
297 | | void |
298 | | QPDF::setIgnoreXRefStreams(bool val) |
299 | 0 | { |
300 | 0 | m->ignore_xref_streams = val; |
301 | 0 | } |
302 | | |
303 | | std::shared_ptr<QPDFLogger> |
304 | | QPDF::getLogger() |
305 | 0 | { |
306 | 0 | return m->log; |
307 | 0 | } |
308 | | |
309 | | void |
310 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
311 | 0 | { |
312 | 0 | m->log = l; |
313 | 0 | } |
314 | | |
315 | | void |
316 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
317 | 0 | { |
318 | 0 | setLogger(QPDFLogger::create()); |
319 | 0 | m->log->setOutputStreams(out, err); |
320 | 0 | } |
321 | | |
322 | | void |
323 | | QPDF::setSuppressWarnings(bool val) |
324 | 0 | { |
325 | 0 | m->suppress_warnings = val; |
326 | 0 | } |
327 | | |
328 | | void |
329 | | QPDF::setMaxWarnings(size_t val) |
330 | 23.4k | { |
331 | 23.4k | m->max_warnings = val; |
332 | 23.4k | } |
333 | | |
334 | | void |
335 | | QPDF::setAttemptRecovery(bool val) |
336 | 0 | { |
337 | 0 | m->attempt_recovery = val; |
338 | 0 | } |
339 | | |
340 | | void |
341 | | QPDF::setImmediateCopyFrom(bool val) |
342 | 0 | { |
343 | 0 | m->immediate_copy_from = val; |
344 | 0 | } |
345 | | |
346 | | std::vector<QPDFExc> |
347 | | QPDF::getWarnings() |
348 | 0 | { |
349 | 0 | std::vector<QPDFExc> result = m->warnings; |
350 | 0 | m->warnings.clear(); |
351 | 0 | return result; |
352 | 0 | } |
353 | | |
354 | | bool |
355 | | QPDF::anyWarnings() const |
356 | 0 | { |
357 | 0 | return !m->warnings.empty(); |
358 | 0 | } |
359 | | |
360 | | size_t |
361 | | QPDF::numWarnings() const |
362 | 0 | { |
363 | 0 | return m->warnings.size(); |
364 | 0 | } |
365 | | |
366 | | bool |
367 | | QPDF::validatePDFVersion(char const*& p, std::string& version) |
368 | 8.56k | { |
369 | 8.56k | bool valid = util::is_digit(*p); |
370 | 8.56k | if (valid) { |
371 | 13.0k | while (util::is_digit(*p)) { |
372 | 8.12k | version.append(1, *p++); |
373 | 8.12k | } |
374 | 4.95k | if ((*p == '.') && util::is_digit(*(p + 1))) { |
375 | 3.61k | version.append(1, *p++); |
376 | 12.0k | while (util::is_digit(*p)) { |
377 | 8.46k | version.append(1, *p++); |
378 | 8.46k | } |
379 | 3.61k | } else { |
380 | 1.33k | valid = false; |
381 | 1.33k | } |
382 | 4.95k | } |
383 | 8.56k | return valid; |
384 | 8.56k | } |
385 | | |
386 | | bool |
387 | | QPDF::findHeader() |
388 | 8.57k | { |
389 | 8.57k | qpdf_offset_t global_offset = m->file->tell(); |
390 | 8.57k | std::string line = m->file->readLine(1024); |
391 | 8.57k | char const* p = line.c_str(); |
392 | 8.57k | if (strncmp(p, "%PDF-", 5) != 0) { |
393 | 0 | throw std::logic_error("findHeader is not looking at %PDF-"); |
394 | 0 | } |
395 | 8.57k | p += 5; |
396 | 8.57k | std::string version; |
397 | | // Note: The string returned by line.c_str() is always null-terminated. The code below never |
398 | | // overruns the buffer because a null character always short-circuits further advancement. |
399 | 8.57k | bool valid = validatePDFVersion(p, version); |
400 | 8.57k | if (valid) { |
401 | 3.61k | m->pdf_version = version; |
402 | 3.61k | if (global_offset != 0) { |
403 | | // Empirical evidence strongly suggests that when there is leading material prior to the |
404 | | // PDF header, all explicit offsets in the file are such that 0 points to the beginning |
405 | | // of the header. |
406 | 2.55k | QTC::TC("qpdf", "QPDF global offset"); |
407 | 2.55k | m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); |
408 | 2.55k | } |
409 | 3.61k | } |
410 | 8.57k | return valid; |
411 | 8.57k | } |
412 | | |
413 | | void |
414 | | QPDF::warn(QPDFExc const& e) |
415 | 756k | { |
416 | 756k | if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { |
417 | 16.5k | stopOnError("Too many warnings - file is too badly damaged"); |
418 | 16.5k | } |
419 | 756k | m->warnings.push_back(e); |
420 | 756k | if (!m->suppress_warnings) { |
421 | 739k | *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
422 | 739k | } |
423 | 756k | } |
424 | | |
425 | | void |
426 | | QPDF::warn( |
427 | | qpdf_error_code_e error_code, |
428 | | std::string const& object, |
429 | | qpdf_offset_t offset, |
430 | | std::string const& message) |
431 | 3.21k | { |
432 | 3.21k | warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
433 | 3.21k | } |
434 | | |
435 | | QPDFObjectHandle |
436 | | QPDF::newReserved() |
437 | 0 | { |
438 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
439 | 0 | } |
440 | | |
441 | | QPDFObjectHandle |
442 | | QPDF::newIndirectNull() |
443 | 0 | { |
444 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
445 | 0 | } |
446 | | |
447 | | QPDFObjectHandle |
448 | | QPDF::newStream() |
449 | 0 | { |
450 | 0 | return makeIndirectObject( |
451 | 0 | qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0)); |
452 | 0 | } |
453 | | |
454 | | QPDFObjectHandle |
455 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
456 | 0 | { |
457 | 0 | auto result = newStream(); |
458 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
459 | 0 | return result; |
460 | 0 | } |
461 | | |
462 | | QPDFObjectHandle |
463 | | QPDF::newStream(std::string const& data) |
464 | 0 | { |
465 | 0 | auto result = newStream(); |
466 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
467 | 0 | return result; |
468 | 0 | } |
469 | | |
470 | | QPDFObjectHandle |
471 | | QPDF::getObject(int objid, int generation) |
472 | 9.29k | { |
473 | 9.29k | return getObject(QPDFObjGen(objid, generation)); |
474 | 9.29k | } |
475 | | |
476 | | QPDFObjectHandle |
477 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
478 | 0 | { |
479 | 0 | return getObject(og); |
480 | 0 | } |
481 | | |
482 | | QPDFObjectHandle |
483 | | QPDF::getObjectByID(int objid, int generation) |
484 | 0 | { |
485 | 0 | return getObject(QPDFObjGen(objid, generation)); |
486 | 0 | } |
487 | | |
488 | | QPDFObjectHandle |
489 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
490 | 0 | { |
491 | | // Here's an explanation of what's going on here. |
492 | | // |
493 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
494 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
495 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
496 | | // references to the corresponding object in the local file. |
497 | | // |
498 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
499 | | // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a |
500 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
501 | | // |
502 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
503 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
504 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
505 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
506 | | // mapping from the foreign object ID to the new object. While we |
507 | | // do this, we keep a list of objects to copy. |
508 | | // |
509 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
510 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
511 | | // need to replace them with references to objects in the local file. This is what |
512 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
513 | | // all the indirect references replaced with new ones in the local context, we can replace the |
514 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
515 | | // references in any order. |
516 | | |
517 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
518 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
519 | | // require the original QPDF object but may require the original source of the stream data with |
520 | | // special handling for immediate_copy_from. This logic is also in |
521 | | // replaceForeignIndirectObjects. |
522 | | |
523 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
524 | | // use case to copy pages this way if the intention is to not update the pages tree. |
525 | 0 | if (!foreign.isIndirect()) { |
526 | 0 | QTC::TC("qpdf", "QPDF copyForeign direct"); |
527 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
528 | 0 | } |
529 | 0 | QPDF& other = foreign.getQPDF(); |
530 | 0 | if (&other == this) { |
531 | 0 | QTC::TC("qpdf", "QPDF copyForeign not foreign"); |
532 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
533 | 0 | } |
534 | | |
535 | 0 | ObjCopier& obj_copier = m->object_copiers[other.m->unique_id]; |
536 | 0 | if (!obj_copier.visiting.empty()) { |
537 | 0 | throw std::logic_error( |
538 | 0 | "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
539 | 0 | } |
540 | | |
541 | | // Make sure we have an object in this file for every referenced object in the old file. |
542 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
543 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
544 | | // the local object will already be a stream. |
545 | 0 | reserveObjects(foreign, obj_copier, true); |
546 | |
|
547 | 0 | if (!obj_copier.visiting.empty()) { |
548 | 0 | throw std::logic_error("obj_copier.visiting is not empty after reserving objects"); |
549 | 0 | } |
550 | | |
551 | | // Copy any new objects and replace the reservations. |
552 | 0 | for (auto& to_copy: obj_copier.to_copy) { |
553 | 0 | QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true); |
554 | 0 | if (!to_copy.isStream()) { |
555 | 0 | QPDFObjGen og(to_copy.getObjGen()); |
556 | 0 | replaceReserved(obj_copier.object_map[og], copy); |
557 | 0 | } |
558 | 0 | } |
559 | 0 | obj_copier.to_copy.clear(); |
560 | |
|
561 | 0 | auto og = foreign.getObjGen(); |
562 | 0 | if (!obj_copier.object_map.contains(og)) { |
563 | 0 | warn(damagedPDF( |
564 | 0 | other.getFilename() + " object " + og.unparse(' '), |
565 | 0 | foreign.getParsedOffset(), |
566 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
567 | 0 | "null")); |
568 | 0 | return QPDFObjectHandle::newNull(); |
569 | 0 | } |
570 | 0 | return obj_copier.object_map[foreign.getObjGen()]; |
571 | 0 | } |
572 | | |
573 | | void |
574 | | QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
575 | 0 | { |
576 | 0 | auto foreign_tc = foreign.getTypeCode(); |
577 | 0 | if (foreign_tc == ::ot_reserved) { |
578 | 0 | throw std::logic_error("QPDF: attempting to copy a foreign reserved object"); |
579 | 0 | } |
580 | | |
581 | 0 | if (foreign.isPagesObject()) { |
582 | 0 | QTC::TC("qpdf", "QPDF not copying pages object"); |
583 | 0 | return; |
584 | 0 | } |
585 | | |
586 | 0 | if (foreign.isIndirect()) { |
587 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
588 | 0 | if (!obj_copier.visiting.add(foreign_og)) { |
589 | 0 | QTC::TC("qpdf", "QPDF loop reserving objects"); |
590 | 0 | return; |
591 | 0 | } |
592 | 0 | if (obj_copier.object_map.contains(foreign_og)) { |
593 | 0 | QTC::TC("qpdf", "QPDF already reserved object"); |
594 | 0 | if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) { |
595 | 0 | obj_copier.visiting.erase(foreign); |
596 | 0 | return; |
597 | 0 | } |
598 | 0 | } else { |
599 | 0 | QTC::TC("qpdf", "QPDF copy indirect"); |
600 | 0 | obj_copier.object_map[foreign_og] = |
601 | 0 | foreign.isStream() ? newStream() : newIndirectNull(); |
602 | 0 | if ((!top) && foreign.isPageObject()) { |
603 | 0 | QTC::TC("qpdf", "QPDF not crossing page boundary"); |
604 | 0 | obj_copier.visiting.erase(foreign_og); |
605 | 0 | return; |
606 | 0 | } |
607 | 0 | } |
608 | 0 | obj_copier.to_copy.push_back(foreign); |
609 | 0 | } |
610 | | |
611 | 0 | if (foreign_tc == ::ot_array) { |
612 | 0 | QTC::TC("qpdf", "QPDF reserve array"); |
613 | 0 | for (auto const& item: foreign.as_array()) { |
614 | 0 | reserveObjects(item, obj_copier, false); |
615 | 0 | } |
616 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
617 | 0 | QTC::TC("qpdf", "QPDF reserve dictionary"); |
618 | 0 | for (auto const& item: foreign.as_dictionary()) { |
619 | 0 | if (!item.second.null()) { |
620 | 0 | reserveObjects(item.second, obj_copier, false); |
621 | 0 | } |
622 | 0 | } |
623 | 0 | } else if (foreign_tc == ::ot_stream) { |
624 | 0 | QTC::TC("qpdf", "QPDF reserve stream"); |
625 | 0 | reserveObjects(foreign.getDict(), obj_copier, false); |
626 | 0 | } |
627 | |
|
628 | 0 | obj_copier.visiting.erase(foreign); |
629 | 0 | } |
630 | | |
631 | | QPDFObjectHandle |
632 | | QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
633 | 0 | { |
634 | 0 | auto foreign_tc = foreign.getTypeCode(); |
635 | 0 | QPDFObjectHandle result; |
636 | 0 | if ((!top) && foreign.isIndirect()) { |
637 | 0 | QTC::TC("qpdf", "QPDF replace indirect"); |
638 | 0 | auto mapping = obj_copier.object_map.find(foreign.getObjGen()); |
639 | 0 | if (mapping == obj_copier.object_map.end()) { |
640 | | // This case would occur if this is a reference to a Pages object that we didn't |
641 | | // traverse into. |
642 | 0 | QTC::TC("qpdf", "QPDF replace foreign indirect with null"); |
643 | 0 | result = QPDFObjectHandle::newNull(); |
644 | 0 | } else { |
645 | 0 | result = mapping->second; |
646 | 0 | } |
647 | 0 | } else if (foreign_tc == ::ot_array) { |
648 | 0 | QTC::TC("qpdf", "QPDF replace array"); |
649 | 0 | result = QPDFObjectHandle::newArray(); |
650 | 0 | for (auto const& item: foreign.as_array()) { |
651 | 0 | result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false)); |
652 | 0 | } |
653 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
654 | 0 | QTC::TC("qpdf", "QPDF replace dictionary"); |
655 | 0 | result = QPDFObjectHandle::newDictionary(); |
656 | 0 | for (auto const& [key, value]: foreign.as_dictionary()) { |
657 | 0 | if (!value.null()) { |
658 | 0 | result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
659 | 0 | } |
660 | 0 | } |
661 | 0 | } else if (foreign_tc == ::ot_stream) { |
662 | 0 | QTC::TC("qpdf", "QPDF replace stream"); |
663 | 0 | result = obj_copier.object_map[foreign.getObjGen()]; |
664 | 0 | QPDFObjectHandle dict = result.getDict(); |
665 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
666 | 0 | for (auto const& [key, value]: old_dict.as_dictionary()) { |
667 | 0 | if (!value.null()) { |
668 | 0 | dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
669 | 0 | } |
670 | 0 | } |
671 | 0 | copyStreamData(result, foreign); |
672 | 0 | } else { |
673 | 0 | foreign.assertScalar(); |
674 | 0 | result = foreign; |
675 | 0 | result.makeDirect(); |
676 | 0 | } |
677 | |
|
678 | 0 | if (top && (!result.isStream()) && result.isIndirect()) { |
679 | 0 | throw std::logic_error("replacement for foreign object is indirect"); |
680 | 0 | } |
681 | | |
682 | 0 | return result; |
683 | 0 | } |
684 | | |
685 | | void |
686 | | QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) |
687 | 0 | { |
688 | | // This method was originally written for copying foreign streams, but it is used by |
689 | | // QPDFObjectHandle to copy streams from the same QPDF object as well. |
690 | |
|
691 | 0 | QPDFObjectHandle dict = result.getDict(); |
692 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
693 | 0 | if (m->copied_stream_data_provider == nullptr) { |
694 | 0 | m->copied_stream_data_provider = new CopiedStreamDataProvider(*this); |
695 | 0 | m->copied_streams = |
696 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); |
697 | 0 | } |
698 | 0 | QPDFObjGen local_og(result.getObjGen()); |
699 | | // Copy information from the foreign stream so we can pipe its data later without keeping the |
700 | | // original QPDF object around. |
701 | |
|
702 | 0 | QPDF& foreign_stream_qpdf = |
703 | 0 | foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); |
704 | |
|
705 | 0 | auto stream = foreign.as_stream(); |
706 | 0 | if (!stream) { |
707 | 0 | throw std::logic_error("unable to retrieve underlying stream object from foreign stream"); |
708 | 0 | } |
709 | 0 | std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer(); |
710 | 0 | if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { |
711 | | // Pull the stream data into a buffer before attempting the copy operation. Do it on the |
712 | | // source stream so that if the source stream is copied multiple times, we don't have to |
713 | | // keep duplicating the memory. |
714 | 0 | QTC::TC("qpdf", "QPDF immediate copy stream data"); |
715 | 0 | foreign.replaceStreamData( |
716 | 0 | foreign.getRawStreamData(), |
717 | 0 | old_dict.getKey("/Filter"), |
718 | 0 | old_dict.getKey("/DecodeParms")); |
719 | 0 | stream_buffer = stream.getStreamDataBuffer(); |
720 | 0 | } |
721 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider = |
722 | 0 | stream.getStreamDataProvider(); |
723 | 0 | if (stream_buffer.get()) { |
724 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with buffer"); |
725 | 0 | result.replaceStreamData( |
726 | 0 | stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
727 | 0 | } else if (stream_provider.get()) { |
728 | | // In this case, the remote stream's QPDF must stay in scope. |
729 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with provider"); |
730 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign); |
731 | 0 | result.replaceStreamData( |
732 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
733 | 0 | } else { |
734 | 0 | auto foreign_stream_data = std::make_shared<ForeignStreamData>( |
735 | 0 | foreign_stream_qpdf.m->encp, |
736 | 0 | foreign_stream_qpdf.m->file, |
737 | 0 | foreign, |
738 | 0 | foreign.getParsedOffset(), |
739 | 0 | stream.getLength(), |
740 | 0 | dict, |
741 | 0 | stream.isRootMetadata()); |
742 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data); |
743 | 0 | result.replaceStreamData( |
744 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
745 | 0 | } |
746 | 0 | } |
747 | | |
748 | | unsigned long long |
749 | | QPDF::getUniqueId() const |
750 | 0 | { |
751 | 0 | return m->unique_id; |
752 | 0 | } |
753 | | |
754 | | std::string |
755 | | QPDF::getFilename() const |
756 | 65.3k | { |
757 | 65.3k | return m->file->getName(); |
758 | 65.3k | } |
759 | | |
760 | | PDFVersion |
761 | | QPDF::getVersionAsPDFVersion() |
762 | 0 | { |
763 | 0 | int major = 1; |
764 | 0 | int minor = 3; |
765 | 0 | int extension_level = getExtensionLevel(); |
766 | |
|
767 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
768 | 0 | std::smatch match; |
769 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
770 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
771 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
772 | 0 | } |
773 | |
|
774 | 0 | return {major, minor, extension_level}; |
775 | 0 | } |
776 | | |
777 | | std::string |
778 | | QPDF::getPDFVersion() const |
779 | 8.15k | { |
780 | 8.15k | return m->pdf_version; |
781 | 8.15k | } |
782 | | |
783 | | int |
784 | | QPDF::getExtensionLevel() |
785 | 8.15k | { |
786 | 8.15k | int result = 0; |
787 | 8.15k | QPDFObjectHandle obj = getRoot(); |
788 | 8.15k | if (obj.hasKey("/Extensions")) { |
789 | 362 | obj = obj.getKey("/Extensions"); |
790 | 362 | if (obj.isDictionary() && obj.hasKey("/ADBE")) { |
791 | 231 | obj = obj.getKey("/ADBE"); |
792 | 231 | if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) { |
793 | 48 | obj = obj.getKey("/ExtensionLevel"); |
794 | 48 | if (obj.isInteger()) { |
795 | 45 | result = obj.getIntValueAsInt(); |
796 | 45 | } |
797 | 48 | } |
798 | 231 | } |
799 | 362 | } |
800 | 8.15k | return result; |
801 | 8.15k | } |
802 | | |
803 | | QPDFObjectHandle |
804 | | QPDF::getTrailer() |
805 | 32.3k | { |
806 | 32.3k | return m->trailer; |
807 | 32.3k | } |
808 | | |
809 | | QPDFObjectHandle |
810 | | QPDF::getRoot() |
811 | 61.4k | { |
812 | 61.4k | QPDFObjectHandle root = m->trailer.getKey("/Root"); |
813 | 61.4k | if (!root.isDictionary()) { |
814 | 5.12k | throw damagedPDF("", -1, "unable to find /Root dictionary"); |
815 | 56.3k | } else if ( |
816 | | // Check_mode is an interim solution to request #810 pending a more comprehensive review of |
817 | | // the approach to more extensive checks and warning levels. |
818 | 56.3k | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { |
819 | 0 | warn(damagedPDF("", -1, "catalog /Type entry missing or invalid")); |
820 | 0 | root.replaceKey("/Type", "/Catalog"_qpdf); |
821 | 0 | } |
822 | 56.3k | return root; |
823 | 61.4k | } |
824 | | |
825 | | std::map<QPDFObjGen, QPDFXRefEntry> |
826 | | QPDF::getXRefTable() |
827 | 0 | { |
828 | 0 | return getXRefTableInternal(); |
829 | 0 | } |
830 | | |
831 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
832 | | QPDF::getXRefTableInternal() |
833 | 0 | { |
834 | 0 | if (!m->parsed) { |
835 | 0 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
836 | 0 | } |
837 | | |
838 | 0 | return m->xref_table; |
839 | 0 | } |
840 | | |
841 | | bool |
842 | | QPDF::pipeStreamData( |
843 | | std::shared_ptr<EncryptionParameters> encp, |
844 | | std::shared_ptr<InputSource> file, |
845 | | QPDF& qpdf_for_warning, |
846 | | QPDFObjGen og, |
847 | | qpdf_offset_t offset, |
848 | | size_t length, |
849 | | QPDFObjectHandle stream_dict, |
850 | | bool is_root_metadata, |
851 | | Pipeline* pipeline, |
852 | | bool suppress_warnings, |
853 | | bool will_retry) |
854 | 37.8k | { |
855 | 37.8k | std::unique_ptr<Pipeline> to_delete; |
856 | 37.8k | if (encp->encrypted) { |
857 | 4.24k | decryptStream( |
858 | 4.24k | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
859 | 4.24k | } |
860 | | |
861 | 37.8k | bool attempted_finish = false; |
862 | 37.8k | try { |
863 | 37.8k | auto buf = file->read(length, offset); |
864 | 37.8k | if (buf.size() != length) { |
865 | 0 | throw damagedPDF( |
866 | 0 | *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); |
867 | 0 | } |
868 | 37.8k | pipeline->write(buf.data(), length); |
869 | 37.8k | attempted_finish = true; |
870 | 37.8k | pipeline->finish(); |
871 | 37.8k | return true; |
872 | 37.8k | } catch (QPDFExc& e) { |
873 | 0 | if (!suppress_warnings) { |
874 | 0 | qpdf_for_warning.warn(e); |
875 | 0 | } |
876 | 9.96k | } catch (std::exception& e) { |
877 | 9.96k | if (!suppress_warnings) { |
878 | 9.96k | QTC::TC("qpdf", "QPDF decoding error warning"); |
879 | 9.96k | qpdf_for_warning.warn( |
880 | | // line-break |
881 | 9.96k | damagedPDF( |
882 | 9.96k | *file, |
883 | 9.96k | "", |
884 | 9.96k | file->getLastOffset(), |
885 | 9.96k | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
886 | 9.96k | e.what()))); |
887 | 9.96k | if (will_retry) { |
888 | 8.49k | qpdf_for_warning.warn( |
889 | | // line-break |
890 | 8.49k | damagedPDF( |
891 | 8.49k | *file, |
892 | 8.49k | "", |
893 | 8.49k | file->getLastOffset(), |
894 | 8.49k | "stream will be re-processed without filtering to avoid data loss")); |
895 | 8.49k | } |
896 | 9.96k | } |
897 | 9.96k | } |
898 | 9.90k | if (!attempted_finish) { |
899 | 4.54k | try { |
900 | 4.54k | pipeline->finish(); |
901 | 4.54k | } catch (std::exception&) { |
902 | | // ignore |
903 | 2.60k | } |
904 | 4.54k | } |
905 | 9.90k | return false; |
906 | 9.90k | } |
907 | | |
908 | | bool |
909 | | QPDF::pipeStreamData( |
910 | | QPDFObjGen og, |
911 | | qpdf_offset_t offset, |
912 | | size_t length, |
913 | | QPDFObjectHandle stream_dict, |
914 | | bool is_root_metadata, |
915 | | Pipeline* pipeline, |
916 | | bool suppress_warnings, |
917 | | bool will_retry) |
918 | 37.8k | { |
919 | 37.8k | return pipeStreamData( |
920 | 37.8k | m->encp, |
921 | 37.8k | m->file, |
922 | 37.8k | *this, |
923 | 37.8k | og, |
924 | 37.8k | offset, |
925 | 37.8k | length, |
926 | 37.8k | stream_dict, |
927 | 37.8k | is_root_metadata, |
928 | 37.8k | pipeline, |
929 | 37.8k | suppress_warnings, |
930 | 37.8k | will_retry); |
931 | 37.8k | } |
932 | | |
933 | | bool |
934 | | QPDF::pipeForeignStreamData( |
935 | | std::shared_ptr<ForeignStreamData> foreign, |
936 | | Pipeline* pipeline, |
937 | | bool suppress_warnings, |
938 | | bool will_retry) |
939 | 0 | { |
940 | 0 | if (foreign->encp->encrypted) { |
941 | 0 | QTC::TC("qpdf", "QPDF pipe foreign encrypted stream"); |
942 | 0 | } |
943 | 0 | return pipeStreamData( |
944 | 0 | foreign->encp, |
945 | 0 | foreign->file, |
946 | 0 | *this, |
947 | 0 | foreign->foreign_og, |
948 | 0 | foreign->offset, |
949 | 0 | foreign->length, |
950 | 0 | foreign->local_dict, |
951 | 0 | foreign->is_root_metadata, |
952 | 0 | pipeline, |
953 | 0 | suppress_warnings, |
954 | 0 | will_retry); |
955 | 0 | } |
956 | | |
957 | | // Throw a generic exception when we lack context for something more specific. New code should not |
958 | | // use this. This method exists to improve somewhat from calling assert in very old code. |
959 | | void |
960 | | QPDF::stopOnError(std::string const& message) |
961 | 16.5k | { |
962 | 16.5k | throw damagedPDF("", message); |
963 | 16.5k | } |
964 | | |
965 | | // Return an exception of type qpdf_e_damaged_pdf. |
966 | | QPDFExc |
967 | | QPDF::damagedPDF( |
968 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
969 | 70.9k | { |
970 | 70.9k | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
971 | 70.9k | } |
972 | | |
973 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
974 | | // m->last_object_description. |
975 | | QPDFExc |
976 | | QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) |
977 | 52.4k | { |
978 | 52.4k | return damagedPDF(input, m->last_object_description, offset, message); |
979 | 52.4k | } |
980 | | |
981 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
982 | | QPDFExc |
983 | | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) |
984 | 270k | { |
985 | 270k | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
986 | 270k | } |
987 | | |
988 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
989 | | // offset from .m->file->getLastOffset(). |
990 | | QPDFExc |
991 | | QPDF::damagedPDF(std::string const& object, std::string const& message) |
992 | 39.0k | { |
993 | 39.0k | return damagedPDF(object, m->file->getLastOffset(), message); |
994 | 39.0k | } |
995 | | |
996 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
997 | | // from .m->last_object_description. |
998 | | QPDFExc |
999 | | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) |
1000 | 68.5k | { |
1001 | 68.5k | return damagedPDF(m->last_object_description, offset, message); |
1002 | 68.5k | } |
1003 | | |
1004 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
1005 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
1006 | | QPDFExc |
1007 | | QPDF::damagedPDF(std::string const& message) |
1008 | 43.6k | { |
1009 | 43.6k | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
1010 | 43.6k | } |
1011 | | |
1012 | | bool |
1013 | | QPDF::everCalledGetAllPages() const |
1014 | 0 | { |
1015 | 0 | return m->ever_called_get_all_pages; |
1016 | 0 | } |
1017 | | |
1018 | | bool |
1019 | | QPDF::everPushedInheritedAttributesToPages() const |
1020 | 0 | { |
1021 | 0 | return m->ever_pushed_inherited_attributes_to_pages; |
1022 | 0 | } |
1023 | | |
1024 | | void |
1025 | | QPDF::removeSecurityRestrictions() |
1026 | 0 | { |
1027 | 0 | auto root = getRoot(); |
1028 | 0 | root.removeKey("/Perms"); |
1029 | 0 | auto acroform = root.getKey("/AcroForm"); |
1030 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
1031 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
1032 | 0 | } |
1033 | 0 | } |