/src/qpdf/libqpdf/QPDF.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/qpdf-config.h> // include first for large file support |
2 | | |
3 | | #include <qpdf/QPDF_private.hh> |
4 | | |
5 | | #include <array> |
6 | | #include <atomic> |
7 | | #include <cstring> |
8 | | #include <limits> |
9 | | #include <map> |
10 | | #include <regex> |
11 | | #include <sstream> |
12 | | #include <vector> |
13 | | |
14 | | #include <qpdf/FileInputSource.hh> |
15 | | #include <qpdf/InputSource_private.hh> |
16 | | #include <qpdf/OffsetInputSource.hh> |
17 | | #include <qpdf/Pipeline.hh> |
18 | | #include <qpdf/QPDFExc.hh> |
19 | | #include <qpdf/QPDFLogger.hh> |
20 | | #include <qpdf/QPDFObjectHandle_private.hh> |
21 | | #include <qpdf/QPDFObject_private.hh> |
22 | | #include <qpdf/QPDFParser.hh> |
23 | | #include <qpdf/QTC.hh> |
24 | | #include <qpdf/QUtil.hh> |
25 | | #include <qpdf/Util.hh> |
26 | | |
27 | | using namespace qpdf; |
28 | | using namespace std::literals; |
29 | | |
30 | | // This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
31 | | // being static as well. |
32 | | std::string const QPDF::qpdf_version(QPDF_VERSION); |
33 | | |
// Minimal PDF passed to processMemoryFile() by QPDF::emptyPDF(): a catalog (object 1) that points
// at an empty page tree (object 2). The offsets in the xref table and after startxref are byte
// positions within this text, so they must be recomputed if the objects are ever edited.
static char const* EMPTY_PDF =
    // header: bytes 0-8
    "%PDF-1.3\n"
    // object 1 begins at byte 9
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    // object 2 begins at byte 58
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    // cross-reference table begins at byte 110
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n";
52 | | |
53 | | namespace |
54 | | { |
55 | | class InvalidInputSource: public InputSource |
56 | | { |
57 | | public: |
58 | | ~InvalidInputSource() override = default; |
59 | | qpdf_offset_t |
60 | | findAndSkipNextEOL() override |
61 | 0 | { |
62 | 0 | throwException(); |
63 | 0 | return 0; |
64 | 0 | } |
65 | | std::string const& |
66 | | getName() const override |
67 | 0 | { |
68 | 0 | static std::string name("closed input source"); |
69 | 0 | return name; |
70 | 0 | } |
71 | | qpdf_offset_t |
72 | | tell() override |
73 | 0 | { |
74 | 0 | throwException(); |
75 | 0 | return 0; |
76 | 0 | } |
77 | | void |
78 | | seek(qpdf_offset_t offset, int whence) override |
79 | 0 | { |
80 | 0 | throwException(); |
81 | 0 | } |
82 | | void |
83 | | rewind() override |
84 | 0 | { |
85 | 0 | throwException(); |
86 | 0 | } |
87 | | size_t |
88 | | read(char* buffer, size_t length) override |
89 | 0 | { |
90 | 0 | throwException(); |
91 | 0 | return 0; |
92 | 0 | } |
93 | | void |
94 | | unreadCh(char ch) override |
95 | 0 | { |
96 | 0 | throwException(); |
97 | 0 | } |
98 | | |
99 | | private: |
100 | | void |
101 | | throwException() |
102 | 0 | { |
103 | 0 | throw std::logic_error( |
104 | 0 | "QPDF operation attempted on a QPDF object with no input " |
105 | 0 | "source. QPDF operations are invalid before processFile (or " |
106 | 0 | "another process method) or after closeInputSource"); |
107 | 0 | } |
108 | | }; |
109 | | } // namespace |
110 | | |
111 | | QPDF::ForeignStreamData::ForeignStreamData( |
112 | | std::shared_ptr<EncryptionParameters> encp, |
113 | | std::shared_ptr<InputSource> file, |
114 | | QPDFObjGen foreign_og, |
115 | | qpdf_offset_t offset, |
116 | | size_t length, |
117 | | QPDFObjectHandle local_dict, |
118 | | bool is_root_metadata) : |
119 | 0 | encp(encp), |
120 | 0 | file(file), |
121 | 0 | foreign_og(foreign_og), |
122 | 0 | offset(offset), |
123 | 0 | length(length), |
124 | 0 | local_dict(local_dict), |
125 | 0 | is_root_metadata(is_root_metadata) |
126 | 0 | { |
127 | 0 | } |
128 | | |
129 | | QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) : |
130 | 0 | QPDFObjectHandle::StreamDataProvider(true), |
131 | 0 | destination_qpdf(destination_qpdf) |
132 | 0 | { |
133 | 0 | } |
134 | | |
135 | | bool |
136 | | QPDF::CopiedStreamDataProvider::provideStreamData( |
137 | | QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) |
138 | 0 | { |
139 | 0 | std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og]; |
140 | 0 | bool result = false; |
141 | 0 | if (foreign_data.get()) { |
142 | 0 | result = destination_qpdf.pipeForeignStreamData( |
143 | 0 | foreign_data, pipeline, suppress_warnings, will_retry); |
144 | 0 | QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1); |
145 | 0 | } else { |
146 | 0 | auto foreign_stream = foreign_streams[og]; |
147 | 0 | result = foreign_stream.pipeStreamData( |
148 | 0 | pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry); |
149 | 0 | QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1); |
150 | 0 | } |
151 | 0 | return result; |
152 | 0 | } |
153 | | |
154 | | void |
155 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
156 | | QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream) |
157 | 0 | { |
158 | 0 | this->foreign_streams[local_og] = foreign_stream; |
159 | 0 | } |
160 | | |
161 | | void |
162 | | QPDF::CopiedStreamDataProvider::registerForeignStream( |
163 | | QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream) |
164 | 0 | { |
165 | 0 | this->foreign_stream_data[local_og] = foreign_stream; |
166 | 0 | } |
167 | | |
168 | | QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : |
169 | 117k | qpdf(qpdf), |
170 | 117k | og(og) |
171 | 117k | { |
172 | 117k | } |
173 | | |
174 | | std::string const& |
175 | | QPDF::QPDFVersion() |
176 | 0 | { |
177 | | // The C API relies on this being a static value. |
178 | 0 | return QPDF::qpdf_version; |
179 | 0 | } |
180 | | |
181 | | QPDF::Members::Members() : |
182 | 24.5k | log(QPDFLogger::defaultLogger()), |
183 | 24.5k | file(new InvalidInputSource()), |
184 | 24.5k | encp(new EncryptionParameters) |
185 | 24.5k | { |
186 | 24.5k | } |
187 | | |
188 | | QPDF::QPDF() : |
189 | 24.5k | m(std::make_unique<Members>()) |
190 | 24.5k | { |
191 | 24.5k | m->tokenizer.allowEOF(); |
192 | | // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
193 | | // the lifetime of this running application. |
194 | 24.5k | static std::atomic<unsigned long long> unique_id{0}; |
195 | 24.5k | m->unique_id = unique_id.fetch_add(1ULL); |
196 | 24.5k | } |
197 | | |
198 | | // Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache |
199 | | // (future Objects::Entry) to centralize all QPDF access to QPDFObject. |
200 | | class Disconnect: BaseHandle |
201 | | { |
202 | | public: |
203 | | Disconnect(std::shared_ptr<QPDFObject> const& obj) : |
204 | 397k | BaseHandle(obj) |
205 | 397k | { |
206 | 397k | } |
207 | | void |
208 | | disconnect() |
209 | 397k | { |
210 | 397k | BaseHandle::disconnect(false); |
211 | 397k | if (raw_type_code() != ::ot_null) { |
212 | 145k | obj->value = QPDF_Destroyed(); |
213 | 145k | } |
214 | 397k | } |
215 | | }; |
216 | | |
217 | | QPDF::~QPDF() |
218 | 24.5k | { |
219 | | // If two objects are mutually referential (through each object having an array or dictionary |
220 | | // that contains an indirect reference to the other), the circular references in the |
221 | | // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects |
222 | | // in the object cache, which is those objects that we read from the file, and break all |
223 | | // resolved indirect references by replacing them with an internal object type representing that |
224 | | // they have been destroyed. Note that we can't break references like this at any time when the |
225 | | // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that |
226 | | // are reachable from this object to release their association with this QPDF. Direct objects |
227 | | // are not destroyed since they can be moved to other QPDF objects safely. |
228 | | |
229 | | // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear |
230 | | // the xref table anyway just to prevent any possibility of resolve() succeeding. |
231 | 24.5k | m->xref_table.clear(); |
232 | 397k | for (auto const& iter: m->obj_cache) { |
233 | 397k | Disconnect(iter.second.object).disconnect(); |
234 | 397k | } |
235 | 24.5k | } |
236 | | |
237 | | std::shared_ptr<QPDF> |
238 | | QPDF::create() |
239 | 24.5k | { |
240 | 24.5k | return std::make_shared<QPDF>(); |
241 | 24.5k | } |
242 | | |
243 | | void |
244 | | QPDF::processFile(char const* filename, char const* password) |
245 | 0 | { |
246 | 0 | auto* fi = new FileInputSource(filename); |
247 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
248 | 0 | } |
249 | | |
250 | | void |
251 | | QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password) |
252 | 0 | { |
253 | 0 | auto* fi = new FileInputSource(description, filep, close_file); |
254 | 0 | processInputSource(std::shared_ptr<InputSource>(fi), password); |
255 | 0 | } |
256 | | |
257 | | void |
258 | | QPDF::processMemoryFile( |
259 | | char const* description, char const* buf, size_t length, char const* password) |
260 | 0 | { |
261 | 0 | auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length}); |
262 | 0 | processInputSource(is, password); |
263 | 0 | } |
264 | | |
265 | | void |
266 | | QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) |
267 | 24.5k | { |
268 | 24.5k | m->file = source; |
269 | 24.5k | parse(password); |
270 | 24.5k | } |
271 | | |
272 | | void |
273 | | QPDF::closeInputSource() |
274 | 0 | { |
275 | 0 | m->file = std::shared_ptr<InputSource>(new InvalidInputSource()); |
276 | 0 | } |
277 | | |
278 | | void |
279 | | QPDF::setPasswordIsHexKey(bool val) |
280 | 0 | { |
281 | 0 | m->provided_password_is_hex_key = val; |
282 | 0 | } |
283 | | |
284 | | void |
285 | | QPDF::emptyPDF() |
286 | 0 | { |
287 | 0 | processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); |
288 | 0 | } |
289 | | |
290 | | void |
291 | | QPDF::registerStreamFilter( |
292 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
293 | 0 | { |
294 | 0 | qpdf::Stream::registerStreamFilter(filter_name, factory); |
295 | 0 | } |
296 | | |
297 | | void |
298 | | QPDF::setIgnoreXRefStreams(bool val) |
299 | 0 | { |
300 | 0 | m->ignore_xref_streams = val; |
301 | 0 | } |
302 | | |
303 | | std::shared_ptr<QPDFLogger> |
304 | | QPDF::getLogger() |
305 | 0 | { |
306 | 0 | return m->log; |
307 | 0 | } |
308 | | |
309 | | void |
310 | | QPDF::setLogger(std::shared_ptr<QPDFLogger> l) |
311 | 0 | { |
312 | 0 | m->log = l; |
313 | 0 | } |
314 | | |
315 | | void |
316 | | QPDF::setOutputStreams(std::ostream* out, std::ostream* err) |
317 | 0 | { |
318 | 0 | setLogger(QPDFLogger::create()); |
319 | 0 | m->log->setOutputStreams(out, err); |
320 | 0 | } |
321 | | |
322 | | void |
323 | | QPDF::setSuppressWarnings(bool val) |
324 | 0 | { |
325 | 0 | m->suppress_warnings = val; |
326 | 0 | } |
327 | | |
328 | | void |
329 | | QPDF::setMaxWarnings(size_t val) |
330 | 24.5k | { |
331 | 24.5k | m->max_warnings = val; |
332 | 24.5k | } |
333 | | |
334 | | void |
335 | | QPDF::setAttemptRecovery(bool val) |
336 | 0 | { |
337 | 0 | m->attempt_recovery = val; |
338 | 0 | } |
339 | | |
340 | | void |
341 | | QPDF::setImmediateCopyFrom(bool val) |
342 | 0 | { |
343 | 0 | m->immediate_copy_from = val; |
344 | 0 | } |
345 | | |
346 | | std::vector<QPDFExc> |
347 | | QPDF::getWarnings() |
348 | 0 | { |
349 | 0 | std::vector<QPDFExc> result = m->warnings; |
350 | 0 | m->warnings.clear(); |
351 | 0 | return result; |
352 | 0 | } |
353 | | |
354 | | bool |
355 | | QPDF::anyWarnings() const |
356 | 0 | { |
357 | 0 | return !m->warnings.empty(); |
358 | 0 | } |
359 | | |
360 | | size_t |
361 | | QPDF::numWarnings() const |
362 | 0 | { |
363 | 0 | return m->warnings.size(); |
364 | 0 | } |
365 | | |
366 | | bool |
367 | | QPDF::validatePDFVersion(char const*& p, std::string& version) |
368 | 6.83k | { |
369 | 6.83k | if (!util::is_digit(*p)) { |
370 | 3.03k | return false; |
371 | 3.03k | } |
372 | 10.4k | while (util::is_digit(*p)) { |
373 | 6.66k | version.append(1, *p++); |
374 | 6.66k | } |
375 | 3.79k | if (!(*p == '.' && util::is_digit(*(p + 1)))) { |
376 | 1.10k | return false; |
377 | 1.10k | } |
378 | 2.69k | version.append(1, *p++); |
379 | 7.56k | while (util::is_digit(*p)) { |
380 | 4.87k | version.append(1, *p++); |
381 | 4.87k | } |
382 | 2.69k | return true; |
383 | 3.79k | } |
384 | | |
385 | | bool |
386 | | QPDF::findHeader() |
387 | 6.84k | { |
388 | 6.84k | qpdf_offset_t global_offset = m->file->tell(); |
389 | 6.84k | std::string line = m->file->readLine(1024); |
390 | 6.84k | char const* p = line.data(); |
391 | 6.84k | if (strncmp(p, "%PDF-", 5) != 0) { |
392 | 0 | throw std::logic_error("findHeader is not looking at %PDF-"); |
393 | 0 | } |
394 | 6.84k | p += 5; |
395 | 6.84k | std::string version; |
396 | | // Note: The string returned by line.data() is always null-terminated. The code below never |
397 | | // overruns the buffer because a null character always short-circuits further advancement. |
398 | 6.84k | if (!validatePDFVersion(p, version)) { |
399 | 4.14k | return false; |
400 | 4.14k | } |
401 | 2.70k | m->pdf_version = version; |
402 | 2.70k | if (global_offset != 0) { |
403 | | // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is |
404 | | // leading material prior to the PDF header, all explicit offsets in the file are such that |
405 | | // 0 points to the beginning of the header. |
406 | 1.50k | QTC::TC("qpdf", "QPDF global offset"); |
407 | 1.50k | m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); |
408 | 1.50k | } |
409 | 2.70k | return true; |
410 | 6.84k | } |
411 | | |
412 | | void |
413 | | QPDF::warn(QPDFExc const& e) |
414 | 804k | { |
415 | 804k | if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { |
416 | 18.8k | stopOnError("Too many warnings - file is too badly damaged"); |
417 | 18.8k | } |
418 | 804k | m->warnings.push_back(e); |
419 | 804k | if (!m->suppress_warnings) { |
420 | 785k | *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; |
421 | 785k | } |
422 | 804k | } |
423 | | |
424 | | void |
425 | | QPDF::warn( |
426 | | qpdf_error_code_e error_code, |
427 | | std::string const& object, |
428 | | qpdf_offset_t offset, |
429 | | std::string const& message) |
430 | 5.56k | { |
431 | 5.56k | warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
432 | 5.56k | } |
433 | | |
434 | | QPDFObjectHandle |
435 | | QPDF::newReserved() |
436 | 0 | { |
437 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>()); |
438 | 0 | } |
439 | | |
440 | | QPDFObjectHandle |
441 | | QPDF::newIndirectNull() |
442 | 0 | { |
443 | 0 | return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>()); |
444 | 0 | } |
445 | | |
446 | | QPDFObjectHandle |
447 | | QPDF::newStream() |
448 | 0 | { |
449 | 0 | return makeIndirectObject( |
450 | 0 | qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0)); |
451 | 0 | } |
452 | | |
453 | | QPDFObjectHandle |
454 | | QPDF::newStream(std::shared_ptr<Buffer> data) |
455 | 0 | { |
456 | 0 | auto result = newStream(); |
457 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
458 | 0 | return result; |
459 | 0 | } |
460 | | |
461 | | QPDFObjectHandle |
462 | | QPDF::newStream(std::string const& data) |
463 | 0 | { |
464 | 0 | auto result = newStream(); |
465 | 0 | result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); |
466 | 0 | return result; |
467 | 0 | } |
468 | | |
469 | | QPDFObjectHandle |
470 | | QPDF::getObject(int objid, int generation) |
471 | 26.5k | { |
472 | 26.5k | return getObject(QPDFObjGen(objid, generation)); |
473 | 26.5k | } |
474 | | |
475 | | QPDFObjectHandle |
476 | | QPDF::getObjectByObjGen(QPDFObjGen og) |
477 | 0 | { |
478 | 0 | return getObject(og); |
479 | 0 | } |
480 | | |
481 | | QPDFObjectHandle |
482 | | QPDF::getObjectByID(int objid, int generation) |
483 | 0 | { |
484 | 0 | return getObject(QPDFObjGen(objid, generation)); |
485 | 0 | } |
486 | | |
487 | | QPDFObjectHandle |
488 | | QPDF::copyForeignObject(QPDFObjectHandle foreign) |
489 | 0 | { |
490 | | // Here's an explanation of what's going on here. |
491 | | // |
492 | | // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
493 | | // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a |
494 | | // foreign QPDF into the local QPDF, we have to replace all indirect object references with |
495 | | // references to the corresponding object in the local file. |
496 | | // |
497 | | // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
498 | | // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a |
499 | | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
500 | | // |
501 | | // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
502 | | // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an |
503 | | // indirect object, we check to see if we have already created a local copy of it. If not, we |
504 | | // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the |
505 | | // mapping from the foreign object ID to the new object. While we |
506 | | // do this, we keep a list of objects to copy. |
507 | | // |
508 | | // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
509 | | // the copies will contain indirect object IDs that refer to objects in the foreign file. We |
510 | | // need to replace them with references to objects in the local file. This is what |
511 | | // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with |
512 | | // all the indirect references replaced with new ones in the local context, we can replace the |
513 | | // local reserved object with the copy. This mechanism allows us to copy objects with circular |
514 | | // references in any order. |
515 | | |
516 | | // For streams, rather than copying the objects, we set up the stream data to pull from the |
517 | | // original stream by using a stream data provider. This is done in a manner that doesn't |
518 | | // require the original QPDF object but may require the original source of the stream data with |
519 | | // special handling for immediate_copy_from. This logic is also in |
520 | | // replaceForeignIndirectObjects. |
521 | | |
522 | | // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented |
523 | | // use case to copy pages this way if the intention is to not update the pages tree. |
524 | 0 | if (!foreign.isIndirect()) { |
525 | 0 | QTC::TC("qpdf", "QPDF copyForeign direct"); |
526 | 0 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
527 | 0 | } |
528 | 0 | QPDF& other = foreign.getQPDF(); |
529 | 0 | if (&other == this) { |
530 | 0 | QTC::TC("qpdf", "QPDF copyForeign not foreign"); |
531 | 0 | throw std::logic_error("QPDF::copyForeign called with object from this QPDF"); |
532 | 0 | } |
533 | | |
534 | 0 | ObjCopier& obj_copier = m->object_copiers[other.m->unique_id]; |
535 | 0 | if (!obj_copier.visiting.empty()) { |
536 | 0 | throw std::logic_error( |
537 | 0 | "obj_copier.visiting is not empty at the beginning of copyForeignObject"); |
538 | 0 | } |
539 | | |
540 | | // Make sure we have an object in this file for every referenced object in the old file. |
541 | | // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we |
542 | | // have to copy, the local object will be a reservation, unless it is a stream, in which case |
543 | | // the local object will already be a stream. |
544 | 0 | reserveObjects(foreign, obj_copier, true); |
545 | |
|
546 | 0 | if (!obj_copier.visiting.empty()) { |
547 | 0 | throw std::logic_error("obj_copier.visiting is not empty after reserving objects"); |
548 | 0 | } |
549 | | |
550 | | // Copy any new objects and replace the reservations. |
551 | 0 | for (auto& to_copy: obj_copier.to_copy) { |
552 | 0 | QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true); |
553 | 0 | if (!to_copy.isStream()) { |
554 | 0 | QPDFObjGen og(to_copy.getObjGen()); |
555 | 0 | replaceReserved(obj_copier.object_map[og], copy); |
556 | 0 | } |
557 | 0 | } |
558 | 0 | obj_copier.to_copy.clear(); |
559 | |
|
560 | 0 | auto og = foreign.getObjGen(); |
561 | 0 | if (!obj_copier.object_map.contains(og)) { |
562 | 0 | warn(damagedPDF( |
563 | 0 | other.getFilename() + " object " + og.unparse(' '), |
564 | 0 | foreign.getParsedOffset(), |
565 | 0 | "unexpected reference to /Pages object while copying foreign object; replacing with " |
566 | 0 | "null")); |
567 | 0 | return QPDFObjectHandle::newNull(); |
568 | 0 | } |
569 | 0 | return obj_copier.object_map[foreign.getObjGen()]; |
570 | 0 | } |
571 | | |
572 | | void |
573 | | QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
574 | 0 | { |
575 | 0 | auto foreign_tc = foreign.getTypeCode(); |
576 | 0 | if (foreign_tc == ::ot_reserved) { |
577 | 0 | throw std::logic_error("QPDF: attempting to copy a foreign reserved object"); |
578 | 0 | } |
579 | | |
580 | 0 | if (foreign.isPagesObject()) { |
581 | 0 | QTC::TC("qpdf", "QPDF not copying pages object"); |
582 | 0 | return; |
583 | 0 | } |
584 | | |
585 | 0 | if (foreign.isIndirect()) { |
586 | 0 | QPDFObjGen foreign_og(foreign.getObjGen()); |
587 | 0 | if (!obj_copier.visiting.add(foreign_og)) { |
588 | 0 | QTC::TC("qpdf", "QPDF loop reserving objects"); |
589 | 0 | return; |
590 | 0 | } |
591 | 0 | if (obj_copier.object_map.contains(foreign_og)) { |
592 | 0 | QTC::TC("qpdf", "QPDF already reserved object"); |
593 | 0 | if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) { |
594 | 0 | obj_copier.visiting.erase(foreign); |
595 | 0 | return; |
596 | 0 | } |
597 | 0 | } else { |
598 | 0 | QTC::TC("qpdf", "QPDF copy indirect"); |
599 | 0 | obj_copier.object_map[foreign_og] = |
600 | 0 | foreign.isStream() ? newStream() : newIndirectNull(); |
601 | 0 | if ((!top) && foreign.isPageObject()) { |
602 | 0 | QTC::TC("qpdf", "QPDF not crossing page boundary"); |
603 | 0 | obj_copier.visiting.erase(foreign_og); |
604 | 0 | return; |
605 | 0 | } |
606 | 0 | } |
607 | 0 | obj_copier.to_copy.push_back(foreign); |
608 | 0 | } |
609 | | |
610 | 0 | if (foreign_tc == ::ot_array) { |
611 | 0 | QTC::TC("qpdf", "QPDF reserve array"); |
612 | 0 | for (auto const& item: foreign.as_array()) { |
613 | 0 | reserveObjects(item, obj_copier, false); |
614 | 0 | } |
615 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
616 | 0 | QTC::TC("qpdf", "QPDF reserve dictionary"); |
617 | 0 | for (auto const& item: foreign.as_dictionary()) { |
618 | 0 | if (!item.second.null()) { |
619 | 0 | reserveObjects(item.second, obj_copier, false); |
620 | 0 | } |
621 | 0 | } |
622 | 0 | } else if (foreign_tc == ::ot_stream) { |
623 | 0 | QTC::TC("qpdf", "QPDF reserve stream"); |
624 | 0 | reserveObjects(foreign.getDict(), obj_copier, false); |
625 | 0 | } |
626 | |
|
627 | 0 | obj_copier.visiting.erase(foreign); |
628 | 0 | } |
629 | | |
630 | | QPDFObjectHandle |
631 | | QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) |
632 | 0 | { |
633 | 0 | auto foreign_tc = foreign.getTypeCode(); |
634 | 0 | QPDFObjectHandle result; |
635 | 0 | if ((!top) && foreign.isIndirect()) { |
636 | 0 | QTC::TC("qpdf", "QPDF replace indirect"); |
637 | 0 | auto mapping = obj_copier.object_map.find(foreign.getObjGen()); |
638 | 0 | if (mapping == obj_copier.object_map.end()) { |
639 | | // This case would occur if this is a reference to a Pages object that we didn't |
640 | | // traverse into. |
641 | 0 | QTC::TC("qpdf", "QPDF replace foreign indirect with null"); |
642 | 0 | result = QPDFObjectHandle::newNull(); |
643 | 0 | } else { |
644 | 0 | result = mapping->second; |
645 | 0 | } |
646 | 0 | } else if (foreign_tc == ::ot_array) { |
647 | 0 | QTC::TC("qpdf", "QPDF replace array"); |
648 | 0 | result = QPDFObjectHandle::newArray(); |
649 | 0 | for (auto const& item: foreign.as_array()) { |
650 | 0 | result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false)); |
651 | 0 | } |
652 | 0 | } else if (foreign_tc == ::ot_dictionary) { |
653 | 0 | QTC::TC("qpdf", "QPDF replace dictionary"); |
654 | 0 | result = QPDFObjectHandle::newDictionary(); |
655 | 0 | for (auto const& [key, value]: foreign.as_dictionary()) { |
656 | 0 | if (!value.null()) { |
657 | 0 | result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
658 | 0 | } |
659 | 0 | } |
660 | 0 | } else if (foreign_tc == ::ot_stream) { |
661 | 0 | QTC::TC("qpdf", "QPDF replace stream"); |
662 | 0 | result = obj_copier.object_map[foreign.getObjGen()]; |
663 | 0 | QPDFObjectHandle dict = result.getDict(); |
664 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
665 | 0 | for (auto const& [key, value]: old_dict.as_dictionary()) { |
666 | 0 | if (!value.null()) { |
667 | 0 | dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false)); |
668 | 0 | } |
669 | 0 | } |
670 | 0 | copyStreamData(result, foreign); |
671 | 0 | } else { |
672 | 0 | foreign.assertScalar(); |
673 | 0 | result = foreign; |
674 | 0 | result.makeDirect(); |
675 | 0 | } |
676 | |
|
677 | 0 | if (top && (!result.isStream()) && result.isIndirect()) { |
678 | 0 | throw std::logic_error("replacement for foreign object is indirect"); |
679 | 0 | } |
680 | | |
681 | 0 | return result; |
682 | 0 | } |
683 | | |
684 | | void |
685 | | QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) |
686 | 0 | { |
687 | | // This method was originally written for copying foreign streams, but it is used by |
688 | | // QPDFObjectHandle to copy streams from the same QPDF object as well. |
689 | |
|
690 | 0 | QPDFObjectHandle dict = result.getDict(); |
691 | 0 | QPDFObjectHandle old_dict = foreign.getDict(); |
692 | 0 | if (m->copied_stream_data_provider == nullptr) { |
693 | 0 | m->copied_stream_data_provider = new CopiedStreamDataProvider(*this); |
694 | 0 | m->copied_streams = |
695 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); |
696 | 0 | } |
697 | 0 | QPDFObjGen local_og(result.getObjGen()); |
698 | | // Copy information from the foreign stream so we can pipe its data later without keeping the |
699 | | // original QPDF object around. |
700 | |
|
701 | 0 | QPDF& foreign_stream_qpdf = |
702 | 0 | foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); |
703 | |
|
704 | 0 | auto stream = foreign.as_stream(); |
705 | 0 | if (!stream) { |
706 | 0 | throw std::logic_error("unable to retrieve underlying stream object from foreign stream"); |
707 | 0 | } |
708 | 0 | std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer(); |
709 | 0 | if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { |
710 | | // Pull the stream data into a buffer before attempting the copy operation. Do it on the |
711 | | // source stream so that if the source stream is copied multiple times, we don't have to |
712 | | // keep duplicating the memory. |
713 | 0 | QTC::TC("qpdf", "QPDF immediate copy stream data"); |
714 | 0 | foreign.replaceStreamData( |
715 | 0 | foreign.getRawStreamData(), |
716 | 0 | old_dict.getKey("/Filter"), |
717 | 0 | old_dict.getKey("/DecodeParms")); |
718 | 0 | stream_buffer = stream.getStreamDataBuffer(); |
719 | 0 | } |
720 | 0 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider = |
721 | 0 | stream.getStreamDataProvider(); |
722 | 0 | if (stream_buffer.get()) { |
723 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with buffer"); |
724 | 0 | result.replaceStreamData( |
725 | 0 | stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
726 | 0 | } else if (stream_provider.get()) { |
727 | | // In this case, the remote stream's QPDF must stay in scope. |
728 | 0 | QTC::TC("qpdf", "QPDF copy foreign stream with provider"); |
729 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign); |
730 | 0 | result.replaceStreamData( |
731 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
732 | 0 | } else { |
733 | 0 | auto foreign_stream_data = std::make_shared<ForeignStreamData>( |
734 | 0 | foreign_stream_qpdf.m->encp, |
735 | 0 | foreign_stream_qpdf.m->file, |
736 | 0 | foreign, |
737 | 0 | foreign.getParsedOffset(), |
738 | 0 | stream.getLength(), |
739 | 0 | dict, |
740 | 0 | stream.isRootMetadata()); |
741 | 0 | m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data); |
742 | 0 | result.replaceStreamData( |
743 | 0 | m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | | unsigned long long |
748 | | QPDF::getUniqueId() const |
749 | 0 | { |
750 | 0 | return m->unique_id; |
751 | 0 | } |
752 | | |
753 | | std::string |
754 | | QPDF::getFilename() const |
755 | 71.0k | { |
756 | 71.0k | return m->file->getName(); |
757 | 71.0k | } |
758 | | |
759 | | PDFVersion |
760 | | QPDF::getVersionAsPDFVersion() |
761 | 0 | { |
762 | 0 | int major = 1; |
763 | 0 | int minor = 3; |
764 | 0 | int extension_level = getExtensionLevel(); |
765 | |
|
766 | 0 | std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)"); |
767 | 0 | std::smatch match; |
768 | 0 | if (std::regex_search(m->pdf_version, match, v)) { |
769 | 0 | major = QUtil::string_to_int(match[1].str().c_str()); |
770 | 0 | minor = QUtil::string_to_int(match[2].str().c_str()); |
771 | 0 | } |
772 | |
|
773 | 0 | return {major, minor, extension_level}; |
774 | 0 | } |
775 | | |
776 | | std::string |
777 | | QPDF::getPDFVersion() const |
778 | 9.32k | { |
779 | 9.32k | return m->pdf_version; |
780 | 9.32k | } |
781 | | |
782 | | int |
783 | | QPDF::getExtensionLevel() |
784 | 9.32k | { |
785 | 9.32k | int result = 0; |
786 | 9.32k | QPDFObjectHandle obj = getRoot(); |
787 | 9.32k | if (obj.hasKey("/Extensions")) { |
788 | 385 | obj = obj.getKey("/Extensions"); |
789 | 385 | if (obj.isDictionary() && obj.hasKey("/ADBE")) { |
790 | 283 | obj = obj.getKey("/ADBE"); |
791 | 283 | if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) { |
792 | 21 | obj = obj.getKey("/ExtensionLevel"); |
793 | 21 | if (obj.isInteger()) { |
794 | 20 | result = obj.getIntValueAsInt(); |
795 | 20 | } |
796 | 21 | } |
797 | 283 | } |
798 | 385 | } |
799 | 9.32k | return result; |
800 | 9.32k | } |
801 | | |
802 | | QPDFObjectHandle |
803 | | QPDF::getTrailer() |
804 | 36.4k | { |
805 | 36.4k | return m->trailer; |
806 | 36.4k | } |
807 | | |
808 | | QPDFObjectHandle |
809 | | QPDF::getRoot() |
810 | 66.2k | { |
811 | 66.2k | QPDFObjectHandle root = m->trailer.getKey("/Root"); |
812 | 66.2k | if (!root.isDictionary()) { |
813 | 5.15k | throw damagedPDF("", -1, "unable to find /Root dictionary"); |
814 | 61.0k | } else if ( |
815 | | // Check_mode is an interim solution to request #810 pending a more comprehensive review of |
816 | | // the approach to more extensive checks and warning levels. |
817 | 61.0k | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { |
818 | 0 | warn(damagedPDF("", -1, "catalog /Type entry missing or invalid")); |
819 | 0 | root.replaceKey("/Type", "/Catalog"_qpdf); |
820 | 0 | } |
821 | 61.0k | return root; |
822 | 66.2k | } |
823 | | |
824 | | std::map<QPDFObjGen, QPDFXRefEntry> |
825 | | QPDF::getXRefTable() |
826 | 0 | { |
827 | 0 | return getXRefTableInternal(); |
828 | 0 | } |
829 | | |
830 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
831 | | QPDF::getXRefTableInternal() |
832 | 9.34k | { |
833 | 9.34k | if (!m->parsed) { |
834 | 0 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
835 | 0 | } |
836 | | |
837 | 9.34k | return m->xref_table; |
838 | 9.34k | } |
839 | | |
840 | | bool |
841 | | QPDF::pipeStreamData( |
842 | | std::shared_ptr<EncryptionParameters> encp, |
843 | | std::shared_ptr<InputSource> file, |
844 | | QPDF& qpdf_for_warning, |
845 | | QPDFObjGen og, |
846 | | qpdf_offset_t offset, |
847 | | size_t length, |
848 | | QPDFObjectHandle stream_dict, |
849 | | bool is_root_metadata, |
850 | | Pipeline* pipeline, |
851 | | bool suppress_warnings, |
852 | | bool will_retry) |
853 | 39.5k | { |
854 | 39.5k | std::unique_ptr<Pipeline> to_delete; |
855 | 39.5k | if (encp->encrypted) { |
856 | 5.50k | decryptStream( |
857 | 5.50k | encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); |
858 | 5.50k | } |
859 | | |
860 | 39.5k | bool attempted_finish = false; |
861 | 39.5k | try { |
862 | 39.5k | auto buf = file->read(length, offset); |
863 | 39.5k | if (buf.size() != length) { |
864 | 0 | throw damagedPDF( |
865 | 0 | *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); |
866 | 0 | } |
867 | 39.5k | pipeline->write(buf.data(), length); |
868 | 39.5k | attempted_finish = true; |
869 | 39.5k | pipeline->finish(); |
870 | 39.5k | return true; |
871 | 39.5k | } catch (QPDFExc& e) { |
872 | 1 | if (!suppress_warnings) { |
873 | 1 | qpdf_for_warning.warn(e); |
874 | 1 | } |
875 | 10.9k | } catch (std::exception& e) { |
876 | 10.9k | if (!suppress_warnings) { |
877 | 10.9k | QTC::TC("qpdf", "QPDF decoding error warning"); |
878 | 10.9k | qpdf_for_warning.warn( |
879 | | // line-break |
880 | 10.9k | damagedPDF( |
881 | 10.9k | *file, |
882 | 10.9k | "", |
883 | 10.9k | file->getLastOffset(), |
884 | 10.9k | ("error decoding stream data for object " + og.unparse(' ') + ": " + |
885 | 10.9k | e.what()))); |
886 | 10.9k | if (will_retry) { |
887 | 9.28k | qpdf_for_warning.warn( |
888 | | // line-break |
889 | 9.28k | damagedPDF( |
890 | 9.28k | *file, |
891 | 9.28k | "", |
892 | 9.28k | file->getLastOffset(), |
893 | 9.28k | "stream will be re-processed without filtering to avoid data loss")); |
894 | 9.28k | } |
895 | 10.9k | } |
896 | 10.9k | } |
897 | 10.8k | if (!attempted_finish) { |
898 | 5.28k | try { |
899 | 5.28k | pipeline->finish(); |
900 | 5.28k | } catch (std::exception&) { |
901 | | // ignore |
902 | 3.43k | } |
903 | 5.28k | } |
904 | 10.8k | return false; |
905 | 10.8k | } |
906 | | |
907 | | bool |
908 | | QPDF::pipeStreamData( |
909 | | QPDFObjGen og, |
910 | | qpdf_offset_t offset, |
911 | | size_t length, |
912 | | QPDFObjectHandle stream_dict, |
913 | | bool is_root_metadata, |
914 | | Pipeline* pipeline, |
915 | | bool suppress_warnings, |
916 | | bool will_retry) |
917 | 39.5k | { |
918 | 39.5k | return pipeStreamData( |
919 | 39.5k | m->encp, |
920 | 39.5k | m->file, |
921 | 39.5k | *this, |
922 | 39.5k | og, |
923 | 39.5k | offset, |
924 | 39.5k | length, |
925 | 39.5k | stream_dict, |
926 | 39.5k | is_root_metadata, |
927 | 39.5k | pipeline, |
928 | 39.5k | suppress_warnings, |
929 | 39.5k | will_retry); |
930 | 39.5k | } |
931 | | |
932 | | bool |
933 | | QPDF::pipeForeignStreamData( |
934 | | std::shared_ptr<ForeignStreamData> foreign, |
935 | | Pipeline* pipeline, |
936 | | bool suppress_warnings, |
937 | | bool will_retry) |
938 | 0 | { |
939 | 0 | if (foreign->encp->encrypted) { |
940 | 0 | QTC::TC("qpdf", "QPDF pipe foreign encrypted stream"); |
941 | 0 | } |
942 | 0 | return pipeStreamData( |
943 | 0 | foreign->encp, |
944 | 0 | foreign->file, |
945 | 0 | *this, |
946 | 0 | foreign->foreign_og, |
947 | 0 | foreign->offset, |
948 | 0 | foreign->length, |
949 | 0 | foreign->local_dict, |
950 | 0 | foreign->is_root_metadata, |
951 | 0 | pipeline, |
952 | 0 | suppress_warnings, |
953 | 0 | will_retry); |
954 | 0 | } |
955 | | |
956 | | // Throw a generic exception when we lack context for something more specific. New code should not |
957 | | // use this. This method exists to improve somewhat from calling assert in very old code. |
958 | | void |
959 | | QPDF::stopOnError(std::string const& message) |
960 | 18.8k | { |
961 | 18.8k | throw damagedPDF("", message); |
962 | 18.8k | } |
963 | | |
964 | | // Return an exception of type qpdf_e_damaged_pdf. |
965 | | QPDFExc |
966 | | QPDF::damagedPDF( |
967 | | InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) |
968 | 77.0k | { |
969 | 77.0k | return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; |
970 | 77.0k | } |
971 | | |
972 | | // Return an exception of type qpdf_e_damaged_pdf. The object is taken from |
973 | | // m->last_object_description. |
974 | | QPDFExc |
975 | | QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) |
976 | 56.8k | { |
977 | 56.8k | return damagedPDF(input, m->last_object_description, offset, message); |
978 | 56.8k | } |
979 | | |
980 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
981 | | QPDFExc |
982 | | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) |
983 | 283k | { |
984 | 283k | return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; |
985 | 283k | } |
986 | | |
987 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
988 | | // offset from .m->file->getLastOffset(). |
989 | | QPDFExc |
990 | | QPDF::damagedPDF(std::string const& object, std::string const& message) |
991 | 60.9k | { |
992 | 60.9k | return damagedPDF(object, m->file->getLastOffset(), message); |
993 | 60.9k | } |
994 | | |
995 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
996 | | // from .m->last_object_description. |
997 | | QPDFExc |
998 | | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) |
999 | 46.5k | { |
1000 | 46.5k | return damagedPDF(m->last_object_description, offset, message); |
1001 | 46.5k | } |
1002 | | |
1003 | | // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
1004 | | // from m->last_object_description and the offset from m->file->getLastOffset(). |
1005 | | QPDFExc |
1006 | | QPDF::damagedPDF(std::string const& message) |
1007 | 47.6k | { |
1008 | 47.6k | return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); |
1009 | 47.6k | } |
1010 | | |
1011 | | bool |
1012 | | QPDF::everCalledGetAllPages() const |
1013 | 0 | { |
1014 | 0 | return m->ever_called_get_all_pages; |
1015 | 0 | } |
1016 | | |
1017 | | bool |
1018 | | QPDF::everPushedInheritedAttributesToPages() const |
1019 | 0 | { |
1020 | 0 | return m->ever_pushed_inherited_attributes_to_pages; |
1021 | 0 | } |
1022 | | |
1023 | | void |
1024 | | QPDF::removeSecurityRestrictions() |
1025 | 0 | { |
1026 | 0 | auto root = getRoot(); |
1027 | 0 | root.removeKey("/Perms"); |
1028 | 0 | auto acroform = root.getKey("/AcroForm"); |
1029 | 0 | if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) { |
1030 | 0 | acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); |
1031 | 0 | } |
1032 | 0 | } |