/src/qpdf/libqpdf/QPDF_Stream.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/QPDFObjectHandle_private.hh> |
2 | | |
3 | | #include <qpdf/ContentNormalizer.hh> |
4 | | #include <qpdf/JSON_writer.hh> |
5 | | #include <qpdf/Pipeline.hh> |
6 | | #include <qpdf/Pl_Base64.hh> |
7 | | #include <qpdf/Pl_Buffer.hh> |
8 | | #include <qpdf/Pl_Count.hh> |
9 | | #include <qpdf/Pl_Discard.hh> |
10 | | #include <qpdf/Pl_Flate.hh> |
11 | | #include <qpdf/Pl_QPDFTokenizer.hh> |
12 | | #include <qpdf/QIntC.hh> |
13 | | #include <qpdf/QPDFExc.hh> |
14 | | #include <qpdf/QPDF_private.hh> |
15 | | #include <qpdf/QTC.hh> |
16 | | #include <qpdf/QUtil.hh> |
17 | | #include <qpdf/SF_ASCII85Decode.hh> |
18 | | #include <qpdf/SF_ASCIIHexDecode.hh> |
19 | | #include <qpdf/SF_DCTDecode.hh> |
20 | | #include <qpdf/SF_FlateLzwDecode.hh> |
21 | | #include <qpdf/SF_RunLengthDecode.hh> |
22 | | |
23 | | #include <stdexcept> |
24 | | |
25 | | using namespace std::literals; |
26 | | using namespace qpdf; |
27 | | |
28 | | namespace |
29 | | { |
30 | | class SF_Crypt final: public QPDFStreamFilter |
31 | | { |
32 | | public: |
33 | 246 | SF_Crypt() = default; |
34 | | ~SF_Crypt() final = default; |
35 | | |
36 | | bool |
37 | | setDecodeParms(QPDFObjectHandle decode_parms) final |
38 | 179 | { |
39 | | // we only validate here - processing happens in decryptStream |
40 | 179 | if (auto dict = decode_parms.as_dictionary(optional)) { |
41 | 508 | for (auto const& [key, value]: dict) { |
42 | 508 | if (key == "/Type" && |
43 | 508 | (value.null() || |
44 | 16 | (value.isName() && value.getName() == "/CryptFilterDecodeParms"))) { |
45 | 6 | continue; |
46 | 6 | } |
47 | 502 | if (key == "/Name") { |
48 | 3 | continue; |
49 | 3 | } |
50 | 499 | if (!value.null()) { |
51 | 25 | return false; |
52 | 25 | } |
53 | 499 | } |
54 | 151 | return true; |
55 | 176 | } |
56 | 3 | return false; |
57 | 179 | } |
58 | | |
59 | | Pipeline* |
60 | | getDecodePipeline(Pipeline*) final |
61 | 151 | { |
62 | | // Not used -- handled by pipeStreamData |
63 | 151 | return nullptr; |
64 | 151 | } |
65 | | }; |
66 | | |
67 | | class StreamBlobProvider |
68 | | { |
69 | | public: |
70 | | StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) : |
71 | 0 | stream(stream), |
72 | 0 | decode_level(decode_level) |
73 | 0 | { |
74 | 0 | } |
75 | | void |
76 | | operator()(Pipeline* p) |
77 | 0 | { |
78 | 0 | stream.pipeStreamData(p, nullptr, 0, decode_level, false, false); |
79 | 0 | } |
80 | | |
81 | | private: |
82 | | Stream stream; |
83 | | qpdf_stream_decode_level_e decode_level; |
84 | | }; |
85 | | |
86 | | /// User defined streamfilter factories |
87 | | std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories; |
88 | | bool filter_factories_registered = false; |
89 | | } // namespace |
90 | | |
91 | | std::string |
92 | | QPDF_Stream::Members::expand_filter_name(std::string const& name) const |
93 | 0 | { |
94 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
95 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also |
96 | | // accepts them for stream filters. |
97 | 0 | if (name == "/AHx") { |
98 | 0 | return "/ASCIIHexDecode"; |
99 | 0 | } |
100 | 0 | if (name == "/A85") { |
101 | 0 | return "/ASCII85Decode"; |
102 | 0 | } |
103 | 0 | if (name == "/LZW") { |
104 | 0 | return "/LZWDecode"; |
105 | 0 | } |
106 | 0 | if (name == "/Fl") { |
107 | 0 | return "/FlateDecode"; |
108 | 0 | } |
109 | 0 | if (name == "/RL") { |
110 | 0 | return "/RunLengthDecode"; |
111 | 0 | } |
112 | 0 | if (name == "/CCF") { |
113 | 0 | return "/CCITTFaxDecode"; |
114 | 0 | } |
115 | 0 | if (name == "/DCT") { |
116 | 0 | return "/DCTDecode"; |
117 | 0 | } |
118 | 0 | return name; |
119 | 0 | }; |
120 | | |
121 | | std::function<std::shared_ptr<QPDFStreamFilter>()> |
122 | | QPDF_Stream::Members::filter_factory(std::string const& name) const |
123 | 17.2k | { |
124 | 17.2k | if (filter_factories_registered) [[unlikely]] { |
125 | | // We need to check user provided filters first as we allow users to replace qpdf provided |
126 | | // default filters. This will have a performance impact if the facility to register stream |
127 | | // filters is actually used. We can optimize this away if necessary. |
128 | 0 | auto ff = filter_factories.find(expand_filter_name(name)); |
129 | 0 | if (ff != filter_factories.end()) { |
130 | 0 | return ff->second; |
131 | 0 | } |
132 | 0 | } |
133 | 17.2k | if (name == "/FlateDecode") { |
134 | 3.09k | return SF_FlateLzwDecode::flate_factory; |
135 | 3.09k | } |
136 | 14.1k | if (name == "/Crypt") { |
137 | 246 | return []() { return std::make_shared<SF_Crypt>(); }; |
138 | 246 | } |
139 | 13.9k | if (name == "/LZWDecode") { |
140 | 431 | return SF_FlateLzwDecode::lzw_factory; |
141 | 431 | } |
142 | 13.5k | if (name == "/RunLengthDecode") { |
143 | 64 | return SF_RunLengthDecode::factory; |
144 | 64 | } |
145 | 13.4k | if (name == "/DCTDecode") { |
146 | 10 | return SF_DCTDecode::factory; |
147 | 10 | } |
148 | 13.4k | if (name == "/ASCII85Decode") { |
149 | 582 | return SF_ASCII85Decode::factory; |
150 | 582 | } |
151 | 12.8k | if (name == "/ASCIIHexDecode") { |
152 | 32 | return SF_ASCIIHexDecode::factory; |
153 | 32 | } |
154 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
155 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader |
156 | | // also accepts them for stream filters. |
157 | | |
158 | 12.8k | if (name == "/Fl") { |
159 | 1.99k | return SF_FlateLzwDecode::flate_factory; |
160 | 1.99k | } |
161 | 10.8k | if (name == "/AHx") { |
162 | 446 | return SF_ASCIIHexDecode::factory; |
163 | 446 | } |
164 | 10.3k | if (name == "/A85") { |
165 | 24 | return SF_ASCII85Decode::factory; |
166 | 24 | } |
167 | 10.3k | if (name == "/LZW") { |
168 | 8.57k | return SF_FlateLzwDecode::lzw_factory; |
169 | 8.57k | } |
170 | 1.79k | if (name == "/RL") { |
171 | 1.45k | return SF_RunLengthDecode::factory; |
172 | 1.45k | } |
173 | 335 | if (name == "/DCT") { |
174 | 24 | return SF_DCTDecode::factory; |
175 | 24 | } |
176 | 311 | return nullptr; |
177 | 335 | } |
178 | | |
179 | | Stream::Stream( |
180 | | QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : |
181 | 20.1k | BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length)) |
182 | 20.1k | { |
183 | 20.1k | auto descr = std::make_shared<QPDFObject::Description>( |
184 | 20.1k | qpdf.getFilename() + ", stream object " + og.unparse(' ')); |
185 | 20.1k | obj->setDescription(&qpdf, descr, offset); |
186 | 20.1k | setDictDescription(); |
187 | 20.1k | } |
188 | | |
189 | | void |
190 | | Stream::registerStreamFilter( |
191 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
192 | 0 | { |
193 | 0 | filter_factories[filter_name] = factory; |
194 | 0 | filter_factories_registered = true; |
195 | 0 | } |
196 | | |
197 | | JSON |
198 | | Stream::getStreamJSON( |
199 | | int json_version, |
200 | | qpdf_json_stream_data_e json_data, |
201 | | qpdf_stream_decode_level_e decode_level, |
202 | | Pipeline* p, |
203 | | std::string const& data_filename) |
204 | 0 | { |
205 | 0 | Pl_Buffer pb{"streamjson"}; |
206 | 0 | JSON::Writer jw{&pb, 0}; |
207 | 0 | decode_level = |
208 | 0 | writeStreamJSON(json_version, jw, json_data, decode_level, p, data_filename, true); |
209 | 0 | pb.finish(); |
210 | 0 | auto result = JSON::parse(pb.getString()); |
211 | 0 | if (json_data == qpdf_sj_inline) { |
212 | 0 | result.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level))); |
213 | 0 | } |
214 | 0 | return result; |
215 | 0 | } |
216 | | |
217 | | qpdf_stream_decode_level_e |
218 | | Stream::writeStreamJSON( |
219 | | int json_version, |
220 | | JSON::Writer& jw, |
221 | | qpdf_json_stream_data_e json_data, |
222 | | qpdf_stream_decode_level_e decode_level, |
223 | | Pipeline* p, |
224 | | std::string const& data_filename, |
225 | | bool no_data_key) |
226 | 0 | { |
227 | 0 | auto s = stream(); |
228 | 0 | switch (json_data) { |
229 | 0 | case qpdf_sj_none: |
230 | 0 | case qpdf_sj_inline: |
231 | 0 | if (p != nullptr) { |
232 | 0 | throw std::logic_error( |
233 | 0 | "QPDF_Stream::writeStreamJSON: pipeline should only be supplied " |
234 | 0 | "when json_data is file"); |
235 | 0 | } |
236 | 0 | break; |
237 | 0 | case qpdf_sj_file: |
238 | 0 | if (p == nullptr) { |
239 | 0 | throw std::logic_error( |
240 | 0 | "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file"); |
241 | 0 | } |
242 | 0 | if (data_filename.empty()) { |
243 | 0 | throw std::logic_error( |
244 | 0 | "QPDF_Stream::writeStreamJSON: data_filename must be supplied " |
245 | 0 | "when json_data is file"); |
246 | 0 | } |
247 | 0 | break; |
248 | 0 | } |
249 | | |
250 | 0 | jw.writeStart('{'); |
251 | |
|
252 | 0 | if (json_data == qpdf_sj_none) { |
253 | 0 | jw.writeNext(); |
254 | 0 | jw << R"("dict": )"; |
255 | 0 | s->stream_dict.writeJSON(json_version, jw); |
256 | 0 | jw.writeEnd('}'); |
257 | 0 | return decode_level; |
258 | 0 | } |
259 | | |
260 | 0 | Pl_Discard discard; |
261 | 0 | Pl_Buffer buf_pl{"stream data"}; |
262 | 0 | Pipeline* data_pipeline = &buf_pl; |
263 | 0 | if (no_data_key && json_data == qpdf_sj_inline) { |
264 | 0 | data_pipeline = &discard; |
265 | 0 | } |
266 | | // pipeStreamData produced valid data. |
267 | 0 | bool buf_pl_ready = false; |
268 | 0 | bool filtered = false; |
269 | 0 | bool filter = (decode_level != qpdf_dl_none); |
270 | 0 | for (int attempt = 1; attempt <= 2; ++attempt) { |
271 | 0 | bool succeeded = |
272 | 0 | pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1)); |
273 | 0 | if (!succeeded || (filter && !filtered)) { |
274 | | // Try again |
275 | 0 | filter = false; |
276 | 0 | decode_level = qpdf_dl_none; |
277 | 0 | buf_pl.getString(); // reset buf_pl |
278 | 0 | } else { |
279 | 0 | buf_pl_ready = true; |
280 | 0 | break; |
281 | 0 | } |
282 | 0 | } |
283 | 0 | if (!buf_pl_ready) { |
284 | 0 | throw std::logic_error("QPDF_Stream: failed to get stream data"); |
285 | 0 | } |
286 | | // We can use unsafeShallowCopy because we are only touching top-level keys. |
287 | 0 | auto dict = s->stream_dict.unsafeShallowCopy(); |
288 | 0 | dict.removeKey("/Length"); |
289 | 0 | if (filter && filtered) { |
290 | 0 | dict.removeKey("/Filter"); |
291 | 0 | dict.removeKey("/DecodeParms"); |
292 | 0 | } |
293 | 0 | if (json_data == qpdf_sj_file) { |
294 | 0 | jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\""; |
295 | 0 | p->writeString(buf_pl.getString()); |
296 | 0 | } else if (json_data == qpdf_sj_inline) { |
297 | 0 | if (!no_data_key) { |
298 | 0 | jw.writeNext() << R"("data": ")"; |
299 | 0 | jw.writeBase64(buf_pl.getString()) << "\""; |
300 | 0 | } |
301 | 0 | } else { |
302 | 0 | throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data"); |
303 | 0 | } |
304 | | |
305 | 0 | jw.writeNext() << R"("dict": )"; |
306 | 0 | dict.writeJSON(json_version, jw); |
307 | 0 | jw.writeEnd('}'); |
308 | |
|
309 | 0 | return decode_level; |
310 | 0 | } |
311 | | |
312 | | void |
313 | | qpdf::Stream::setDictDescription() |
314 | 20.1k | { |
315 | 20.1k | auto s = stream(); |
316 | 20.1k | if (!s->stream_dict.hasObjectDescription()) { |
317 | 0 | s->stream_dict.setObjectDescription( |
318 | 0 | obj->getQPDF(), obj->getDescription() + " -> stream dictionary"); |
319 | 0 | } |
320 | 20.1k | } |
321 | | |
322 | | std::shared_ptr<Buffer> |
323 | | Stream::getStreamData(qpdf_stream_decode_level_e decode_level) |
324 | 9.64k | { |
325 | 9.64k | Pl_Buffer buf("stream data buffer"); |
326 | 9.64k | bool filtered; |
327 | 9.64k | pipeStreamData(&buf, &filtered, 0, decode_level, false, false); |
328 | 9.64k | if (!filtered) { |
329 | 2.24k | throw QPDFExc( |
330 | 2.24k | qpdf_e_unsupported, |
331 | 2.24k | obj->getQPDF()->getFilename(), |
332 | 2.24k | "", |
333 | 2.24k | obj->getParsedOffset(), |
334 | 2.24k | "getStreamData called on unfilterable stream"); |
335 | 2.24k | } |
336 | 7.40k | QTC::TC("qpdf", "QPDF_Stream getStreamData"); |
337 | 7.40k | return buf.getBufferSharedPointer(); |
338 | 9.64k | } |
339 | | |
340 | | std::shared_ptr<Buffer> |
341 | | Stream::getRawStreamData() |
342 | 0 | { |
343 | 0 | Pl_Buffer buf("stream data buffer"); |
344 | 0 | if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) { |
345 | 0 | throw QPDFExc( |
346 | 0 | qpdf_e_unsupported, |
347 | 0 | obj->getQPDF()->getFilename(), |
348 | 0 | "", |
349 | 0 | obj->getParsedOffset(), |
350 | 0 | "error getting raw stream data"); |
351 | 0 | } |
352 | 0 | QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); |
353 | 0 | return buf.getBufferSharedPointer(); |
354 | 0 | } |
355 | | |
356 | | bool |
357 | | Stream::isRootMetadata() const |
358 | 9.60k | { |
359 | 9.60k | if (!getDict().isDictionaryOfType("/Metadata", "/XML")) { |
360 | 9.57k | return false; |
361 | 9.57k | } |
362 | 26 | auto root_metadata = qpdf()->getRoot().getKey("/Metadata"); |
363 | 26 | return root_metadata.isSameObjectAs(obj); |
364 | 9.60k | } |
365 | | |
366 | | bool |
367 | | Stream::filterable( |
368 | | qpdf_stream_decode_level_e decode_level, |
369 | | std::vector<std::shared_ptr<QPDFStreamFilter>>& filters) |
370 | 9.64k | { |
371 | 9.64k | auto s = stream(); |
372 | | // Check filters |
373 | | |
374 | 9.64k | auto filter_obj = s->stream_dict.getKey("/Filter"); |
375 | | |
376 | 9.64k | if (filter_obj.isNull()) { |
377 | | // No filters |
378 | 3.27k | return true; |
379 | 3.27k | } |
380 | 6.37k | if (filter_obj.isName()) { |
381 | | // One filter |
382 | 5.09k | auto ff = s->filter_factory(filter_obj.getName()); |
383 | 5.09k | if (!ff) { |
384 | 182 | return false; |
385 | 182 | } |
386 | 4.91k | filters.emplace_back(ff()); |
387 | 4.91k | } else if (auto array = filter_obj.as_array(strict)) { |
388 | | // Potentially multiple filters |
389 | 12.2k | for (auto const& item: array) { |
390 | 12.2k | if (!item.isName()) { |
391 | 20 | warn("stream filter type is not name or array"); |
392 | 20 | return false; |
393 | 20 | } |
394 | 12.1k | auto ff = s->filter_factory(item.getName()); |
395 | 12.1k | if (!ff) { |
396 | 129 | filters.clear(); |
397 | 129 | return false; |
398 | 129 | } |
399 | 12.0k | filters.emplace_back(ff()); |
400 | 12.0k | } |
401 | 1.27k | } else { |
402 | 5 | warn("stream filter type is not name or array"); |
403 | 5 | return false; |
404 | 5 | } |
405 | | |
406 | | // filters now contains a list of filters to be applied in order. See which ones we can support. |
407 | | // See if we can support any decode parameters that are specified. |
408 | | |
409 | 6.03k | auto decode_obj = s->stream_dict.getKey("/DecodeParms"); |
410 | | |
411 | 6.03k | auto can_filter = // linebreak |
412 | 14.2k | [](auto d_level, auto& filter, auto& d_obj) -> bool { |
413 | 14.2k | if (!filter.setDecodeParms(d_obj) || |
414 | 14.2k | (d_level < qpdf_dl_all && filter.isLossyCompression()) || |
415 | 14.2k | (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) { |
416 | 179 | return false; |
417 | 179 | } |
418 | 14.0k | return true; |
419 | 14.2k | }; |
420 | | |
421 | 6.03k | auto decode_array = decode_obj.as_array(strict); |
422 | 6.03k | if (!decode_array || decode_array.size() == 0) { |
423 | 6.01k | if (decode_array) { |
424 | 3 | decode_obj = QPDFObjectHandle::newNull(); |
425 | 3 | } |
426 | | |
427 | 14.2k | for (auto& filter: filters) { |
428 | 14.2k | if (!can_filter(decode_level, *filter, decode_obj)) { |
429 | 176 | return false; |
430 | 176 | } |
431 | 14.2k | } |
432 | 6.01k | } else { |
433 | | // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose |
434 | | // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. |
435 | 17 | if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) { |
436 | 4 | warn("stream /DecodeParms length is inconsistent with filters"); |
437 | 4 | return false; |
438 | 4 | } |
439 | | |
440 | 13 | int i = -1; |
441 | 26 | for (auto& filter: filters) { |
442 | 26 | auto d_obj = decode_array.at(++i).second; |
443 | 26 | if (!can_filter(decode_level, *filter, d_obj)) { |
444 | 3 | return false; |
445 | 3 | } |
446 | 26 | } |
447 | 13 | } |
448 | | |
449 | 5.85k | return true; |
450 | 6.03k | } |
451 | | |
452 | | bool |
453 | | Stream::pipeStreamData( |
454 | | Pipeline* pipeline, |
455 | | bool* filterp, |
456 | | int encode_flags, |
457 | | qpdf_stream_decode_level_e decode_level, |
458 | | bool suppress_warnings, |
459 | | bool will_retry) |
460 | 9.64k | { |
461 | 9.64k | auto s = stream(); |
462 | 9.64k | std::vector<std::shared_ptr<QPDFStreamFilter>> filters; |
463 | 9.64k | bool ignored; |
464 | 9.64k | if (!filterp) { |
465 | 0 | filterp = &ignored; |
466 | 0 | } |
467 | 9.64k | bool& filter = *filterp; |
468 | | |
469 | 9.64k | const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0; |
470 | 9.64k | const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0; |
471 | 9.64k | const bool empty = empty_stream || empty_stream_data; |
472 | | |
473 | 9.64k | if (empty_stream || empty_stream_data) { |
474 | 491 | filter = true; |
475 | 491 | } |
476 | | |
477 | 9.64k | filter = empty || encode_flags || decode_level != qpdf_dl_none; |
478 | 9.64k | if (filter) { |
479 | 9.64k | filter = filterable(decode_level, filters); |
480 | 9.64k | } |
481 | | |
482 | 9.64k | if (!pipeline) { |
483 | 0 | QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline"); |
484 | | // Return value is whether we can filter in this case. |
485 | 0 | return filter; |
486 | 0 | } |
487 | | |
488 | | // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this |
489 | | // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those |
490 | | // objects. |
491 | 9.64k | std::vector<std::unique_ptr<Pipeline>> to_delete; |
492 | | |
493 | 9.64k | ContentNormalizer normalizer; |
494 | 9.64k | if (filter) { |
495 | 9.12k | if (encode_flags & qpdf_ef_compress) { |
496 | 0 | auto new_pipeline = |
497 | 0 | std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate); |
498 | 0 | pipeline = new_pipeline.get(); |
499 | 0 | to_delete.push_back(std::move(new_pipeline)); |
500 | 0 | } |
501 | | |
502 | 9.12k | if (encode_flags & qpdf_ef_normalize) { |
503 | 0 | auto new_pipeline = |
504 | 0 | std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline); |
505 | 0 | pipeline = new_pipeline.get(); |
506 | 0 | to_delete.push_back(std::move(new_pipeline)); |
507 | 0 | } |
508 | | |
509 | 9.12k | for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) { |
510 | 0 | auto new_pipeline = |
511 | 0 | std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline); |
512 | 0 | pipeline = new_pipeline.get(); |
513 | 0 | to_delete.push_back(std::move(new_pipeline)); |
514 | 0 | } |
515 | | |
516 | 23.1k | for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) { |
517 | 14.0k | if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) { |
518 | 13.8k | pipeline = decode_pipeline; |
519 | 13.8k | } |
520 | 14.0k | auto* flate = dynamic_cast<Pl_Flate*>(pipeline); |
521 | 14.0k | if (flate) { |
522 | 4.99k | flate->setWarnCallback([this](char const* msg, int code) { warn(msg); }); |
523 | 4.99k | } |
524 | 14.0k | } |
525 | 9.12k | } |
526 | | |
527 | 9.64k | if (s->stream_data.get()) { |
528 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data"); |
529 | 0 | pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize()); |
530 | 0 | pipeline->finish(); |
531 | 9.64k | } else if (s->stream_provider.get()) { |
532 | 0 | Pl_Count count("stream provider count", pipeline); |
533 | 0 | if (s->stream_provider->supportsRetry()) { |
534 | 0 | if (!s->stream_provider->provideStreamData( |
535 | 0 | obj->getObjGen(), &count, suppress_warnings, will_retry)) { |
536 | 0 | filter = false; |
537 | 0 | return false; |
538 | 0 | } |
539 | 0 | } else { |
540 | 0 | s->stream_provider->provideStreamData(obj->getObjGen(), &count); |
541 | 0 | } |
542 | 0 | qpdf_offset_t actual_length = count.getCount(); |
543 | 0 | if (s->stream_dict.hasKey("/Length")) { |
544 | 0 | auto desired_length = s->stream_dict.getKey("/Length").getIntValue(); |
545 | 0 | if (actual_length != desired_length) { |
546 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); |
547 | | // This would be caused by programmer error on the part of a library user, not by |
548 | | // invalid input data. |
549 | 0 | throw std::runtime_error( |
550 | 0 | "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " + |
551 | 0 | std::to_string(actual_length) + " bytes instead of expected " + |
552 | 0 | std::to_string(desired_length) + " bytes"); |
553 | 0 | } |
554 | 0 | } else { |
555 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length not provided"); |
556 | 0 | s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length)); |
557 | 0 | } |
558 | 9.64k | } else { |
559 | 9.64k | if (obj->getParsedOffset() == 0) { |
560 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe no stream data"); |
561 | 0 | throw std::logic_error("pipeStreamData called for stream with no data"); |
562 | 0 | } |
563 | 9.64k | QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); |
564 | 9.64k | if (!QPDF::Pipe::pipeStreamData( |
565 | 9.64k | obj->getQPDF(), |
566 | 9.64k | obj->getObjGen(), |
567 | 9.64k | obj->getParsedOffset(), |
568 | 9.64k | s->length, |
569 | 9.64k | s->stream_dict, |
570 | 9.64k | isRootMetadata(), |
571 | 9.64k | pipeline, |
572 | 9.64k | suppress_warnings, |
573 | 9.64k | will_retry)) { |
574 | 1.73k | filter = false; |
575 | 1.73k | return false; |
576 | 1.73k | } |
577 | 9.64k | } |
578 | | |
579 | 7.91k | if (filter && !suppress_warnings && normalizer.anyBadTokens()) { |
580 | 0 | warn("content normalization encountered bad tokens"); |
581 | 0 | if (normalizer.lastTokenWasBad()) { |
582 | 0 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); |
583 | 0 | warn( |
584 | 0 | "normalized content ended with a bad token; you may be able to resolve this by " |
585 | 0 | "coalescing content streams in combination with normalizing content. From the " |
586 | 0 | "command line, specify --coalesce-contents"); |
587 | 0 | } |
588 | 0 | warn( |
589 | 0 | "Resulting stream data may be corrupted but is may still useful for manual " |
590 | 0 | "inspection. For more information on this warning, search for content normalization " |
591 | 0 | "in the manual."); |
592 | 0 | } |
593 | | |
594 | 7.91k | return true; |
595 | 9.64k | } |
596 | | |
597 | | void |
598 | | Stream::replaceStreamData( |
599 | | std::shared_ptr<Buffer> data, |
600 | | QPDFObjectHandle const& filter, |
601 | | QPDFObjectHandle const& decode_parms) |
602 | 0 | { |
603 | 0 | auto s = stream(); |
604 | 0 | s->stream_data = data; |
605 | 0 | s->stream_provider = nullptr; |
606 | 0 | replaceFilterData(filter, decode_parms, data->getSize()); |
607 | 0 | } |
608 | | |
609 | | void |
610 | | Stream::replaceStreamData( |
611 | | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider, |
612 | | QPDFObjectHandle const& filter, |
613 | | QPDFObjectHandle const& decode_parms) |
614 | 0 | { |
615 | 0 | auto s = stream(); |
616 | 0 | s->stream_provider = provider; |
617 | 0 | s->stream_data = nullptr; |
618 | 0 | replaceFilterData(filter, decode_parms, 0); |
619 | 0 | } |
620 | | |
621 | | void |
622 | | Stream::replaceFilterData( |
623 | | QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length) |
624 | 0 | { |
625 | 0 | auto s = stream(); |
626 | 0 | if (filter) { |
627 | 0 | s->stream_dict.replaceKey("/Filter", filter); |
628 | 0 | } |
629 | 0 | if (decode_parms) { |
630 | 0 | s->stream_dict.replaceKey("/DecodeParms", decode_parms); |
631 | 0 | } |
632 | 0 | if (length == 0) { |
633 | 0 | QTC::TC("qpdf", "QPDF_Stream unknown stream length"); |
634 | 0 | s->stream_dict.removeKey("/Length"); |
635 | 0 | } else { |
636 | 0 | s->stream_dict.replaceKey( |
637 | 0 | "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length))); |
638 | 0 | } |
639 | 0 | } |
640 | | |
641 | | void |
642 | | Stream::warn(std::string const& message) |
643 | 2.12k | { |
644 | 2.12k | obj->getQPDF()->warn(qpdf_e_damaged_pdf, "", obj->getParsedOffset(), message); |
645 | 2.12k | } |
646 | | |
647 | | QPDFObjectHandle |
648 | | QPDFObjectHandle::getDict() const |
649 | 41.8k | { |
650 | 41.8k | return as_stream(error).getDict(); |
651 | 41.8k | } |
652 | | |
653 | | void |
654 | | QPDFObjectHandle::setFilterOnWrite(bool val) |
655 | 0 | { |
656 | 0 | as_stream(error).setFilterOnWrite(val); |
657 | 0 | } |
658 | | |
659 | | bool |
660 | | QPDFObjectHandle::getFilterOnWrite() |
661 | 0 | { |
662 | 0 | return as_stream(error).getFilterOnWrite(); |
663 | 0 | } |
664 | | |
665 | | bool |
666 | | QPDFObjectHandle::isDataModified() |
667 | 0 | { |
668 | 0 | return as_stream(error).isDataModified(); |
669 | 0 | } |
670 | | |
671 | | void |
672 | | QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) |
673 | 0 | { |
674 | 0 | as_stream(error).replaceDict(new_dict); |
675 | 0 | } |
676 | | |
677 | | bool |
678 | | QPDFObjectHandle::isRootMetadata() const |
679 | 0 | { |
680 | 0 | return as_stream(error).isRootMetadata(); |
681 | 0 | } |
682 | | |
683 | | std::shared_ptr<Buffer> |
684 | | QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) |
685 | 7.60k | { |
686 | 7.60k | return as_stream(error).getStreamData(level); |
687 | 7.60k | } |
688 | | |
689 | | std::shared_ptr<Buffer> |
690 | | QPDFObjectHandle::getRawStreamData() |
691 | 0 | { |
692 | 0 | return as_stream(error).getRawStreamData(); |
693 | 0 | } |
694 | | |
695 | | bool |
696 | | QPDFObjectHandle::pipeStreamData( |
697 | | Pipeline* p, |
698 | | bool* filtering_attempted, |
699 | | int encode_flags, |
700 | | qpdf_stream_decode_level_e decode_level, |
701 | | bool suppress_warnings, |
702 | | bool will_retry) |
703 | 0 | { |
704 | 0 | return as_stream(error).pipeStreamData( |
705 | 0 | p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
706 | 0 | } |
707 | | |
708 | | bool |
709 | | QPDFObjectHandle::pipeStreamData( |
710 | | Pipeline* p, |
711 | | int encode_flags, |
712 | | qpdf_stream_decode_level_e decode_level, |
713 | | bool suppress_warnings, |
714 | | bool will_retry) |
715 | 0 | { |
716 | 0 | bool filtering_attempted; |
717 | 0 | as_stream(error).pipeStreamData( |
718 | 0 | p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
719 | 0 | return filtering_attempted; |
720 | 0 | } |
721 | | |
722 | | bool |
723 | | QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress) |
724 | 0 | { |
725 | 0 | int encode_flags = 0; |
726 | 0 | qpdf_stream_decode_level_e decode_level = qpdf_dl_none; |
727 | 0 | if (filter) { |
728 | 0 | decode_level = qpdf_dl_generalized; |
729 | 0 | if (normalize) { |
730 | 0 | encode_flags |= qpdf_ef_normalize; |
731 | 0 | } |
732 | 0 | if (compress) { |
733 | 0 | encode_flags |= qpdf_ef_compress; |
734 | 0 | } |
735 | 0 | } |
736 | 0 | return pipeStreamData(p, encode_flags, decode_level, false); |
737 | 0 | } |
738 | | |
739 | | void |
740 | | QPDFObjectHandle::replaceStreamData( |
741 | | std::shared_ptr<Buffer> data, |
742 | | QPDFObjectHandle const& filter, |
743 | | QPDFObjectHandle const& decode_parms) |
744 | 0 | { |
745 | 0 | as_stream(error).replaceStreamData(data, filter, decode_parms); |
746 | 0 | } |
747 | | |
748 | | void |
749 | | QPDFObjectHandle::replaceStreamData( |
750 | | std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) |
751 | 0 | { |
752 | 0 | auto b = std::make_shared<Buffer>(data.length()); |
753 | 0 | unsigned char* bp = b->getBuffer(); |
754 | 0 | if (bp) { |
755 | 0 | memcpy(bp, data.c_str(), data.length()); |
756 | 0 | } |
757 | 0 | as_stream(error).replaceStreamData(b, filter, decode_parms); |
758 | 0 | } |
759 | | |
760 | | void |
761 | | QPDFObjectHandle::replaceStreamData( |
762 | | std::shared_ptr<StreamDataProvider> provider, |
763 | | QPDFObjectHandle const& filter, |
764 | | QPDFObjectHandle const& decode_parms) |
765 | 0 | { |
766 | 0 | as_stream(error).replaceStreamData(provider, filter, decode_parms); |
767 | 0 | } |
768 | | |
769 | | namespace |
770 | | { |
771 | | class FunctionProvider: public QPDFObjectHandle::StreamDataProvider |
772 | | { |
773 | | public: |
774 | | FunctionProvider(std::function<void(Pipeline*)> provider) : |
775 | 0 | StreamDataProvider(false), |
776 | 0 | p1(provider), |
777 | 0 | p2(nullptr) |
778 | 0 | { |
779 | 0 | } |
780 | | FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) : |
781 | 0 | StreamDataProvider(true), |
782 | 0 | p1(nullptr), |
783 | 0 | p2(provider) |
784 | 0 | { |
785 | 0 | } |
786 | | |
787 | | void |
788 | | provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override |
789 | 0 | { |
790 | 0 | p1(pipeline); |
791 | 0 | } |
792 | | |
793 | | bool |
794 | | provideStreamData( |
795 | | QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override |
796 | 0 | { |
797 | 0 | return p2(pipeline, suppress_warnings, will_retry); |
798 | 0 | } |
799 | | |
800 | | private: |
801 | | std::function<void(Pipeline*)> p1; |
802 | | std::function<bool(Pipeline*, bool, bool)> p2; |
803 | | }; |
804 | | } // namespace |
805 | | |
806 | | void |
807 | | QPDFObjectHandle::replaceStreamData( |
808 | | std::function<void(Pipeline*)> provider, |
809 | | QPDFObjectHandle const& filter, |
810 | | QPDFObjectHandle const& decode_parms) |
811 | 0 | { |
812 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
813 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
814 | 0 | } |
815 | | |
816 | | void |
817 | | QPDFObjectHandle::replaceStreamData( |
818 | | std::function<bool(Pipeline*, bool, bool)> provider, |
819 | | QPDFObjectHandle const& filter, |
820 | | QPDFObjectHandle const& decode_parms) |
821 | 0 | { |
822 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
823 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
824 | 0 | } |
825 | | |
826 | | JSON |
827 | | QPDFObjectHandle::getStreamJSON( |
828 | | int json_version, |
829 | | qpdf_json_stream_data_e json_data, |
830 | | qpdf_stream_decode_level_e decode_level, |
831 | | Pipeline* p, |
832 | | std::string const& data_filename) |
833 | 0 | { |
834 | 0 | return as_stream(error).getStreamJSON(json_version, json_data, decode_level, p, data_filename); |
835 | 0 | } |