/src/qpdf/libqpdf/QPDF_Stream.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/QPDFObjectHandle_private.hh> |
2 | | |
3 | | #include <qpdf/ContentNormalizer.hh> |
4 | | #include <qpdf/JSON_writer.hh> |
5 | | #include <qpdf/Pipeline.hh> |
6 | | #include <qpdf/Pl_Base64.hh> |
7 | | #include <qpdf/Pl_Buffer.hh> |
8 | | #include <qpdf/Pl_Count.hh> |
9 | | #include <qpdf/Pl_Discard.hh> |
10 | | #include <qpdf/Pl_Flate.hh> |
11 | | #include <qpdf/Pl_QPDFTokenizer.hh> |
12 | | #include <qpdf/QIntC.hh> |
13 | | #include <qpdf/QPDFExc.hh> |
14 | | #include <qpdf/QPDF_private.hh> |
15 | | #include <qpdf/QTC.hh> |
16 | | #include <qpdf/QUtil.hh> |
17 | | #include <qpdf/SF_ASCII85Decode.hh> |
18 | | #include <qpdf/SF_ASCIIHexDecode.hh> |
19 | | #include <qpdf/SF_DCTDecode.hh> |
20 | | #include <qpdf/SF_FlateLzwDecode.hh> |
21 | | #include <qpdf/SF_RunLengthDecode.hh> |
22 | | |
23 | | #include <stdexcept> |
24 | | |
25 | | using namespace std::literals; |
26 | | using namespace qpdf; |
27 | | |
28 | | namespace |
29 | | { |
30 | | class SF_Crypt final: public QPDFStreamFilter |
31 | | { |
32 | | public: |
33 | 6.88k | SF_Crypt() = default; |
34 | | ~SF_Crypt() final = default; |
35 | | |
36 | | bool |
37 | | setDecodeParms(QPDFObjectHandle decode_parms) final |
38 | 4.11k | { |
39 | | // we only validate here - processing happens in decryptStream |
40 | 4.11k | if (auto dict = decode_parms.as_dictionary(optional)) { |
41 | 5.60k | for (auto const& [key, value]: dict) { |
42 | 5.60k | if (key == "/Type" && |
43 | 5.60k | (value.null() || |
44 | 368 | (value.isName() && value.getName() == "/CryptFilterDecodeParms"))) { |
45 | 126 | continue; |
46 | 126 | } |
47 | 5.47k | if (key == "/Name") { |
48 | 165 | continue; |
49 | 165 | } |
50 | 5.31k | if (!value.null()) { |
51 | 540 | return false; |
52 | 540 | } |
53 | 5.31k | } |
54 | 3.43k | return true; |
55 | 3.97k | } |
56 | 143 | return false; |
57 | 4.11k | } |
58 | | |
59 | | Pipeline* |
60 | | getDecodePipeline(Pipeline*) final |
61 | 3.16k | { |
62 | | // Not used -- handled by pipeStreamData |
63 | 3.16k | return nullptr; |
64 | 3.16k | } |
65 | | }; |
66 | | |
67 | | class StreamBlobProvider |
68 | | { |
69 | | public: |
70 | | StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) : |
71 | 0 | stream(stream), |
72 | 0 | decode_level(decode_level) |
73 | 0 | { |
74 | 0 | } |
75 | | void |
76 | | operator()(Pipeline* p) |
77 | 0 | { |
78 | 0 | stream.pipeStreamData(p, nullptr, 0, decode_level, false, false); |
79 | 0 | } |
80 | | |
81 | | private: |
82 | | Stream stream; |
83 | | qpdf_stream_decode_level_e decode_level; |
84 | | }; |
85 | | |
    /// User-defined stream filter factories, keyed by filter name.
87 | | std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories; |
88 | | bool filter_factories_registered = false; |
89 | | } // namespace |
90 | | |
91 | | std::string |
92 | | QPDF_Stream::Members::expand_filter_name(std::string const& name) const |
93 | 0 | { |
94 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
95 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also |
96 | | // accepts them for stream filters. |
97 | 0 | if (name == "/AHx") { |
98 | 0 | return "/ASCIIHexDecode"; |
99 | 0 | } |
100 | 0 | if (name == "/A85") { |
101 | 0 | return "/ASCII85Decode"; |
102 | 0 | } |
103 | 0 | if (name == "/LZW") { |
104 | 0 | return "/LZWDecode"; |
105 | 0 | } |
106 | 0 | if (name == "/Fl") { |
107 | 0 | return "/FlateDecode"; |
108 | 0 | } |
109 | 0 | if (name == "/RL") { |
110 | 0 | return "/RunLengthDecode"; |
111 | 0 | } |
112 | 0 | if (name == "/CCF") { |
113 | 0 | return "/CCITTFaxDecode"; |
114 | 0 | } |
115 | 0 | if (name == "/DCT") { |
116 | 0 | return "/DCTDecode"; |
117 | 0 | } |
118 | 0 | return name; |
119 | 0 | }; |
120 | | |
121 | | std::function<std::shared_ptr<QPDFStreamFilter>()> |
122 | | QPDF_Stream::Members::filter_factory(std::string const& name) const |
123 | 378k | { |
124 | 378k | if (filter_factories_registered) [[unlikely]] { |
125 | | // We need to check user provided filters first as we allow users to replace qpdf provided |
126 | | // default filters. This will have a performance impact if the facility to register stream |
127 | | // filters is actually used. We can optimize this away if necessary. |
128 | 0 | auto ff = filter_factories.find(expand_filter_name(name)); |
129 | 0 | if (ff != filter_factories.end()) { |
130 | 0 | return ff->second; |
131 | 0 | } |
132 | 0 | } |
133 | 378k | if (name == "/FlateDecode") { |
134 | 137k | return SF_FlateLzwDecode::flate_factory; |
135 | 137k | } |
136 | 241k | if (name == "/Crypt") { |
137 | 6.88k | return []() { return std::make_shared<SF_Crypt>(); }; |
138 | 6.88k | } |
139 | 234k | if (name == "/LZWDecode") { |
140 | 14.3k | return SF_FlateLzwDecode::lzw_factory; |
141 | 14.3k | } |
142 | 219k | if (name == "/RunLengthDecode") { |
143 | 1.00k | return SF_RunLengthDecode::factory; |
144 | 1.00k | } |
145 | 218k | if (name == "/DCTDecode") { |
146 | 48.9k | return SF_DCTDecode::factory; |
147 | 48.9k | } |
148 | 169k | if (name == "/ASCII85Decode") { |
149 | 27.0k | return SF_ASCII85Decode::factory; |
150 | 27.0k | } |
151 | 142k | if (name == "/ASCIIHexDecode") { |
152 | 6.76k | return SF_ASCIIHexDecode::factory; |
153 | 6.76k | } |
154 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
155 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader |
156 | | // also accepts them for stream filters. |
157 | | |
158 | 136k | if (name == "/Fl") { |
159 | 20.0k | return SF_FlateLzwDecode::flate_factory; |
160 | 20.0k | } |
161 | 116k | if (name == "/AHx") { |
162 | 15.7k | return SF_ASCIIHexDecode::factory; |
163 | 15.7k | } |
164 | 100k | if (name == "/A85") { |
165 | 6.75k | return SF_ASCII85Decode::factory; |
166 | 6.75k | } |
167 | 93.5k | if (name == "/LZW") { |
168 | 28.9k | return SF_FlateLzwDecode::lzw_factory; |
169 | 28.9k | } |
170 | 64.6k | if (name == "/RL") { |
171 | 49.5k | return SF_RunLengthDecode::factory; |
172 | 49.5k | } |
173 | 15.0k | if (name == "/DCT") { |
174 | 3.33k | return SF_DCTDecode::factory; |
175 | 3.33k | } |
176 | 11.7k | return nullptr; |
177 | 15.0k | } |
178 | | |
179 | | Stream::Stream( |
180 | | QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : |
181 | 493k | BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length)) |
182 | 493k | { |
183 | 493k | auto descr = std::make_shared<QPDFObject::Description>( |
184 | 493k | qpdf.getFilename() + ", stream object " + og.unparse(' ')); |
185 | 493k | obj->setDescription(&qpdf, descr, offset); |
186 | 493k | setDictDescription(); |
187 | 493k | } |
188 | | |
189 | | void |
190 | | Stream::registerStreamFilter( |
191 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
192 | 0 | { |
193 | 0 | filter_factories[filter_name] = factory; |
194 | 0 | filter_factories_registered = true; |
195 | 0 | } |
196 | | |
197 | | JSON |
198 | | Stream::getStreamJSON( |
199 | | int json_version, |
200 | | qpdf_json_stream_data_e json_data, |
201 | | qpdf_stream_decode_level_e decode_level, |
202 | | Pipeline* p, |
203 | | std::string const& data_filename) |
204 | 0 | { |
205 | 0 | Pl_Buffer pb{"streamjson"}; |
206 | 0 | JSON::Writer jw{&pb, 0}; |
207 | 0 | decode_level = |
208 | 0 | writeStreamJSON(json_version, jw, json_data, decode_level, p, data_filename, true); |
209 | 0 | pb.finish(); |
210 | 0 | auto result = JSON::parse(pb.getString()); |
211 | 0 | if (json_data == qpdf_sj_inline) { |
212 | 0 | result.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level))); |
213 | 0 | } |
214 | 0 | return result; |
215 | 0 | } |
216 | | |
217 | | qpdf_stream_decode_level_e |
218 | | Stream::writeStreamJSON( |
219 | | int json_version, |
220 | | JSON::Writer& jw, |
221 | | qpdf_json_stream_data_e json_data, |
222 | | qpdf_stream_decode_level_e decode_level, |
223 | | Pipeline* p, |
224 | | std::string const& data_filename, |
225 | | bool no_data_key) |
226 | 0 | { |
227 | 0 | auto s = stream(); |
228 | 0 | switch (json_data) { |
229 | 0 | case qpdf_sj_none: |
230 | 0 | case qpdf_sj_inline: |
231 | 0 | if (p != nullptr) { |
232 | 0 | throw std::logic_error( |
233 | 0 | "QPDF_Stream::writeStreamJSON: pipeline should only be supplied " |
234 | 0 | "when json_data is file"); |
235 | 0 | } |
236 | 0 | break; |
237 | 0 | case qpdf_sj_file: |
238 | 0 | if (p == nullptr) { |
239 | 0 | throw std::logic_error( |
240 | 0 | "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file"); |
241 | 0 | } |
242 | 0 | if (data_filename.empty()) { |
243 | 0 | throw std::logic_error( |
244 | 0 | "QPDF_Stream::writeStreamJSON: data_filename must be supplied " |
245 | 0 | "when json_data is file"); |
246 | 0 | } |
247 | 0 | break; |
248 | 0 | } |
249 | | |
250 | 0 | jw.writeStart('{'); |
251 | |
|
252 | 0 | if (json_data == qpdf_sj_none) { |
253 | 0 | jw.writeNext(); |
254 | 0 | jw << R"("dict": )"; |
255 | 0 | s->stream_dict.writeJSON(json_version, jw); |
256 | 0 | jw.writeEnd('}'); |
257 | 0 | return decode_level; |
258 | 0 | } |
259 | | |
260 | 0 | Pl_Discard discard; |
261 | 0 | Pl_Buffer buf_pl{"stream data"}; |
262 | 0 | Pipeline* data_pipeline = &buf_pl; |
263 | 0 | if (no_data_key && json_data == qpdf_sj_inline) { |
264 | 0 | data_pipeline = &discard; |
265 | 0 | } |
266 | | // pipeStreamData produced valid data. |
267 | 0 | bool buf_pl_ready = false; |
268 | 0 | bool filtered = false; |
269 | 0 | bool filter = (decode_level != qpdf_dl_none); |
270 | 0 | for (int attempt = 1; attempt <= 2; ++attempt) { |
271 | 0 | bool succeeded = |
272 | 0 | pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1)); |
273 | 0 | if (!succeeded || (filter && !filtered)) { |
274 | | // Try again |
275 | 0 | filter = false; |
276 | 0 | decode_level = qpdf_dl_none; |
277 | 0 | buf_pl.getString(); // reset buf_pl |
278 | 0 | } else { |
279 | 0 | buf_pl_ready = true; |
280 | 0 | break; |
281 | 0 | } |
282 | 0 | } |
283 | 0 | if (!buf_pl_ready) { |
284 | 0 | throw std::logic_error("QPDF_Stream: failed to get stream data"); |
285 | 0 | } |
286 | | // We can use unsafeShallowCopy because we are only touching top-level keys. |
287 | 0 | auto dict = s->stream_dict.unsafeShallowCopy(); |
288 | 0 | dict.removeKey("/Length"); |
289 | 0 | if (filter && filtered) { |
290 | 0 | dict.removeKey("/Filter"); |
291 | 0 | dict.removeKey("/DecodeParms"); |
292 | 0 | } |
293 | 0 | if (json_data == qpdf_sj_file) { |
294 | 0 | jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\""; |
295 | 0 | p->writeString(buf_pl.getString()); |
296 | 0 | } else if (json_data == qpdf_sj_inline) { |
297 | 0 | if (!no_data_key) { |
298 | 0 | jw.writeNext() << R"("data": ")"; |
299 | 0 | jw.writeBase64(buf_pl.getString()) << "\""; |
300 | 0 | } |
301 | 0 | } else { |
302 | 0 | throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data"); |
303 | 0 | } |
304 | | |
305 | 0 | jw.writeNext() << R"("dict": )"; |
306 | 0 | dict.writeJSON(json_version, jw); |
307 | 0 | jw.writeEnd('}'); |
308 | |
|
309 | 0 | return decode_level; |
310 | 0 | } |
311 | | |
312 | | void |
313 | | qpdf::Stream::setDictDescription() |
314 | 505k | { |
315 | 505k | auto s = stream(); |
316 | 505k | if (!s->stream_dict.hasObjectDescription()) { |
317 | 32.8k | s->stream_dict.setObjectDescription( |
318 | 32.8k | obj->getQPDF(), obj->getDescription() + " -> stream dictionary"); |
319 | 32.8k | } |
320 | 505k | } |
321 | | |
322 | | std::shared_ptr<Buffer> |
323 | | Stream::getStreamData(qpdf_stream_decode_level_e decode_level) |
324 | 89.8k | { |
325 | 89.8k | Pl_Buffer buf("stream data buffer"); |
326 | 89.8k | bool filtered; |
327 | 89.8k | pipeStreamData(&buf, &filtered, 0, decode_level, false, false); |
328 | 89.8k | if (!filtered) { |
329 | 20.2k | throw QPDFExc( |
330 | 20.2k | qpdf_e_unsupported, |
331 | 20.2k | obj->getQPDF()->getFilename(), |
332 | 20.2k | "", |
333 | 20.2k | obj->getParsedOffset(), |
334 | 20.2k | "getStreamData called on unfilterable stream"); |
335 | 20.2k | } |
336 | 69.6k | QTC::TC("qpdf", "QPDF_Stream getStreamData"); |
337 | 69.6k | return buf.getBufferSharedPointer(); |
338 | 89.8k | } |
339 | | |
340 | | std::shared_ptr<Buffer> |
341 | | Stream::getRawStreamData() |
342 | 0 | { |
343 | 0 | Pl_Buffer buf("stream data buffer"); |
344 | 0 | if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) { |
345 | 0 | throw QPDFExc( |
346 | 0 | qpdf_e_unsupported, |
347 | 0 | obj->getQPDF()->getFilename(), |
348 | 0 | "", |
349 | 0 | obj->getParsedOffset(), |
350 | 0 | "error getting raw stream data"); |
351 | 0 | } |
352 | 0 | QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); |
353 | 0 | return buf.getBufferSharedPointer(); |
354 | 0 | } |
355 | | |
356 | | bool |
357 | | Stream::isRootMetadata() const |
358 | 931k | { |
359 | 931k | if (!getDict().isDictionaryOfType("/Metadata", "/XML")) { |
360 | 924k | return false; |
361 | 924k | } |
362 | 6.76k | auto root_metadata = qpdf()->getRoot().getKey("/Metadata"); |
363 | 6.76k | return root_metadata.isSameObjectAs(obj); |
364 | 931k | } |
365 | | |
366 | | bool |
367 | | Stream::filterable( |
368 | | qpdf_stream_decode_level_e decode_level, |
369 | | std::vector<std::shared_ptr<QPDFStreamFilter>>& filters) |
370 | 381k | { |
371 | 381k | auto s = stream(); |
372 | | // Check filters |
373 | | |
374 | 381k | auto filter_obj = s->stream_dict.getKey("/Filter"); |
375 | | |
376 | 381k | if (filter_obj.isNull()) { |
377 | | // No filters |
378 | 139k | return true; |
379 | 139k | } |
380 | 242k | if (filter_obj.isName()) { |
381 | | // One filter |
382 | 138k | auto ff = s->filter_factory(filter_obj.getName()); |
383 | 138k | if (!ff) { |
384 | 6.06k | return false; |
385 | 6.06k | } |
386 | 132k | filters.emplace_back(ff()); |
387 | 132k | } else if (auto array = filter_obj.as_array(strict)) { |
388 | | // Potentially multiple filters |
389 | 241k | for (auto const& item: array) { |
390 | 241k | if (!item.isName()) { |
391 | 989 | warn("stream filter type is not name or array"); |
392 | 989 | return false; |
393 | 989 | } |
394 | 240k | auto ff = s->filter_factory(item.getName()); |
395 | 240k | if (!ff) { |
396 | 5.68k | filters.clear(); |
397 | 5.68k | return false; |
398 | 5.68k | } |
399 | 234k | filters.emplace_back(ff()); |
400 | 234k | } |
401 | 104k | } else { |
402 | 224 | warn("stream filter type is not name or array"); |
403 | 224 | return false; |
404 | 224 | } |
405 | | |
406 | | // filters now contains a list of filters to be applied in order. See which ones we can support. |
407 | | // See if we can support any decode parameters that are specified. |
408 | | |
409 | 229k | auto decode_obj = s->stream_dict.getKey("/DecodeParms"); |
410 | | |
411 | 229k | auto can_filter = // linebreak |
412 | 327k | [](auto d_level, auto& filter, auto& d_obj) -> bool { |
413 | 327k | if (!filter.setDecodeParms(d_obj) || |
414 | 327k | (d_level < qpdf_dl_all && filter.isLossyCompression()) || |
415 | 327k | (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) { |
416 | 2.96k | return false; |
417 | 2.96k | } |
418 | 324k | return true; |
419 | 327k | }; |
420 | | |
421 | 229k | auto decode_array = decode_obj.as_array(strict); |
422 | 229k | if (!decode_array || decode_array.size() == 0) { |
423 | 225k | if (decode_array) { |
424 | 77 | decode_obj = QPDFObjectHandle::newNull(); |
425 | 77 | } |
426 | | |
427 | 322k | for (auto& filter: filters) { |
428 | 322k | if (!can_filter(decode_level, *filter, decode_obj)) { |
429 | 2.54k | return false; |
430 | 2.54k | } |
431 | 322k | } |
432 | 225k | } else { |
433 | | // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose |
434 | | // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. |
435 | 4.65k | if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) { |
436 | 213 | warn("stream /DecodeParms length is inconsistent with filters"); |
437 | 213 | return false; |
438 | 213 | } |
439 | | |
440 | 4.43k | int i = -1; |
441 | 4.57k | for (auto& filter: filters) { |
442 | 4.57k | auto d_obj = decode_array.at(++i).second; |
443 | 4.57k | if (!can_filter(decode_level, *filter, d_obj)) { |
444 | 419 | return false; |
445 | 419 | } |
446 | 4.57k | } |
447 | 4.43k | } |
448 | | |
449 | 226k | return true; |
450 | 229k | } |
451 | | |
452 | | bool |
453 | | Stream::pipeStreamData( |
454 | | Pipeline* pipeline, |
455 | | bool* filterp, |
456 | | int encode_flags, |
457 | | qpdf_stream_decode_level_e decode_level, |
458 | | bool suppress_warnings, |
459 | | bool will_retry) |
460 | 570k | { |
461 | 570k | auto s = stream(); |
462 | 570k | std::vector<std::shared_ptr<QPDFStreamFilter>> filters; |
463 | 570k | bool ignored; |
464 | 570k | if (!filterp) { |
465 | 0 | filterp = &ignored; |
466 | 0 | } |
467 | 570k | bool& filter = *filterp; |
468 | | |
469 | 570k | const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0; |
470 | 570k | const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0; |
471 | 570k | const bool empty = empty_stream || empty_stream_data; |
472 | | |
473 | 570k | if (empty_stream || empty_stream_data) { |
474 | 14.8k | filter = true; |
475 | 14.8k | } |
476 | | |
477 | 570k | filter = empty || encode_flags || decode_level != qpdf_dl_none; |
478 | 570k | if (filter) { |
479 | 381k | filter = filterable(decode_level, filters); |
480 | 381k | } |
481 | | |
482 | 570k | if (!pipeline) { |
483 | 0 | QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline"); |
484 | | // Return value is whether we can filter in this case. |
485 | 0 | return filter; |
486 | 0 | } |
487 | | |
488 | | // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this |
489 | | // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those |
490 | | // objects. |
491 | 570k | std::vector<std::unique_ptr<Pipeline>> to_delete; |
492 | | |
493 | 570k | ContentNormalizer normalizer; |
494 | 570k | if (filter) { |
495 | 365k | if (encode_flags & qpdf_ef_compress) { |
496 | 193k | auto new_pipeline = |
497 | 193k | std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate); |
498 | 193k | pipeline = new_pipeline.get(); |
499 | 193k | to_delete.push_back(std::move(new_pipeline)); |
500 | 193k | } |
501 | | |
502 | 365k | if (encode_flags & qpdf_ef_normalize) { |
503 | 7.52k | auto new_pipeline = |
504 | 7.52k | std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline); |
505 | 7.52k | pipeline = new_pipeline.get(); |
506 | 7.52k | to_delete.push_back(std::move(new_pipeline)); |
507 | 7.52k | } |
508 | | |
509 | 393k | for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) { |
510 | 27.9k | auto new_pipeline = |
511 | 27.9k | std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline); |
512 | 27.9k | pipeline = new_pipeline.get(); |
513 | 27.9k | to_delete.push_back(std::move(new_pipeline)); |
514 | 27.9k | } |
515 | | |
516 | 688k | for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) { |
517 | 322k | if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) { |
518 | 319k | pipeline = decode_pipeline; |
519 | 319k | } |
520 | 322k | auto* flate = dynamic_cast<Pl_Flate*>(pipeline); |
521 | 322k | if (flate) { |
522 | 152k | flate->setWarnCallback([this](char const* msg, int code) { warn(msg); }); |
523 | 152k | } |
524 | 322k | } |
525 | 365k | } |
526 | | |
527 | 570k | if (s->stream_data.get()) { |
528 | 10.3k | QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data"); |
529 | 10.3k | pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize()); |
530 | 10.3k | pipeline->finish(); |
531 | 560k | } else if (s->stream_provider.get()) { |
532 | 6.32k | Pl_Count count("stream provider count", pipeline); |
533 | 6.32k | if (s->stream_provider->supportsRetry()) { |
534 | 0 | if (!s->stream_provider->provideStreamData( |
535 | 0 | obj->getObjGen(), &count, suppress_warnings, will_retry)) { |
536 | 0 | filter = false; |
537 | 0 | return false; |
538 | 0 | } |
539 | 6.32k | } else { |
540 | 6.32k | s->stream_provider->provideStreamData(obj->getObjGen(), &count); |
541 | 6.32k | } |
542 | 6.32k | qpdf_offset_t actual_length = count.getCount(); |
543 | 6.32k | if (s->stream_dict.hasKey("/Length")) { |
544 | 0 | auto desired_length = s->stream_dict.getKey("/Length").getIntValue(); |
545 | 0 | if (actual_length != desired_length) { |
546 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); |
547 | | // This would be caused by programmer error on the part of a library user, not by |
548 | | // invalid input data. |
549 | 0 | throw std::runtime_error( |
550 | 0 | "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " + |
551 | 0 | std::to_string(actual_length) + " bytes instead of expected " + |
552 | 0 | std::to_string(desired_length) + " bytes"); |
553 | 0 | } |
554 | 6.32k | } else { |
555 | 6.32k | QTC::TC("qpdf", "QPDF_Stream provider length not provided"); |
556 | 6.32k | s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length)); |
557 | 6.32k | } |
558 | 553k | } else { |
559 | 553k | if (obj->getParsedOffset() == 0) { |
560 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe no stream data"); |
561 | 0 | throw std::logic_error("pipeStreamData called for stream with no data"); |
562 | 0 | } |
563 | 553k | QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); |
564 | 553k | if (!QPDF::Pipe::pipeStreamData( |
565 | 553k | obj->getQPDF(), |
566 | 553k | obj->getObjGen(), |
567 | 553k | obj->getParsedOffset(), |
568 | 553k | s->length, |
569 | 553k | s->stream_dict, |
570 | 553k | isRootMetadata(), |
571 | 553k | pipeline, |
572 | 553k | suppress_warnings, |
573 | 553k | will_retry)) { |
574 | 80.2k | filter = false; |
575 | 80.2k | return false; |
576 | 80.2k | } |
577 | 553k | } |
578 | | |
579 | 490k | if (filter && !suppress_warnings && normalizer.anyBadTokens()) { |
580 | 1.61k | warn("content normalization encountered bad tokens"); |
581 | 1.61k | if (normalizer.lastTokenWasBad()) { |
582 | 868 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); |
583 | 868 | warn( |
584 | 868 | "normalized content ended with a bad token; you may be able to resolve this by " |
585 | 868 | "coalescing content streams in combination with normalizing content. From the " |
586 | 868 | "command line, specify --coalesce-contents"); |
587 | 868 | } |
588 | 1.61k | warn( |
589 | 1.61k | "Resulting stream data may be corrupted but is may still useful for manual " |
590 | 1.61k | "inspection. For more information on this warning, search for content normalization " |
591 | 1.61k | "in the manual."); |
592 | 1.61k | } |
593 | | |
594 | 490k | return true; |
595 | 570k | } |
596 | | |
597 | | void |
598 | | Stream::replaceStreamData( |
599 | | std::shared_ptr<Buffer> data, |
600 | | QPDFObjectHandle const& filter, |
601 | | QPDFObjectHandle const& decode_parms) |
602 | 22.0k | { |
603 | 22.0k | auto s = stream(); |
604 | 22.0k | s->stream_data = data; |
605 | 22.0k | s->stream_provider = nullptr; |
606 | 22.0k | replaceFilterData(filter, decode_parms, data->getSize()); |
607 | 22.0k | } |
608 | | |
609 | | void |
610 | | Stream::replaceStreamData( |
611 | | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider, |
612 | | QPDFObjectHandle const& filter, |
613 | | QPDFObjectHandle const& decode_parms) |
614 | 20.9k | { |
615 | 20.9k | auto s = stream(); |
616 | 20.9k | s->stream_provider = provider; |
617 | 20.9k | s->stream_data = nullptr; |
618 | 20.9k | replaceFilterData(filter, decode_parms, 0); |
619 | 20.9k | } |
620 | | |
621 | | void |
622 | | Stream::replaceFilterData( |
623 | | QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length) |
624 | 42.9k | { |
625 | 42.9k | auto s = stream(); |
626 | 42.9k | if (filter) { |
627 | 22.7k | s->stream_dict.replaceKey("/Filter", filter); |
628 | 22.7k | } |
629 | 42.9k | if (decode_parms) { |
630 | 22.7k | s->stream_dict.replaceKey("/DecodeParms", decode_parms); |
631 | 22.7k | } |
632 | 42.9k | if (length == 0) { |
633 | 26.4k | QTC::TC("qpdf", "QPDF_Stream unknown stream length"); |
634 | 26.4k | s->stream_dict.removeKey("/Length"); |
635 | 26.4k | } else { |
636 | 16.4k | s->stream_dict.replaceKey( |
637 | 16.4k | "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length))); |
638 | 16.4k | } |
639 | 42.9k | } |
640 | | |
641 | | void |
642 | | Stream::warn(std::string const& message) |
643 | 36.6k | { |
644 | 36.6k | obj->getQPDF()->warn(qpdf_e_damaged_pdf, "", obj->getParsedOffset(), message); |
645 | 36.6k | } |
646 | | |
647 | | QPDFObjectHandle |
648 | | QPDFObjectHandle::getDict() const |
649 | 1.92M | { |
650 | 1.92M | return as_stream(error).getDict(); |
651 | 1.92M | } |
652 | | |
653 | | void |
654 | | QPDFObjectHandle::setFilterOnWrite(bool val) |
655 | 71.3k | { |
656 | 71.3k | as_stream(error).setFilterOnWrite(val); |
657 | 71.3k | } |
658 | | |
659 | | bool |
660 | | QPDFObjectHandle::getFilterOnWrite() |
661 | 378k | { |
662 | 378k | return as_stream(error).getFilterOnWrite(); |
663 | 378k | } |
664 | | |
665 | | bool |
666 | | QPDFObjectHandle::isDataModified() |
667 | 656k | { |
668 | 656k | return as_stream(error).isDataModified(); |
669 | 656k | } |
670 | | |
671 | | void |
672 | | QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) |
673 | 12.2k | { |
674 | 12.2k | as_stream(error).replaceDict(new_dict); |
675 | 12.2k | } |
676 | | |
677 | | bool |
678 | | QPDFObjectHandle::isRootMetadata() const |
679 | 378k | { |
680 | 378k | return as_stream(error).isRootMetadata(); |
681 | 378k | } |
682 | | |
683 | | std::shared_ptr<Buffer> |
684 | | QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) |
685 | 70.6k | { |
686 | 70.6k | return as_stream(error).getStreamData(level); |
687 | 70.6k | } |
688 | | |
689 | | std::shared_ptr<Buffer> |
690 | | QPDFObjectHandle::getRawStreamData() |
691 | 0 | { |
692 | 0 | return as_stream(error).getRawStreamData(); |
693 | 0 | } |
694 | | |
695 | | bool |
696 | | QPDFObjectHandle::pipeStreamData( |
697 | | Pipeline* p, |
698 | | bool* filtering_attempted, |
699 | | int encode_flags, |
700 | | qpdf_stream_decode_level_e decode_level, |
701 | | bool suppress_warnings, |
702 | | bool will_retry) |
703 | 0 | { |
704 | 0 | return as_stream(error).pipeStreamData( |
705 | 0 | p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
706 | 0 | } |
707 | | |
708 | | bool |
709 | | QPDFObjectHandle::pipeStreamData( |
710 | | Pipeline* p, |
711 | | int encode_flags, |
712 | | qpdf_stream_decode_level_e decode_level, |
713 | | bool suppress_warnings, |
714 | | bool will_retry) |
715 | 480k | { |
716 | 480k | bool filtering_attempted; |
717 | 480k | as_stream(error).pipeStreamData( |
718 | 480k | p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
719 | 480k | return filtering_attempted; |
720 | 480k | } |
721 | | |
722 | | bool |
723 | | QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress) |
724 | 0 | { |
725 | 0 | int encode_flags = 0; |
726 | 0 | qpdf_stream_decode_level_e decode_level = qpdf_dl_none; |
727 | 0 | if (filter) { |
728 | 0 | decode_level = qpdf_dl_generalized; |
729 | 0 | if (normalize) { |
730 | 0 | encode_flags |= qpdf_ef_normalize; |
731 | 0 | } |
732 | 0 | if (compress) { |
733 | 0 | encode_flags |= qpdf_ef_compress; |
734 | 0 | } |
735 | 0 | } |
736 | 0 | return pipeStreamData(p, encode_flags, decode_level, false); |
737 | 0 | } |
738 | | |
739 | | void |
740 | | QPDFObjectHandle::replaceStreamData( |
741 | | std::shared_ptr<Buffer> data, |
742 | | QPDFObjectHandle const& filter, |
743 | | QPDFObjectHandle const& decode_parms) |
744 | 0 | { |
745 | 0 | as_stream(error).replaceStreamData(data, filter, decode_parms); |
746 | 0 | } |
747 | | |
748 | | void |
749 | | QPDFObjectHandle::replaceStreamData( |
750 | | std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) |
751 | 22.0k | { |
752 | 22.0k | auto b = std::make_shared<Buffer>(data.length()); |
753 | 22.0k | unsigned char* bp = b->getBuffer(); |
754 | 22.0k | if (bp) { |
755 | 16.4k | memcpy(bp, data.c_str(), data.length()); |
756 | 16.4k | } |
757 | 22.0k | as_stream(error).replaceStreamData(b, filter, decode_parms); |
758 | 22.0k | } |
759 | | |
760 | | void |
761 | | QPDFObjectHandle::replaceStreamData( |
762 | | std::shared_ptr<StreamDataProvider> provider, |
763 | | QPDFObjectHandle const& filter, |
764 | | QPDFObjectHandle const& decode_parms) |
765 | 6.32k | { |
766 | 6.32k | as_stream(error).replaceStreamData(provider, filter, decode_parms); |
767 | 6.32k | } |
768 | | |
769 | | namespace |
770 | | { |
771 | | class FunctionProvider: public QPDFObjectHandle::StreamDataProvider |
772 | | { |
773 | | public: |
774 | | FunctionProvider(std::function<void(Pipeline*)> provider) : |
775 | 14.5k | StreamDataProvider(false), |
776 | 14.5k | p1(provider), |
777 | 14.5k | p2(nullptr) |
778 | 14.5k | { |
779 | 14.5k | } |
780 | | FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) : |
781 | 0 | StreamDataProvider(true), |
782 | 0 | p1(nullptr), |
783 | 0 | p2(provider) |
784 | 0 | { |
785 | 0 | } |
786 | | |
787 | | void |
788 | | provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override |
789 | 0 | { |
790 | 0 | p1(pipeline); |
791 | 0 | } |
792 | | |
793 | | bool |
794 | | provideStreamData( |
795 | | QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override |
796 | 0 | { |
797 | 0 | return p2(pipeline, suppress_warnings, will_retry); |
798 | 0 | } |
799 | | |
800 | | private: |
801 | | std::function<void(Pipeline*)> p1; |
802 | | std::function<bool(Pipeline*, bool, bool)> p2; |
803 | | }; |
804 | | } // namespace |
805 | | |
806 | | void |
807 | | QPDFObjectHandle::replaceStreamData( |
808 | | std::function<void(Pipeline*)> provider, |
809 | | QPDFObjectHandle const& filter, |
810 | | QPDFObjectHandle const& decode_parms) |
811 | 14.5k | { |
812 | 14.5k | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
813 | 14.5k | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
814 | 14.5k | } |
815 | | |
816 | | void |
817 | | QPDFObjectHandle::replaceStreamData( |
818 | | std::function<bool(Pipeline*, bool, bool)> provider, |
819 | | QPDFObjectHandle const& filter, |
820 | | QPDFObjectHandle const& decode_parms) |
821 | 0 | { |
822 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
823 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
824 | 0 | } |
825 | | |
826 | | JSON |
827 | | QPDFObjectHandle::getStreamJSON( |
828 | | int json_version, |
829 | | qpdf_json_stream_data_e json_data, |
830 | | qpdf_stream_decode_level_e decode_level, |
831 | | Pipeline* p, |
832 | | std::string const& data_filename) |
833 | 0 | { |
834 | 0 | return as_stream(error).getStreamJSON(json_version, json_data, decode_level, p, data_filename); |
835 | 0 | } |