/src/qpdf/libqpdf/QPDF_Stream.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/QPDFObjectHandle_private.hh> |
2 | | |
3 | | #include <qpdf/ContentNormalizer.hh> |
4 | | #include <qpdf/JSON_writer.hh> |
5 | | #include <qpdf/Pipeline.hh> |
6 | | #include <qpdf/Pipeline_private.hh> |
7 | | #include <qpdf/Pl_Base64.hh> |
8 | | #include <qpdf/Pl_Buffer.hh> |
9 | | #include <qpdf/Pl_Count.hh> |
10 | | #include <qpdf/Pl_Discard.hh> |
11 | | #include <qpdf/Pl_Flate.hh> |
12 | | #include <qpdf/Pl_QPDFTokenizer.hh> |
13 | | #include <qpdf/QIntC.hh> |
14 | | #include <qpdf/QPDFExc.hh> |
15 | | #include <qpdf/QPDF_private.hh> |
16 | | #include <qpdf/QTC.hh> |
17 | | #include <qpdf/QUtil.hh> |
18 | | #include <qpdf/SF_ASCII85Decode.hh> |
19 | | #include <qpdf/SF_ASCIIHexDecode.hh> |
20 | | #include <qpdf/SF_DCTDecode.hh> |
21 | | #include <qpdf/SF_FlateLzwDecode.hh> |
22 | | #include <qpdf/SF_RunLengthDecode.hh> |
23 | | |
24 | | #include <stdexcept> |
25 | | |
26 | | using namespace std::literals; |
27 | | using namespace qpdf; |
28 | | |
29 | | namespace |
30 | | { |
31 | | class SF_Crypt final: public QPDFStreamFilter |
32 | | { |
33 | | public: |
34 | 190 | SF_Crypt() = default; |
35 | | ~SF_Crypt() final = default; |
36 | | |
37 | | bool |
38 | | setDecodeParms(QPDFObjectHandle decode_parms) final |
39 | 162 | { |
40 | | // we only validate here - processing happens in decryptStream |
41 | 162 | if (auto dict = decode_parms.as_dictionary(optional)) { |
42 | 411 | for (auto const& [key, value]: dict) { |
43 | 411 | if (key == "/Type" && |
44 | 411 | (value.null() || |
45 | 35 | (value.isName() && value.getName() == "/CryptFilterDecodeParms"))) { |
46 | 4 | continue; |
47 | 4 | } |
48 | 407 | if (key == "/Name") { |
49 | 4 | continue; |
50 | 4 | } |
51 | 403 | if (!value.null()) { |
52 | 73 | return false; |
53 | 73 | } |
54 | 403 | } |
55 | 80 | return true; |
56 | 153 | } |
57 | 9 | return false; |
58 | 162 | } |
59 | | |
60 | | Pipeline* |
61 | | getDecodePipeline(Pipeline*) final |
62 | 80 | { |
63 | | // Not used -- handled by pipeStreamData |
64 | 80 | return nullptr; |
65 | 80 | } |
66 | | }; |
67 | | |
68 | | class StreamBlobProvider |
69 | | { |
70 | | public: |
71 | | StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) : |
72 | 0 | stream(stream), |
73 | 0 | decode_level(decode_level) |
74 | 0 | { |
75 | 0 | } |
76 | | void |
77 | | operator()(Pipeline* p) |
78 | 0 | { |
79 | 0 | stream.pipeStreamData(p, nullptr, 0, decode_level, false, false); |
80 | 0 | } |
81 | | |
82 | | private: |
83 | | Stream stream; |
84 | | qpdf_stream_decode_level_e decode_level; |
85 | | }; |
86 | | |
87 | | /// User defined streamfilter factories |
88 | | std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories; |
89 | | bool filter_factories_registered = false; |
90 | | } // namespace |
91 | | |
92 | | std::string |
93 | | QPDF_Stream::Members::expand_filter_name(std::string const& name) const |
94 | 0 | { |
95 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
96 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also |
97 | | // accepts them for stream filters. |
98 | 0 | if (name == "/AHx") { |
99 | 0 | return "/ASCIIHexDecode"; |
100 | 0 | } |
101 | 0 | if (name == "/A85") { |
102 | 0 | return "/ASCII85Decode"; |
103 | 0 | } |
104 | 0 | if (name == "/LZW") { |
105 | 0 | return "/LZWDecode"; |
106 | 0 | } |
107 | 0 | if (name == "/Fl") { |
108 | 0 | return "/FlateDecode"; |
109 | 0 | } |
110 | 0 | if (name == "/RL") { |
111 | 0 | return "/RunLengthDecode"; |
112 | 0 | } |
113 | 0 | if (name == "/CCF") { |
114 | 0 | return "/CCITTFaxDecode"; |
115 | 0 | } |
116 | 0 | if (name == "/DCT") { |
117 | 0 | return "/DCTDecode"; |
118 | 0 | } |
119 | 0 | return name; |
120 | 0 | }; |
121 | | |
122 | | std::function<std::shared_ptr<QPDFStreamFilter>()> |
123 | | QPDF_Stream::Members::filter_factory(std::string const& name) const |
124 | 39.5k | { |
125 | 39.5k | if (filter_factories_registered) [[unlikely]] { |
126 | | // We need to check user provided filters first as we allow users to replace qpdf provided |
127 | | // default filters. This will have a performance impact if the facility to register stream |
128 | | // filters is actually used. We can optimize this away if necessary. |
129 | 0 | auto ff = filter_factories.find(expand_filter_name(name)); |
130 | 0 | if (ff != filter_factories.end()) { |
131 | 0 | return ff->second; |
132 | 0 | } |
133 | 0 | } |
134 | 39.5k | if (name == "/FlateDecode") { |
135 | 18.9k | return SF_FlateLzwDecode::flate_factory; |
136 | 18.9k | } |
137 | 20.5k | if (name == "/Crypt") { |
138 | 190 | return []() { return std::make_shared<SF_Crypt>(); }; |
139 | 190 | } |
140 | 20.3k | if (name == "/LZWDecode") { |
141 | 853 | return SF_FlateLzwDecode::lzw_factory; |
142 | 853 | } |
143 | 19.4k | if (name == "/RunLengthDecode") { |
144 | 16 | return SF_RunLengthDecode::factory; |
145 | 16 | } |
146 | 19.4k | if (name == "/DCTDecode") { |
147 | 6.31k | return SF_DCTDecode::factory; |
148 | 6.31k | } |
149 | 13.1k | if (name == "/ASCII85Decode") { |
150 | 1.66k | return SF_ASCII85Decode::factory; |
151 | 1.66k | } |
152 | 11.4k | if (name == "/ASCIIHexDecode") { |
153 | 480 | return SF_ASCIIHexDecode::factory; |
154 | 480 | } |
155 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
156 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader |
157 | | // also accepts them for stream filters. |
158 | | |
159 | 11.0k | if (name == "/Fl") { |
160 | 1.36k | return SF_FlateLzwDecode::flate_factory; |
161 | 1.36k | } |
162 | 9.64k | if (name == "/AHx") { |
163 | 1.22k | return SF_ASCIIHexDecode::factory; |
164 | 1.22k | } |
165 | 8.41k | if (name == "/A85") { |
166 | 400 | return SF_ASCII85Decode::factory; |
167 | 400 | } |
168 | 8.01k | if (name == "/LZW") { |
169 | 2.43k | return SF_FlateLzwDecode::lzw_factory; |
170 | 2.43k | } |
171 | 5.58k | if (name == "/RL") { |
172 | 3.99k | return SF_RunLengthDecode::factory; |
173 | 3.99k | } |
174 | 1.58k | if (name == "/DCT") { |
175 | 657 | return SF_DCTDecode::factory; |
176 | 657 | } |
177 | 927 | return nullptr; |
178 | 1.58k | } |
179 | | |
180 | | Stream::Stream( |
181 | | QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : |
182 | 47.4k | BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length)) |
183 | 47.4k | { |
184 | 47.4k | auto descr = std::make_shared<QPDFObject::Description>( |
185 | 47.4k | qpdf.getFilename() + ", stream object " + og.unparse(' ')); |
186 | 47.4k | obj->setDescription(&qpdf, descr, offset); |
187 | 47.4k | setDictDescription(); |
188 | 47.4k | } |
189 | | |
190 | | void |
191 | | Stream::registerStreamFilter( |
192 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
193 | 0 | { |
194 | 0 | filter_factories[filter_name] = factory; |
195 | 0 | filter_factories_registered = true; |
196 | 0 | } |
197 | | |
198 | | JSON |
199 | | Stream::getStreamJSON( |
200 | | int json_version, |
201 | | qpdf_json_stream_data_e json_data, |
202 | | qpdf_stream_decode_level_e decode_level, |
203 | | Pipeline* p, |
204 | | std::string const& data_filename) |
205 | 0 | { |
206 | 0 | Pl_Buffer pb{"streamjson"}; |
207 | 0 | JSON::Writer jw{&pb, 0}; |
208 | 0 | decode_level = |
209 | 0 | writeStreamJSON(json_version, jw, json_data, decode_level, p, data_filename, true); |
210 | 0 | pb.finish(); |
211 | 0 | auto result = JSON::parse(pb.getString()); |
212 | 0 | if (json_data == qpdf_sj_inline) { |
213 | 0 | result.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level))); |
214 | 0 | } |
215 | 0 | return result; |
216 | 0 | } |
217 | | |
218 | | qpdf_stream_decode_level_e |
219 | | Stream::writeStreamJSON( |
220 | | int json_version, |
221 | | JSON::Writer& jw, |
222 | | qpdf_json_stream_data_e json_data, |
223 | | qpdf_stream_decode_level_e decode_level, |
224 | | Pipeline* p, |
225 | | std::string const& data_filename, |
226 | | bool no_data_key) |
227 | 0 | { |
228 | 0 | auto s = stream(); |
229 | 0 | switch (json_data) { |
230 | 0 | case qpdf_sj_none: |
231 | 0 | case qpdf_sj_inline: |
232 | 0 | if (p != nullptr) { |
233 | 0 | throw std::logic_error( |
234 | 0 | "QPDF_Stream::writeStreamJSON: pipeline should only be supplied " |
235 | 0 | "when json_data is file"); |
236 | 0 | } |
237 | 0 | break; |
238 | 0 | case qpdf_sj_file: |
239 | 0 | if (p == nullptr) { |
240 | 0 | throw std::logic_error( |
241 | 0 | "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file"); |
242 | 0 | } |
243 | 0 | if (data_filename.empty()) { |
244 | 0 | throw std::logic_error( |
245 | 0 | "QPDF_Stream::writeStreamJSON: data_filename must be supplied " |
246 | 0 | "when json_data is file"); |
247 | 0 | } |
248 | 0 | break; |
249 | 0 | } |
250 | | |
251 | 0 | jw.writeStart('{'); |
252 | |
|
253 | 0 | if (json_data == qpdf_sj_none) { |
254 | 0 | jw.writeNext(); |
255 | 0 | jw << R"("dict": )"; |
256 | 0 | s->stream_dict.writeJSON(json_version, jw); |
257 | 0 | jw.writeEnd('}'); |
258 | 0 | return decode_level; |
259 | 0 | } |
260 | | |
261 | 0 | Pl_Discard discard; |
262 | 0 | Pl_Buffer buf_pl{"stream data"}; |
263 | 0 | Pipeline* data_pipeline = &buf_pl; |
264 | 0 | if (no_data_key && json_data == qpdf_sj_inline) { |
265 | 0 | data_pipeline = &discard; |
266 | 0 | } |
267 | | // pipeStreamData produced valid data. |
268 | 0 | bool buf_pl_ready = false; |
269 | 0 | bool filtered = false; |
270 | 0 | bool filter = (decode_level != qpdf_dl_none); |
271 | 0 | for (int attempt = 1; attempt <= 2; ++attempt) { |
272 | 0 | bool succeeded = |
273 | 0 | pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1)); |
274 | 0 | if (!succeeded || (filter && !filtered)) { |
275 | | // Try again |
276 | 0 | filter = false; |
277 | 0 | decode_level = qpdf_dl_none; |
278 | 0 | buf_pl.getString(); // reset buf_pl |
279 | 0 | } else { |
280 | 0 | buf_pl_ready = true; |
281 | 0 | break; |
282 | 0 | } |
283 | 0 | } |
284 | 0 | if (!buf_pl_ready) { |
285 | 0 | throw std::logic_error("QPDF_Stream: failed to get stream data"); |
286 | 0 | } |
287 | | // We can use unsafeShallowCopy because we are only touching top-level keys. |
288 | 0 | auto dict = s->stream_dict.unsafeShallowCopy(); |
289 | 0 | dict.removeKey("/Length"); |
290 | 0 | if (filter && filtered) { |
291 | 0 | dict.removeKey("/Filter"); |
292 | 0 | dict.removeKey("/DecodeParms"); |
293 | 0 | } |
294 | 0 | if (json_data == qpdf_sj_file) { |
295 | 0 | jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\""; |
296 | 0 | p->writeString(buf_pl.getString()); |
297 | 0 | } else if (json_data == qpdf_sj_inline) { |
298 | 0 | if (!no_data_key) { |
299 | 0 | jw.writeNext() << R"("data": ")"; |
300 | 0 | jw.writeBase64(buf_pl.getString()) << "\""; |
301 | 0 | } |
302 | 0 | } else { |
303 | 0 | throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data"); |
304 | 0 | } |
305 | | |
306 | 0 | jw.writeNext() << R"("dict": )"; |
307 | 0 | dict.writeJSON(json_version, jw); |
308 | 0 | jw.writeEnd('}'); |
309 | |
|
310 | 0 | return decode_level; |
311 | 0 | } |
312 | | |
313 | | void |
314 | | qpdf::Stream::setDictDescription() |
315 | 47.4k | { |
316 | 47.4k | auto s = stream(); |
317 | 47.4k | if (!s->stream_dict.hasObjectDescription()) { |
318 | 0 | s->stream_dict.setObjectDescription( |
319 | 0 | obj->getQPDF(), obj->getDescription() + " -> stream dictionary"); |
320 | 0 | } |
321 | 47.4k | } |
322 | | |
323 | | std::string |
324 | | Stream::getStreamData(qpdf_stream_decode_level_e decode_level) |
325 | 8.03k | { |
326 | 8.03k | std::string result; |
327 | 8.03k | pl::String buf(result); |
328 | 8.03k | bool filtered; |
329 | 8.03k | pipeStreamData(&buf, &filtered, 0, decode_level, false, false); |
330 | 8.03k | if (!filtered) { |
331 | 1.61k | throw QPDFExc( |
332 | 1.61k | qpdf_e_unsupported, |
333 | 1.61k | obj->getQPDF()->getFilename(), |
334 | 1.61k | "", |
335 | 1.61k | obj->getParsedOffset(), |
336 | 1.61k | "getStreamData called on unfilterable stream"); |
337 | 1.61k | } |
338 | 6.41k | QTC::TC("qpdf", "QPDF_Stream getStreamData"); |
339 | 6.41k | return result; |
340 | 8.03k | } |
341 | | |
342 | | std::string |
343 | | Stream::getRawStreamData() |
344 | 0 | { |
345 | 0 | std::string result; |
346 | 0 | pl::String buf(result); |
347 | 0 | if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) { |
348 | 0 | throw QPDFExc( |
349 | 0 | qpdf_e_unsupported, |
350 | 0 | obj->getQPDF()->getFilename(), |
351 | 0 | "", |
352 | 0 | obj->getParsedOffset(), |
353 | 0 | "error getting raw stream data"); |
354 | 0 | } |
355 | 0 | QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); |
356 | 0 | return result; |
357 | 0 | } |
358 | | |
359 | | bool |
360 | | Stream::isRootMetadata() const |
361 | 70.9k | { |
362 | 70.9k | if (!getDict().isDictionaryOfType("/Metadata", "/XML")) { |
363 | 70.6k | return false; |
364 | 70.6k | } |
365 | 277 | auto root_metadata = qpdf()->getRoot().getKey("/Metadata"); |
366 | 277 | return root_metadata.isSameObjectAs(obj); |
367 | 70.9k | } |
368 | | |
369 | | bool |
370 | | Stream::filterable( |
371 | | qpdf_stream_decode_level_e decode_level, |
372 | | std::vector<std::shared_ptr<QPDFStreamFilter>>& filters) |
373 | 34.2k | { |
374 | 34.2k | auto s = stream(); |
375 | | // Check filters |
376 | | |
377 | 34.2k | auto filter_obj = s->stream_dict.getKey("/Filter"); |
378 | | |
379 | 34.2k | if (filter_obj.isNull()) { |
380 | | // No filters |
381 | 5.14k | return true; |
382 | 5.14k | } |
383 | 29.0k | if (filter_obj.isName()) { |
384 | | // One filter |
385 | 22.4k | auto ff = s->filter_factory(filter_obj.getName()); |
386 | 22.4k | if (!ff) { |
387 | 446 | return false; |
388 | 446 | } |
389 | 21.9k | filters.emplace_back(ff()); |
390 | 21.9k | } else if (auto array = filter_obj.as_array(strict)) { |
391 | | // Potentially multiple filters |
392 | 17.2k | for (auto const& item: array) { |
393 | 17.2k | if (!item.isName()) { |
394 | 144 | warn("stream filter type is not name or array"); |
395 | 144 | return false; |
396 | 144 | } |
397 | 17.0k | auto ff = s->filter_factory(item.getName()); |
398 | 17.0k | if (!ff) { |
399 | 481 | filters.clear(); |
400 | 481 | return false; |
401 | 481 | } |
402 | 16.6k | filters.emplace_back(ff()); |
403 | 16.6k | } |
404 | 6.62k | } else { |
405 | 18 | warn("stream filter type is not name or array"); |
406 | 18 | return false; |
407 | 18 | } |
408 | | |
409 | | // filters now contains a list of filters to be applied in order. See which ones we can support. |
410 | | // See if we can support any decode parameters that are specified. |
411 | | |
412 | 27.9k | auto decode_obj = s->stream_dict.getKey("/DecodeParms"); |
413 | | |
414 | 27.9k | auto can_filter = // linebreak |
415 | 35.6k | [](auto d_level, auto& filter, auto& d_obj) -> bool { |
416 | 35.6k | if (!filter.setDecodeParms(d_obj) || |
417 | 35.6k | (d_level < qpdf_dl_all && filter.isLossyCompression()) || |
418 | 35.6k | (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) { |
419 | 281 | return false; |
420 | 281 | } |
421 | 35.3k | return true; |
422 | 35.6k | }; |
423 | | |
424 | 27.9k | auto decode_array = decode_obj.as_array(strict); |
425 | 27.9k | if (!decode_array || decode_array.size() == 0) { |
426 | 27.8k | if (decode_array) { |
427 | 6 | decode_obj = QPDFObjectHandle::newNull(); |
428 | 6 | } |
429 | | |
430 | 35.5k | for (auto& filter: filters) { |
431 | 35.5k | if (!can_filter(decode_level, *filter, decode_obj)) { |
432 | 265 | return false; |
433 | 265 | } |
434 | 35.5k | } |
435 | 27.8k | } else { |
436 | | // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose |
437 | | // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. |
438 | 126 | if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) { |
439 | 15 | warn("stream /DecodeParms length is inconsistent with filters"); |
440 | 15 | return false; |
441 | 15 | } |
442 | | |
443 | 111 | int i = -1; |
444 | 127 | for (auto& filter: filters) { |
445 | 127 | auto d_obj = decode_array.get(++i); |
446 | 127 | if (!can_filter(decode_level, *filter, d_obj)) { |
447 | 16 | return false; |
448 | 16 | } |
449 | 127 | } |
450 | 111 | } |
451 | | |
452 | 27.7k | return true; |
453 | 27.9k | } |
454 | | |
455 | | bool |
456 | | Stream::pipeStreamData( |
457 | | Pipeline* pipeline, |
458 | | bool* filterp, |
459 | | int encode_flags, |
460 | | qpdf_stream_decode_level_e decode_level, |
461 | | bool suppress_warnings, |
462 | | bool will_retry) |
463 | 44.9k | { |
464 | 44.9k | auto s = stream(); |
465 | 44.9k | std::vector<std::shared_ptr<QPDFStreamFilter>> filters; |
466 | 44.9k | bool ignored; |
467 | 44.9k | if (!filterp) { |
468 | 0 | filterp = &ignored; |
469 | 0 | } |
470 | 44.9k | bool& filter = *filterp; |
471 | | |
472 | 44.9k | const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0; |
473 | 44.9k | const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0; |
474 | 44.9k | const bool empty = empty_stream || empty_stream_data; |
475 | | |
476 | 44.9k | if (empty_stream || empty_stream_data) { |
477 | 1.06k | filter = true; |
478 | 1.06k | } |
479 | | |
480 | 44.9k | filter = empty || encode_flags || decode_level != qpdf_dl_none; |
481 | 44.9k | if (filter) { |
482 | 34.2k | filter = filterable(decode_level, filters); |
483 | 34.2k | } |
484 | | |
485 | 44.9k | if (!pipeline) { |
486 | 0 | QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline"); |
487 | | // Return value is whether we can filter in this case. |
488 | 0 | return filter; |
489 | 0 | } |
490 | | |
491 | | // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this |
492 | | // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those |
493 | | // objects. |
494 | 44.9k | std::vector<std::unique_ptr<Pipeline>> to_delete; |
495 | | |
496 | 44.9k | ContentNormalizer normalizer; |
497 | 44.9k | if (filter) { |
498 | 32.8k | if (encode_flags & qpdf_ef_compress) { |
499 | 0 | auto new_pipeline = |
500 | 0 | std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate); |
501 | 0 | pipeline = new_pipeline.get(); |
502 | 0 | to_delete.push_back(std::move(new_pipeline)); |
503 | 0 | } |
504 | | |
505 | 32.8k | if (encode_flags & qpdf_ef_normalize) { |
506 | 3.60k | auto new_pipeline = |
507 | 3.60k | std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline); |
508 | 3.60k | pipeline = new_pipeline.get(); |
509 | 3.60k | to_delete.push_back(std::move(new_pipeline)); |
510 | 3.60k | } |
511 | | |
512 | 32.8k | for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) { |
513 | 0 | auto new_pipeline = |
514 | 0 | std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline); |
515 | 0 | pipeline = new_pipeline.get(); |
516 | 0 | to_delete.push_back(std::move(new_pipeline)); |
517 | 0 | } |
518 | | |
519 | 68.1k | for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) { |
520 | 35.3k | if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) { |
521 | 35.1k | pipeline = decode_pipeline; |
522 | 35.1k | } |
523 | 35.3k | auto* flate = dynamic_cast<Pl_Flate*>(pipeline); |
524 | 35.3k | if (flate) { |
525 | 20.0k | flate->setWarnCallback([this](char const* msg, int code) { warn(msg); }); |
526 | 20.0k | } |
527 | 35.3k | } |
528 | 32.8k | } |
529 | | |
530 | 44.9k | if (s->stream_data.get()) { |
531 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data"); |
532 | 0 | pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize()); |
533 | 0 | pipeline->finish(); |
534 | 44.9k | } else if (s->stream_provider.get()) { |
535 | 0 | Pl_Count count("stream provider count", pipeline); |
536 | 0 | if (s->stream_provider->supportsRetry()) { |
537 | 0 | if (!s->stream_provider->provideStreamData( |
538 | 0 | obj->getObjGen(), &count, suppress_warnings, will_retry)) { |
539 | 0 | filter = false; |
540 | 0 | return false; |
541 | 0 | } |
542 | 0 | } else { |
543 | 0 | s->stream_provider->provideStreamData(obj->getObjGen(), &count); |
544 | 0 | } |
545 | 0 | qpdf_offset_t actual_length = count.getCount(); |
546 | 0 | if (s->stream_dict.hasKey("/Length")) { |
547 | 0 | auto desired_length = s->stream_dict.getKey("/Length").getIntValue(); |
548 | 0 | if (actual_length != desired_length) { |
549 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); |
550 | | // This would be caused by programmer error on the part of a library user, not by |
551 | | // invalid input data. |
552 | 0 | throw std::runtime_error( |
553 | 0 | "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " + |
554 | 0 | std::to_string(actual_length) + " bytes instead of expected " + |
555 | 0 | std::to_string(desired_length) + " bytes"); |
556 | 0 | } |
557 | 0 | } else { |
558 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length not provided"); |
559 | 0 | s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length)); |
560 | 0 | } |
561 | 44.9k | } else { |
562 | 44.9k | if (obj->getParsedOffset() == 0) { |
563 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe no stream data"); |
564 | 0 | throw std::logic_error("pipeStreamData called for stream with no data"); |
565 | 0 | } |
566 | 44.9k | QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); |
567 | 44.9k | if (!QPDF::Pipe::pipeStreamData( |
568 | 44.9k | obj->getQPDF(), |
569 | 44.9k | obj->getObjGen(), |
570 | 44.9k | obj->getParsedOffset(), |
571 | 44.9k | s->length, |
572 | 44.9k | s->stream_dict, |
573 | 44.9k | isRootMetadata(), |
574 | 44.9k | pipeline, |
575 | 44.9k | suppress_warnings, |
576 | 44.9k | will_retry)) { |
577 | 11.3k | filter = false; |
578 | 11.3k | return false; |
579 | 11.3k | } |
580 | 44.9k | } |
581 | | |
582 | 33.6k | if (filter && !suppress_warnings && normalizer.anyBadTokens()) { |
583 | 874 | warn("content normalization encountered bad tokens"); |
584 | 874 | if (normalizer.lastTokenWasBad()) { |
585 | 473 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); |
586 | 473 | warn( |
587 | 473 | "normalized content ended with a bad token; you may be able to resolve this by " |
588 | 473 | "coalescing content streams in combination with normalizing content. From the " |
589 | 473 | "command line, specify --coalesce-contents"); |
590 | 473 | } |
591 | 874 | warn( |
592 | 874 | "Resulting stream data may be corrupted but is may still useful for manual " |
593 | 874 | "inspection. For more information on this warning, search for content normalization " |
594 | 874 | "in the manual."); |
595 | 874 | } |
596 | | |
597 | 33.6k | return true; |
598 | 44.9k | } |
599 | | |
600 | | void |
601 | | Stream::replaceStreamData( |
602 | | std::shared_ptr<Buffer> data, |
603 | | QPDFObjectHandle const& filter, |
604 | | QPDFObjectHandle const& decode_parms) |
605 | 0 | { |
606 | 0 | auto s = stream(); |
607 | 0 | s->stream_data = data; |
608 | 0 | s->stream_provider = nullptr; |
609 | 0 | replaceFilterData(filter, decode_parms, data->getSize()); |
610 | 0 | } |
611 | | |
612 | | void |
613 | | Stream::replaceStreamData( |
614 | | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider, |
615 | | QPDFObjectHandle const& filter, |
616 | | QPDFObjectHandle const& decode_parms) |
617 | 0 | { |
618 | 0 | auto s = stream(); |
619 | 0 | s->stream_provider = provider; |
620 | 0 | s->stream_data = nullptr; |
621 | 0 | replaceFilterData(filter, decode_parms, 0); |
622 | 0 | } |
623 | | |
624 | | void |
625 | | Stream::replaceFilterData( |
626 | | QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length) |
627 | 0 | { |
628 | 0 | auto s = stream(); |
629 | 0 | if (filter) { |
630 | 0 | s->stream_dict.replaceKey("/Filter", filter); |
631 | 0 | } |
632 | 0 | if (decode_parms) { |
633 | 0 | s->stream_dict.replaceKey("/DecodeParms", decode_parms); |
634 | 0 | } |
635 | 0 | if (length == 0) { |
636 | 0 | QTC::TC("qpdf", "QPDF_Stream unknown stream length"); |
637 | 0 | s->stream_dict.removeKey("/Length"); |
638 | 0 | } else { |
639 | 0 | s->stream_dict.replaceKey( |
640 | 0 | "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length))); |
641 | 0 | } |
642 | 0 | } |
643 | | |
644 | | void |
645 | | Stream::warn(std::string const& message) |
646 | 5.54k | { |
647 | 5.54k | obj->getQPDF()->warn(qpdf_e_damaged_pdf, "", obj->getParsedOffset(), message); |
648 | 5.54k | } |
649 | | |
650 | | QPDFObjectHandle |
651 | | QPDFObjectHandle::getDict() const |
652 | 154k | { |
653 | 154k | return as_stream(error).getDict(); |
654 | 154k | } |
655 | | |
656 | | void |
657 | | QPDFObjectHandle::setFilterOnWrite(bool val) |
658 | 10.9k | { |
659 | 10.9k | as_stream(error).setFilterOnWrite(val); |
660 | 10.9k | } |
661 | | |
662 | | bool |
663 | | QPDFObjectHandle::getFilterOnWrite() |
664 | 26.0k | { |
665 | 26.0k | return as_stream(error).getFilterOnWrite(); |
666 | 26.0k | } |
667 | | |
668 | | bool |
669 | | QPDFObjectHandle::isDataModified() |
670 | 26.0k | { |
671 | 26.0k | return as_stream(error).isDataModified(); |
672 | 26.0k | } |
673 | | |
674 | | void |
675 | | QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) |
676 | 0 | { |
677 | 0 | as_stream(error).replaceDict(new_dict); |
678 | 0 | } |
679 | | |
680 | | bool |
681 | | QPDFObjectHandle::isRootMetadata() const |
682 | 26.0k | { |
683 | 26.0k | return as_stream(error).isRootMetadata(); |
684 | 26.0k | } |
685 | | |
686 | | std::shared_ptr<Buffer> |
687 | | QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) |
688 | 6.38k | { |
689 | 6.38k | return std::make_shared<Buffer>(as_stream(error).getStreamData(level)); |
690 | 6.38k | } |
691 | | |
692 | | std::shared_ptr<Buffer> |
693 | | QPDFObjectHandle::getRawStreamData() |
694 | 0 | { |
695 | 0 | return std::make_shared<Buffer>(as_stream(error).getRawStreamData()); |
696 | 0 | } |
697 | | |
698 | | bool |
699 | | QPDFObjectHandle::pipeStreamData( |
700 | | Pipeline* p, |
701 | | bool* filtering_attempted, |
702 | | int encode_flags, |
703 | | qpdf_stream_decode_level_e decode_level, |
704 | | bool suppress_warnings, |
705 | | bool will_retry) |
706 | 0 | { |
707 | 0 | return as_stream(error).pipeStreamData( |
708 | 0 | p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
709 | 0 | } |
710 | | |
711 | | bool |
712 | | QPDFObjectHandle::pipeStreamData( |
713 | | Pipeline* p, |
714 | | int encode_flags, |
715 | | qpdf_stream_decode_level_e decode_level, |
716 | | bool suppress_warnings, |
717 | | bool will_retry) |
718 | 36.9k | { |
719 | 36.9k | bool filtering_attempted; |
720 | 36.9k | as_stream(error).pipeStreamData( |
721 | 36.9k | p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
722 | 36.9k | return filtering_attempted; |
723 | 36.9k | } |
724 | | |
725 | | bool |
726 | | QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress) |
727 | 0 | { |
728 | 0 | int encode_flags = 0; |
729 | 0 | qpdf_stream_decode_level_e decode_level = qpdf_dl_none; |
730 | 0 | if (filter) { |
731 | 0 | decode_level = qpdf_dl_generalized; |
732 | 0 | if (normalize) { |
733 | 0 | encode_flags |= qpdf_ef_normalize; |
734 | 0 | } |
735 | 0 | if (compress) { |
736 | 0 | encode_flags |= qpdf_ef_compress; |
737 | 0 | } |
738 | 0 | } |
739 | 0 | return pipeStreamData(p, encode_flags, decode_level, false); |
740 | 0 | } |
741 | | |
742 | | void |
743 | | QPDFObjectHandle::replaceStreamData( |
744 | | std::shared_ptr<Buffer> data, |
745 | | QPDFObjectHandle const& filter, |
746 | | QPDFObjectHandle const& decode_parms) |
747 | 0 | { |
748 | 0 | as_stream(error).replaceStreamData(data, filter, decode_parms); |
749 | 0 | } |
750 | | |
751 | | void |
752 | | QPDFObjectHandle::replaceStreamData( |
753 | | std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) |
754 | 0 | { |
755 | 0 | auto b = std::make_shared<Buffer>(data.length()); |
756 | 0 | unsigned char* bp = b->getBuffer(); |
757 | 0 | if (bp) { |
758 | 0 | memcpy(bp, data.c_str(), data.length()); |
759 | 0 | } |
760 | 0 | as_stream(error).replaceStreamData(b, filter, decode_parms); |
761 | 0 | } |
762 | | |
763 | | void |
764 | | QPDFObjectHandle::replaceStreamData( |
765 | | std::shared_ptr<StreamDataProvider> provider, |
766 | | QPDFObjectHandle const& filter, |
767 | | QPDFObjectHandle const& decode_parms) |
768 | 0 | { |
769 | 0 | as_stream(error).replaceStreamData(provider, filter, decode_parms); |
770 | 0 | } |
771 | | |
772 | | namespace |
773 | | { |
774 | | class FunctionProvider: public QPDFObjectHandle::StreamDataProvider |
775 | | { |
776 | | public: |
777 | | FunctionProvider(std::function<void(Pipeline*)> provider) : |
778 | 0 | StreamDataProvider(false), |
779 | 0 | p1(provider), |
780 | 0 | p2(nullptr) |
781 | 0 | { |
782 | 0 | } |
783 | | FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) : |
784 | 0 | StreamDataProvider(true), |
785 | 0 | p1(nullptr), |
786 | 0 | p2(provider) |
787 | 0 | { |
788 | 0 | } |
789 | | |
790 | | void |
791 | | provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override |
792 | 0 | { |
793 | 0 | p1(pipeline); |
794 | 0 | } |
795 | | |
796 | | bool |
797 | | provideStreamData( |
798 | | QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override |
799 | 0 | { |
800 | 0 | return p2(pipeline, suppress_warnings, will_retry); |
801 | 0 | } |
802 | | |
803 | | private: |
804 | | std::function<void(Pipeline*)> p1; |
805 | | std::function<bool(Pipeline*, bool, bool)> p2; |
806 | | }; |
807 | | } // namespace |
808 | | |
809 | | void |
810 | | QPDFObjectHandle::replaceStreamData( |
811 | | std::function<void(Pipeline*)> provider, |
812 | | QPDFObjectHandle const& filter, |
813 | | QPDFObjectHandle const& decode_parms) |
814 | 0 | { |
815 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
816 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
817 | 0 | } |
818 | | |
819 | | void |
820 | | QPDFObjectHandle::replaceStreamData( |
821 | | std::function<bool(Pipeline*, bool, bool)> provider, |
822 | | QPDFObjectHandle const& filter, |
823 | | QPDFObjectHandle const& decode_parms) |
824 | 0 | { |
825 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
826 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
827 | 0 | } |
828 | | |
829 | | JSON |
830 | | QPDFObjectHandle::getStreamJSON( |
831 | | int json_version, |
832 | | qpdf_json_stream_data_e json_data, |
833 | | qpdf_stream_decode_level_e decode_level, |
834 | | Pipeline* p, |
835 | | std::string const& data_filename) |
836 | 0 | { |
837 | 0 | return as_stream(error).getStreamJSON(json_version, json_data, decode_level, p, data_filename); |
838 | 0 | } |