/src/qpdf/libqpdf/QPDF_Stream.cc
Line | Count | Source |
1 | | #include <qpdf/QPDFObjectHandle_private.hh> |
2 | | |
3 | | #include <qpdf/ContentNormalizer.hh> |
4 | | #include <qpdf/JSON_writer.hh> |
5 | | #include <qpdf/Pipeline.hh> |
6 | | #include <qpdf/Pipeline_private.hh> |
7 | | #include <qpdf/Pl_Buffer.hh> |
8 | | #include <qpdf/Pl_Count.hh> |
9 | | #include <qpdf/Pl_Discard.hh> |
10 | | #include <qpdf/Pl_Flate.hh> |
11 | | #include <qpdf/Pl_QPDFTokenizer.hh> |
12 | | #include <qpdf/QIntC.hh> |
13 | | #include <qpdf/QPDFExc.hh> |
14 | | #include <qpdf/QPDF_private.hh> |
15 | | #include <qpdf/QTC.hh> |
16 | | #include <qpdf/QUtil.hh> |
17 | | #include <qpdf/SF_ASCII85Decode.hh> |
18 | | #include <qpdf/SF_ASCIIHexDecode.hh> |
19 | | #include <qpdf/SF_DCTDecode.hh> |
20 | | #include <qpdf/SF_FlateLzwDecode.hh> |
21 | | #include <qpdf/SF_RunLengthDecode.hh> |
22 | | |
23 | | #include <stdexcept> |
24 | | |
25 | | using namespace std::literals; |
26 | | using namespace qpdf; |
27 | | |
28 | | using Streams = QPDF::Doc::Objects::Streams; |
29 | | |
30 | | bool |
31 | | Streams::immediate_copy_from() const |
32 | 0 | { |
33 | 0 | return qpdf_.m->immediate_copy_from; |
34 | 0 | } |
35 | | |
36 | | class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider |
37 | | { |
38 | | class Data |
39 | | { |
40 | | friend class Streams; |
41 | | |
42 | | public: |
43 | | Data(Stream& source, Dictionary const& dest_dict) : |
44 | 0 | encp(source.qpdf()->m->encp), |
45 | 0 | file(source.qpdf()->m->file), |
46 | 0 | source_og(source.id_gen()), |
47 | 0 | offset(source.offset()), |
48 | 0 | length(source.getLength()), |
49 | 0 | dest_dict(dest_dict), |
50 | 0 | is_root_metadata(source.isRootMetadata()) |
51 | 0 | { |
52 | 0 | } |
53 | | |
54 | | private: |
55 | | std::shared_ptr<EncryptionParameters> encp; |
56 | | std::shared_ptr<InputSource> file; |
57 | | QPDFObjGen source_og; |
58 | | qpdf_offset_t offset; |
59 | | size_t length; |
60 | | QPDFObjectHandle dest_dict; |
61 | | bool is_root_metadata{false}; |
62 | | }; |
63 | | |
64 | | public: |
65 | | Copier() = delete; |
66 | | Copier(StreamDataProvider const&) = delete; |
67 | | Copier(StreamDataProvider&&) = delete; |
68 | | Copier& operator=(StreamDataProvider const&) = delete; |
69 | | Copier& operator=(StreamDataProvider&&) = delete; |
70 | 24.3k | ~Copier() final = default; |
71 | | |
72 | | Copier(Streams& streams) : |
73 | 24.3k | QPDFObjectHandle::StreamDataProvider(true), |
74 | 24.3k | streams(streams) |
75 | 24.3k | { |
76 | 24.3k | } |
77 | | |
78 | | bool |
79 | | provideStreamData( |
80 | | QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final |
81 | 0 | { |
82 | 0 | auto data = copied_data.find(og); |
83 | 0 | if (data != copied_data.end()) { |
84 | 0 | auto& fd = data->second; |
85 | 0 | QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1); |
86 | 0 | if (streams.qpdf().pipeStreamData( |
87 | 0 | fd.encp, |
88 | 0 | fd.file, |
89 | 0 | streams.qpdf(), |
90 | 0 | fd.source_og, |
91 | 0 | fd.offset, |
92 | 0 | fd.length, |
93 | 0 | fd.dest_dict, |
94 | 0 | fd.is_root_metadata, |
95 | 0 | pipeline, |
96 | 0 | suppress_warnings, |
97 | 0 | will_retry)) { |
98 | 0 | return true; // for CI coverage |
99 | 0 | } else { |
100 | 0 | return false; |
101 | 0 | } |
102 | 0 | } |
103 | 0 | auto stream = copied_streams.find(og); |
104 | 0 | qpdf_invariant(stream == copied_streams.end() || stream->second); |
105 | 0 | if (stream != copied_streams.end() && |
106 | 0 | stream->second.pipeStreamData( |
107 | 0 | pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) { |
108 | 0 | return true; // for CI coverage |
109 | 0 | } |
110 | 0 | return false; |
111 | 0 | } |
112 | | |
113 | | void |
114 | | register_copy(Stream& dest, Stream& source, bool provider) |
115 | 0 | { |
116 | 0 | qpdf_expect(source); |
117 | 0 | qpdf_expect(dest); |
118 | 0 | if (provider) { |
119 | 0 | copied_streams.insert_or_assign(dest, source); |
120 | 0 | } else { |
121 | 0 | copied_data.insert_or_assign(dest, Data(source, dest.getDict())); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | private: |
126 | | Streams& streams; |
127 | | std::map<QPDFObjGen, Stream> copied_streams; |
128 | | std::map<QPDFObjGen, Data> copied_data; |
129 | | }; |
130 | | |
131 | | Streams::Streams(QPDF& qpdf) : |
132 | 24.3k | qpdf_(qpdf), |
133 | 24.3k | copier_(std::make_shared<Copier>(*this)) |
134 | 24.3k | { |
135 | 24.3k | } |
136 | | |
137 | | namespace |
138 | | { |
139 | | class SF_Crypt final: public QPDFStreamFilter |
140 | | { |
141 | | public: |
142 | 2.00k | SF_Crypt() = default; |
143 | | ~SF_Crypt() final = default; |
144 | | |
145 | | bool |
146 | | setDecodeParms(QPDFObjectHandle decode_parms) final |
147 | 1.21k | { |
148 | | // we only validate here - processing happens in decryptStream |
149 | 1.21k | if (Dictionary dict = decode_parms) { |
150 | 642 | for (auto const& [key, value]: dict) { |
151 | 642 | if (key == "/Type" && |
152 | 45 | (value.null() || Name(value) == "/CryptFilterDecodeParms")) { |
153 | 10 | continue; |
154 | 10 | } |
155 | 632 | if (key == "/Name") { |
156 | 10 | continue; |
157 | 10 | } |
158 | 622 | if (!value.null()) { |
159 | 93 | return false; |
160 | 93 | } |
161 | 622 | } |
162 | 82 | return true; |
163 | 175 | } |
164 | 1.04k | return decode_parms.null(); |
165 | 1.21k | } |
166 | | |
167 | | Pipeline* |
168 | | getDecodePipeline(Pipeline*) final |
169 | 1.04k | { |
170 | | // Not used -- handled by pipeStreamData |
171 | 1.04k | return nullptr; |
172 | 1.04k | } |
173 | | }; |
174 | | |
175 | | class StreamBlobProvider |
176 | | { |
177 | | public: |
178 | | StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) : |
179 | 0 | stream(stream), |
180 | 0 | decode_level(decode_level) |
181 | 0 | { |
182 | 0 | } |
183 | | void |
184 | | operator()(Pipeline* p) |
185 | 0 | { |
186 | 0 | stream.pipeStreamData(p, nullptr, 0, decode_level, false, false); |
187 | 0 | } |
188 | | |
189 | | private: |
190 | | Stream stream; |
191 | | qpdf_stream_decode_level_e decode_level; |
192 | | }; |
193 | | |
194 | | /// User defined streamfilter factories |
195 | | std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories; |
196 | | bool filter_factories_registered = false; |
197 | | } // namespace |
198 | | |
199 | | std::string |
200 | | QPDF_Stream::Members::expand_filter_name(std::string const& name) const |
201 | 0 | { |
202 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
203 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also |
204 | | // accepts them for stream filters. |
205 | 0 | if (name == "/AHx") { |
206 | 0 | return "/ASCIIHexDecode"; |
207 | 0 | } |
208 | 0 | if (name == "/A85") { |
209 | 0 | return "/ASCII85Decode"; |
210 | 0 | } |
211 | 0 | if (name == "/LZW") { |
212 | 0 | return "/LZWDecode"; |
213 | 0 | } |
214 | 0 | if (name == "/Fl") { |
215 | 0 | return "/FlateDecode"; |
216 | 0 | } |
217 | 0 | if (name == "/RL") { |
218 | 0 | return "/RunLengthDecode"; |
219 | 0 | } |
220 | 0 | if (name == "/CCF") { |
221 | 0 | return "/CCITTFaxDecode"; |
222 | 0 | } |
223 | 0 | if (name == "/DCT") { |
224 | 0 | return "/DCTDecode"; |
225 | 0 | } |
226 | 0 | return name; |
227 | 0 | }; |
228 | | |
229 | | std::function<std::shared_ptr<QPDFStreamFilter>()> |
230 | | QPDF_Stream::Members::filter_factory(std::string const& name) const |
231 | 47.2k | { |
232 | 47.2k | if (filter_factories_registered) [[unlikely]] { |
233 | | // We need to check user provided filters first as we allow users to replace qpdf provided |
234 | | // default filters. This will have a performance impact if the facility to register stream |
235 | | // filters is actually used. We can optimize this away if necessary. |
236 | 0 | auto ff = filter_factories.find(expand_filter_name(name)); |
237 | 0 | if (ff != filter_factories.end()) { |
238 | 0 | return ff->second; |
239 | 0 | } |
240 | 0 | } |
241 | 47.2k | if (name == "/FlateDecode") { |
242 | 13.7k | return SF_FlateLzwDecode::flate_factory; |
243 | 13.7k | } |
244 | 33.4k | if (name == "/Crypt") { |
245 | 2.00k | return []() { return std::make_shared<SF_Crypt>(); }; |
246 | 2.00k | } |
247 | 31.4k | if (name == "/LZWDecode") { |
248 | 2.20k | return SF_FlateLzwDecode::lzw_factory; |
249 | 2.20k | } |
250 | 29.2k | if (name == "/RunLengthDecode") { |
251 | 73 | return SF_RunLengthDecode::factory; |
252 | 73 | } |
253 | 29.2k | if (name == "/DCTDecode") { |
254 | 7.65k | return SF_DCTDecode::factory; |
255 | 7.65k | } |
256 | 21.5k | if (name == "/ASCII85Decode") { |
257 | 2.62k | return SF_ASCII85Decode::factory; |
258 | 2.62k | } |
259 | 18.9k | if (name == "/ASCIIHexDecode") { |
260 | 843 | return SF_ASCIIHexDecode::factory; |
261 | 843 | } |
262 | | // The PDF specification provides these filter abbreviations for use in inline images, but |
263 | | // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader |
264 | | // also accepts them for stream filters. |
265 | | |
266 | 18.0k | if (name == "/Fl") { |
267 | 1.70k | return SF_FlateLzwDecode::flate_factory; |
268 | 1.70k | } |
269 | 16.3k | if (name == "/AHx") { |
270 | 3.41k | return SF_ASCIIHexDecode::factory; |
271 | 3.41k | } |
272 | 12.9k | if (name == "/A85") { |
273 | 703 | return SF_ASCII85Decode::factory; |
274 | 703 | } |
275 | 12.2k | if (name == "/LZW") { |
276 | 1.86k | return SF_FlateLzwDecode::lzw_factory; |
277 | 1.86k | } |
278 | 10.3k | if (name == "/RL") { |
279 | 6.79k | return SF_RunLengthDecode::factory; |
280 | 6.79k | } |
281 | 3.60k | if (name == "/DCT") { |
282 | 1.58k | return SF_DCTDecode::factory; |
283 | 1.58k | } |
284 | 2.02k | return nullptr; |
285 | 3.60k | } |
286 | | |
287 | | Stream::Stream( |
288 | | QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : |
289 | 46.0k | BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length)) |
290 | 46.0k | { |
291 | 46.0k | auto descr = std::make_shared<QPDFObject::Description>( |
292 | 46.0k | qpdf.getFilename() + ", stream object " + og.unparse(' ')); |
293 | 46.0k | obj->setDescription(&qpdf, descr, offset); |
294 | 46.0k | setDictDescription(); |
295 | 46.0k | } |
296 | | |
297 | | Stream |
298 | | Stream::copy() |
299 | 0 | { |
300 | 0 | Stream result = qpdf()->newStream(); |
301 | 0 | result.stream()->stream_dict = getDict().copy(); |
302 | 0 | copy_data_to(result); |
303 | 0 | return result; |
304 | 0 | } |
305 | | |
306 | | void |
307 | | Stream::copy_data_to(Stream& dest) |
308 | 0 | { |
309 | 0 | qpdf_expect(dest); |
310 | 0 | auto s = stream(); |
311 | 0 | auto& streams = qpdf()->doc().objects().streams(); |
312 | 0 | auto& d_streams = dest.qpdf()->doc().objects().streams(); |
313 | |
|
314 | 0 | auto dict = dest.getDict(); |
315 | | |
316 | | // Copy information from the foreign stream so we can pipe its data later without keeping the |
317 | | // original QPDF object around. |
318 | 0 | if (streams.immediate_copy_from() && !s->stream_data) { |
319 | | // Pull the stream data into a buffer before attempting the copy operation. Do it on the |
320 | | // source stream so that if the source stream is copied multiple times, we don't have to |
321 | | // keep duplicating the memory. |
322 | 0 | replaceStreamData( |
323 | 0 | getRawStreamData(), s->stream_dict["/Filter"], s->stream_dict["/DecodeParms"]); |
324 | 0 | } |
325 | 0 | if (s->stream_data) { |
326 | 0 | dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]); |
327 | 0 | } else { |
328 | 0 | d_streams.copier()->register_copy(dest, *this, s->stream_provider.get()); |
329 | 0 | dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]); |
330 | 0 | } |
331 | 0 | } |
332 | | |
333 | | void |
334 | | Stream::registerStreamFilter( |
335 | | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) |
336 | 0 | { |
337 | 0 | filter_factories[filter_name] = factory; |
338 | 0 | filter_factories_registered = true; |
339 | 0 | } |
340 | | |
341 | | JSON |
342 | | Stream::getStreamJSON( |
343 | | int json_version, |
344 | | qpdf_json_stream_data_e json_data, |
345 | | qpdf_stream_decode_level_e decode_level, |
346 | | Pipeline* p, |
347 | | std::string const& data_filename) |
348 | 0 | { |
349 | 0 | Pl_Buffer pb{"streamjson"}; |
350 | 0 | JSON::Writer jw{&pb, 0}; |
351 | 0 | decode_level = |
352 | 0 | writeStreamJSON(json_version, jw, json_data, decode_level, p, data_filename, true); |
353 | 0 | pb.finish(); |
354 | 0 | auto result = JSON::parse(pb.getString()); |
355 | 0 | if (json_data == qpdf_sj_inline) { |
356 | 0 | result.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level))); |
357 | 0 | } |
358 | 0 | return result; |
359 | 0 | } |
360 | | |
// Write this stream as a JSON object ({ ... "dict": ... }) to `jw`.
//
// json_data selects how the stream data is represented:
//   qpdf_sj_none   - only "dict" is written; `p` must be null.
//   qpdf_sj_inline - "data" is written base64 encoded unless no_data_key is set; `p` must be
//                    null.
//   qpdf_sj_file   - the data is written to `p` and "datafile" records data_filename; both `p`
//                    and a non-empty data_filename are required.
//
// Returns the decode level that was actually used: the requested one, or qpdf_dl_none when the
// stream had to be re-read without decoding.
//
// Throws std::logic_error when the arguments are inconsistent with json_data or when the stream
// data cannot be obtained.
qpdf_stream_decode_level_e
Stream::writeStreamJSON(
    int json_version,
    JSON::Writer& jw,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename,
    bool no_data_key)
{
    auto s = stream();
    // Validate the argument combination before anything is written.
    switch (json_data) {
    case qpdf_sj_none:
    case qpdf_sj_inline:
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline should only be supplied "
                "when json_data is file");
        }
        break;
    case qpdf_sj_file:
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: data_filename must be supplied "
                "when json_data is file");
        }
        break;
    }

    jw.writeStart('{');

    if (json_data == qpdf_sj_none) {
        // No data requested: emit the unmodified dictionary and finish.
        jw.writeNext();
        jw << R"("dict": )";
        s->stream_dict.writeJSON(json_version, jw);
        jw.writeEnd('}');
        return decode_level;
    }

    Pl_Discard discard;
    Pl_Buffer buf_pl{"stream data"};
    Pipeline* data_pipeline = &buf_pl;
    if (no_data_key && json_data == qpdf_sj_inline) {
        // The data itself will not be written, but the stream is still piped so that the
        // decode level that works can be determined.
        data_pipeline = &discard;
    }
    // pipeStreamData produced valid data.
    bool buf_pl_ready = false;
    bool filtered = false;
    bool filter = (decode_level != qpdf_dl_none);
    // At most two attempts: first at the requested decode level (announcing a possible retry),
    // then undecoded.
    for (int attempt = 1; attempt <= 2; ++attempt) {
        bool succeeded =
            pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1));
        if (!succeeded || (filter && !filtered)) {
            // Try again
            filter = false;
            decode_level = qpdf_dl_none;
            buf_pl.getString(); // reset buf_pl
        } else {
            buf_pl_ready = true;
            break;
        }
    }
    if (!buf_pl_ready) {
        throw std::logic_error("QPDF_Stream: failed to get stream data");
    }
    // We can use unsafeShallowCopy because we are only touching top-level keys.
    auto dict = s->stream_dict.unsafeShallowCopy();
    dict.removeKey("/Length");
    if (filter && filtered) {
        // The data was written decoded, so the filter entries must not be reported.
        dict.removeKey("/Filter");
        dict.removeKey("/DecodeParms");
    }
    if (json_data == qpdf_sj_file) {
        jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\"";
        p->writeString(buf_pl.getString());
    } else if (json_data == qpdf_sj_inline) {
        if (!no_data_key) {
            jw.writeNext() << R"("data": ")";
            jw.writeBase64(buf_pl.getString()) << "\"";
        }
    } else {
        throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data");
    }

    jw.writeNext() << R"("dict": )";
    dict.writeJSON(json_version, jw);
    jw.writeEnd('}');

    return decode_level;
}
455 | | |
456 | | void |
457 | | qpdf::Stream::setDictDescription() |
458 | 46.0k | { |
459 | 46.0k | auto s = stream(); |
460 | 46.0k | if (!s->stream_dict.hasObjectDescription()) { |
461 | 0 | s->stream_dict.setObjectDescription( |
462 | 0 | obj->getQPDF(), obj->getDescription() + " -> stream dictionary"); |
463 | 0 | } |
464 | 46.0k | } |
465 | | |
466 | | std::string |
467 | | Stream::getStreamData(qpdf_stream_decode_level_e decode_level) |
468 | 7.54k | { |
469 | 7.54k | std::string result; |
470 | 7.54k | pl::String buf(result); |
471 | 7.54k | bool filtered; |
472 | 7.54k | pipeStreamData(&buf, &filtered, 0, decode_level, false, false); |
473 | 7.54k | if (!filtered) { |
474 | 1.49k | throw QPDFExc( |
475 | 1.49k | qpdf_e_unsupported, |
476 | 1.49k | qpdf()->getFilename(), |
477 | 1.49k | "", |
478 | 1.49k | offset(), |
479 | 1.49k | "getStreamData called on unfilterable stream"); |
480 | 1.49k | } |
481 | 6.04k | return result; |
482 | 7.54k | } |
483 | | |
484 | | std::string |
485 | | Stream::getRawStreamData() |
486 | 0 | { |
487 | 0 | std::string result; |
488 | 0 | pl::String buf(result); |
489 | 0 | if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) { |
490 | 0 | throw QPDFExc( |
491 | 0 | qpdf_e_unsupported, |
492 | 0 | qpdf()->getFilename(), |
493 | 0 | "", |
494 | 0 | offset(), |
495 | 0 | "error getting raw stream data"); |
496 | 0 | } |
497 | 0 | return result; |
498 | 0 | } |
499 | | |
500 | | bool |
501 | | Stream::isRootMetadata() const |
502 | 157k | { |
503 | 157k | if (!stream()->stream_dict.isDictionaryOfType("/Metadata", "/XML")) { |
504 | 156k | return false; |
505 | 156k | } |
506 | 933 | return qpdf()->getRoot()["/Metadata"].isSameObjectAs(obj); |
507 | 157k | } |
508 | | |
// Decide whether this stream's data can be decoded at `decode_level`.
//
// On success `filters` holds one filter object per entry of /Filter, in the order given by
// /Filter, each already configured with its /DecodeParms entry. Returns false when a filter is
// unknown, /Filter or /DecodeParms is malformed, a filter rejects its parameters, or a filter
// is lossy / specialized and the decode level does not allow it.
//
// NOTE(review): on a false return `filters` is not guaranteed to be empty. Only the
// "unknown filter inside an array" path clears it; the other failure paths leave whatever was
// appended so far.
bool
Stream::filterable(
    qpdf_stream_decode_level_e decode_level,
    std::vector<std::shared_ptr<QPDFStreamFilter>>& filters)
{
    auto s = stream();
    // Check filters

    auto const& filter_obj = s->stream_dict["/Filter"];

    if (filter_obj.null()) {
        // No filters
        return true;
    }
    if (filter_obj.isName()) {
        // One filter
        auto ff = s->filter_factory(filter_obj.getName());
        if (!ff) {
            return false;
        }
        filters.emplace_back(ff());
    } else if (Array array = filter_obj) {
        // Potentially multiple filters
        for (Name item: array) {
            if (!item) {
                // An array element that is not a name.
                warn("stream filter type is not name or array");
                return false;
            }
            auto ff = s->filter_factory(item);
            if (!ff) {
                filters.clear();
                return false;
            }
            filters.emplace_back(ff());
        }
    } else {
        warn("stream filter type is not name or array");
        return false;
    }

    // filters now contains a list of filters to be applied in order. See which ones we can support.
    // See if we can support any decode parameters that are specified.

    auto decode_obj = s->stream_dict.getKey("/DecodeParms");

    // A filter is usable when it accepts its decode parameters and the decode level permits
    // its kind: lossy filters require qpdf_dl_all, specialized ones at least
    // qpdf_dl_specialized.
    auto can_filter = // linebreak
        [](auto d_level, auto& filter, auto& d_obj) -> bool {
        if (!filter.setDecodeParms(d_obj) ||
            (d_level < qpdf_dl_all && filter.isLossyCompression()) ||
            (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) {
            return false;
        }
        return true;
    };

    auto decode_array = decode_obj.as_array(strict);
    if (!decode_array || decode_array.size() == 0) {
        // /DecodeParms is not an array, or is an empty one: the same object is offered to every
        // filter. An empty array is replaced by null first.
        if (decode_array) {
            decode_obj = QPDFObjectHandle::newNull();
        }

        for (auto& filter: filters) {
            if (!can_filter(decode_level, *filter, decode_obj)) {
                return false;
            }
        }
    } else {
        // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose
        // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
        if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) {
            warn("stream /DecodeParms length is inconsistent with filters");
            return false;
        }

        // Pair the i-th filter with the i-th /DecodeParms entry.
        int i = -1;
        for (auto& filter: filters) {
            auto d_obj = decode_array.get(++i);
            if (!can_filter(decode_level, *filter, d_obj)) {
                return false;
            }
        }
    }

    return true;
}
594 | | |
595 | | bool |
596 | | Stream::pipeStreamData( |
597 | | Pipeline* pipeline, |
598 | | bool* filterp, |
599 | | int encode_flags, |
600 | | qpdf_stream_decode_level_e decode_level, |
601 | | bool suppress_warnings, |
602 | | bool will_retry) |
603 | 87.5k | { |
604 | 87.5k | auto s = stream(); |
605 | 87.5k | std::vector<std::shared_ptr<QPDFStreamFilter>> filters; |
606 | 87.5k | bool ignored; |
607 | 87.5k | if (!filterp) { |
608 | 0 | filterp = &ignored; |
609 | 0 | } |
610 | 87.5k | bool& filter = *filterp; |
611 | | |
612 | 87.5k | const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0; |
613 | 87.5k | const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0; |
614 | 87.5k | const bool empty = empty_stream || empty_stream_data; |
615 | | |
616 | 87.5k | if (empty_stream || empty_stream_data) { |
617 | 3.03k | filter = true; |
618 | 3.03k | } |
619 | | |
620 | 87.5k | filter = empty || encode_flags || decode_level != qpdf_dl_none; |
621 | 87.5k | if (filter) { |
622 | 60.4k | filter = filterable(decode_level, filters); |
623 | 60.4k | } |
624 | | |
625 | 87.5k | if (!pipeline) { |
626 | 0 | QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline"); |
627 | | // Return value is whether we can filter in this case. |
628 | 0 | return filter; |
629 | 0 | } |
630 | | |
631 | | // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this |
632 | | // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those |
633 | | // objects. |
634 | 87.5k | std::vector<std::unique_ptr<Pipeline>> to_delete; |
635 | | |
636 | 87.5k | ContentNormalizer normalizer; |
637 | 87.5k | if (filter) { |
638 | 57.7k | if (encode_flags & qpdf_ef_compress) { |
639 | 45.8k | auto new_pipeline = |
640 | 45.8k | std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate); |
641 | 45.8k | pipeline = new_pipeline.get(); |
642 | 45.8k | to_delete.push_back(std::move(new_pipeline)); |
643 | 45.8k | } |
644 | | |
645 | 57.7k | if (encode_flags & qpdf_ef_normalize) { |
646 | 0 | auto new_pipeline = |
647 | 0 | std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline); |
648 | 0 | pipeline = new_pipeline.get(); |
649 | 0 | to_delete.push_back(std::move(new_pipeline)); |
650 | 0 | } |
651 | | |
652 | 57.7k | for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) { |
653 | 0 | auto new_pipeline = |
654 | 0 | std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline); |
655 | 0 | pipeline = new_pipeline.get(); |
656 | 0 | to_delete.push_back(std::move(new_pipeline)); |
657 | 0 | } |
658 | | |
659 | 97.5k | for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) { |
660 | 39.7k | if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) { |
661 | 38.6k | pipeline = decode_pipeline; |
662 | 38.6k | } |
663 | 39.7k | auto* flate = dynamic_cast<Pl_Flate*>(pipeline); |
664 | 39.7k | if (flate) { |
665 | 15.8k | flate->setWarnCallback([this](char const* msg, int code) { warn(msg); }); |
666 | 15.8k | } |
667 | 39.7k | } |
668 | 57.7k | } |
669 | | |
670 | 87.5k | if (s->stream_data.get()) { |
671 | 0 | QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data"); |
672 | 0 | pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize()); |
673 | 0 | pipeline->finish(); |
674 | 87.5k | } else if (s->stream_provider.get()) { |
675 | 0 | Pl_Count count("stream provider count", pipeline); |
676 | 0 | if (s->stream_provider->supportsRetry()) { |
677 | 0 | if (!s->stream_provider->provideStreamData( |
678 | 0 | obj->getObjGen(), &count, suppress_warnings, will_retry)) { |
679 | 0 | filter = false; |
680 | 0 | return false; |
681 | 0 | } |
682 | 0 | } else { |
683 | 0 | s->stream_provider->provideStreamData(obj->getObjGen(), &count); |
684 | 0 | } |
685 | 0 | qpdf_offset_t actual_length = count.getCount(); |
686 | 0 | if (s->stream_dict.hasKey("/Length")) { |
687 | 0 | auto desired_length = s->stream_dict.getKey("/Length").getIntValue(); |
688 | 0 | if (actual_length != desired_length) { |
689 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); |
690 | | // This would be caused by programmer error on the part of a library user, not by |
691 | | // invalid input data. |
692 | 0 | throw std::runtime_error( |
693 | 0 | "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " + |
694 | 0 | std::to_string(actual_length) + " bytes instead of expected " + |
695 | 0 | std::to_string(desired_length) + " bytes"); |
696 | 0 | } |
697 | 0 | } else { |
698 | 0 | QTC::TC("qpdf", "QPDF_Stream provider length not provided"); |
699 | 0 | s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length)); |
700 | 0 | } |
701 | 87.5k | } else { |
702 | 87.5k | if (offset() == 0) { |
703 | 0 | throw std::logic_error("pipeStreamData called for stream with no data"); |
704 | 0 | } |
705 | 87.5k | if (!Streams::pipeStreamData( |
706 | 87.5k | qpdf(), |
707 | 87.5k | id_gen(), |
708 | 87.5k | offset(), |
709 | 87.5k | s->length, |
710 | 87.5k | s->stream_dict, |
711 | 87.5k | isRootMetadata(), |
712 | 87.5k | pipeline, |
713 | 87.5k | suppress_warnings, |
714 | 87.5k | will_retry)) { |
715 | 9.72k | filter = false; |
716 | 9.72k | return false; |
717 | 9.72k | } |
718 | 87.5k | } |
719 | | |
720 | 77.8k | if (filter && !suppress_warnings && normalizer.anyBadTokens()) { |
721 | 0 | warn("content normalization encountered bad tokens"); |
722 | 0 | if (normalizer.lastTokenWasBad()) { |
723 | 0 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); |
724 | 0 | warn( |
725 | 0 | "normalized content ended with a bad token; you may be able to resolve this by " |
726 | 0 | "coalescing content streams in combination with normalizing content. From the " |
727 | 0 | "command line, specify --coalesce-contents"); |
728 | 0 | } |
729 | 0 | warn( |
730 | 0 | "Resulting stream data may be corrupted but is may still useful for manual " |
731 | 0 | "inspection. For more information on this warning, search for content normalization " |
732 | 0 | "in the manual."); |
733 | 0 | } |
734 | | |
735 | 77.8k | return true; |
736 | 87.5k | } |
737 | | |
738 | | void |
739 | | Stream::replaceStreamData( |
740 | | std::string&& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) |
741 | 0 | { |
742 | 0 | auto s = stream(); |
743 | 0 | s->stream_data = std::make_shared<Buffer>(std::move(data)); |
744 | 0 | s->stream_provider = nullptr; |
745 | 0 | replaceFilterData(filter, decode_parms, s->stream_data->getSize()); |
746 | 0 | } |
747 | | |
748 | | void |
749 | | Stream::replaceStreamData( |
750 | | std::shared_ptr<Buffer> data, |
751 | | QPDFObjectHandle const& filter, |
752 | | QPDFObjectHandle const& decode_parms) |
753 | 0 | { |
754 | 0 | auto s = stream(); |
755 | 0 | s->stream_data = data; |
756 | 0 | s->stream_provider = nullptr; |
757 | 0 | replaceFilterData(filter, decode_parms, data->size()); |
758 | 0 | } |
759 | | |
760 | | void |
761 | | Stream::replaceStreamData( |
762 | | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider, |
763 | | QPDFObjectHandle const& filter, |
764 | | QPDFObjectHandle const& decode_parms) |
765 | 0 | { |
766 | 0 | auto s = stream(); |
767 | 0 | s->stream_provider = provider; |
768 | 0 | s->stream_data = nullptr; |
769 | 0 | replaceFilterData(filter, decode_parms, 0); |
770 | 0 | } |
771 | | |
772 | | void |
773 | | Stream::replaceFilterData( |
774 | | QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length) |
775 | 0 | { |
776 | 0 | auto s = stream(); |
777 | 0 | if (filter) { |
778 | 0 | s->stream_dict.replaceKey("/Filter", filter); |
779 | 0 | } |
780 | 0 | if (decode_parms) { |
781 | 0 | s->stream_dict.replaceKey("/DecodeParms", decode_parms); |
782 | 0 | } |
783 | 0 | if (length == 0) { |
784 | 0 | QTC::TC("qpdf", "QPDF_Stream unknown stream length"); |
785 | 0 | s->stream_dict.removeKey("/Length"); |
786 | 0 | } else { |
787 | 0 | s->stream_dict.replaceKey( |
788 | 0 | "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length))); |
789 | 0 | } |
790 | 0 | } |
791 | | |
792 | | void |
793 | | Stream::warn(std::string const& message) |
794 | 3.19k | { |
795 | 3.19k | qpdf()->warn(qpdf_e_damaged_pdf, "", offset(), message); |
796 | 3.19k | } |
797 | | |
798 | | QPDFObjectHandle |
799 | | QPDFObjectHandle::getDict() const |
800 | 268k | { |
801 | 268k | return as_stream(error).getDict(); |
802 | 268k | } |
803 | | |
804 | | void |
805 | | QPDFObjectHandle::setFilterOnWrite(bool val) |
806 | 10.0k | { |
807 | 10.0k | as_stream(error).setFilterOnWrite(val); |
808 | 10.0k | } |
809 | | |
810 | | bool |
811 | | QPDFObjectHandle::getFilterOnWrite() |
812 | 70.0k | { |
813 | 70.0k | return as_stream(error).getFilterOnWrite(); |
814 | 70.0k | } |
815 | | |
816 | | bool |
817 | | QPDFObjectHandle::isDataModified() |
818 | 75.0k | { |
819 | 75.0k | return as_stream(error).isDataModified(); |
820 | 75.0k | } |
821 | | |
822 | | void |
823 | | QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) |
824 | 0 | { |
825 | 0 | as_stream(error).replaceDict(new_dict); |
826 | 0 | } |
827 | | |
828 | | bool |
829 | | QPDFObjectHandle::isRootMetadata() const |
830 | 70.0k | { |
831 | 70.0k | return as_stream(error).isRootMetadata(); |
832 | 70.0k | } |
833 | | |
834 | | std::shared_ptr<Buffer> |
835 | | QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) |
836 | 5.98k | { |
837 | 5.98k | return std::make_shared<Buffer>(as_stream(error).getStreamData(level)); |
838 | 5.98k | } |
839 | | |
840 | | std::shared_ptr<Buffer> |
841 | | QPDFObjectHandle::getRawStreamData() |
842 | 0 | { |
843 | 0 | return std::make_shared<Buffer>(as_stream(error).getRawStreamData()); |
844 | 0 | } |
845 | | |
846 | | bool |
847 | | QPDFObjectHandle::pipeStreamData( |
848 | | Pipeline* p, |
849 | | bool* filtering_attempted, |
850 | | int encode_flags, |
851 | | qpdf_stream_decode_level_e decode_level, |
852 | | bool suppress_warnings, |
853 | | bool will_retry) |
854 | 0 | { |
855 | 0 | return as_stream(error).pipeStreamData( |
856 | 0 | p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
857 | 0 | } |
858 | | |
859 | | bool |
860 | | QPDFObjectHandle::pipeStreamData( |
861 | | Pipeline* p, |
862 | | int encode_flags, |
863 | | qpdf_stream_decode_level_e decode_level, |
864 | | bool suppress_warnings, |
865 | | bool will_retry) |
866 | 80.0k | { |
867 | 80.0k | bool filtering_attempted; |
868 | 80.0k | as_stream(error).pipeStreamData( |
869 | 80.0k | p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry); |
870 | 80.0k | return filtering_attempted; |
871 | 80.0k | } |
872 | | |
873 | | bool |
874 | | QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress) |
875 | 0 | { |
876 | 0 | int encode_flags = 0; |
877 | 0 | qpdf_stream_decode_level_e decode_level = qpdf_dl_none; |
878 | 0 | if (filter) { |
879 | 0 | decode_level = qpdf_dl_generalized; |
880 | 0 | if (normalize) { |
881 | 0 | encode_flags |= qpdf_ef_normalize; |
882 | 0 | } |
883 | 0 | if (compress) { |
884 | 0 | encode_flags |= qpdf_ef_compress; |
885 | 0 | } |
886 | 0 | } |
887 | 0 | return pipeStreamData(p, encode_flags, decode_level, false); |
888 | 0 | } |
889 | | |
890 | | void |
891 | | QPDFObjectHandle::replaceStreamData( |
892 | | std::shared_ptr<Buffer> data, |
893 | | QPDFObjectHandle const& filter, |
894 | | QPDFObjectHandle const& decode_parms) |
895 | 0 | { |
896 | 0 | as_stream(error).replaceStreamData(data, filter, decode_parms); |
897 | 0 | } |
898 | | |
899 | | void |
900 | | QPDFObjectHandle::replaceStreamData( |
901 | | std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) |
902 | 0 | { |
903 | 0 | std::string s(data); |
904 | 0 | as_stream(error).replaceStreamData(std::move(s), filter, decode_parms); |
905 | 0 | } |
906 | | |
907 | | void |
908 | | QPDFObjectHandle::replaceStreamData( |
909 | | std::shared_ptr<StreamDataProvider> provider, |
910 | | QPDFObjectHandle const& filter, |
911 | | QPDFObjectHandle const& decode_parms) |
912 | 0 | { |
913 | 0 | as_stream(error).replaceStreamData(provider, filter, decode_parms); |
914 | 0 | } |
915 | | |
916 | | namespace |
917 | | { |
918 | | class FunctionProvider: public QPDFObjectHandle::StreamDataProvider |
919 | | { |
920 | | public: |
921 | | FunctionProvider(std::function<void(Pipeline*)> provider) : |
922 | 0 | StreamDataProvider(false), |
923 | 0 | p1(provider), |
924 | 0 | p2(nullptr) |
925 | 0 | { |
926 | 0 | } |
927 | | FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) : |
928 | 0 | StreamDataProvider(true), |
929 | 0 | p1(nullptr), |
930 | 0 | p2(provider) |
931 | 0 | { |
932 | 0 | } |
933 | | |
934 | | void |
935 | | provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override |
936 | 0 | { |
937 | 0 | p1(pipeline); |
938 | 0 | } |
939 | | |
940 | | bool |
941 | | provideStreamData( |
942 | | QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override |
943 | 0 | { |
944 | 0 | return p2(pipeline, suppress_warnings, will_retry); |
945 | 0 | } |
946 | | |
947 | | private: |
948 | | std::function<void(Pipeline*)> p1; |
949 | | std::function<bool(Pipeline*, bool, bool)> p2; |
950 | | }; |
951 | | } // namespace |
952 | | |
953 | | void |
954 | | QPDFObjectHandle::replaceStreamData( |
955 | | std::function<void(Pipeline*)> provider, |
956 | | QPDFObjectHandle const& filter, |
957 | | QPDFObjectHandle const& decode_parms) |
958 | 0 | { |
959 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
960 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
961 | 0 | } |
962 | | |
963 | | void |
964 | | QPDFObjectHandle::replaceStreamData( |
965 | | std::function<bool(Pipeline*, bool, bool)> provider, |
966 | | QPDFObjectHandle const& filter, |
967 | | QPDFObjectHandle const& decode_parms) |
968 | 0 | { |
969 | 0 | auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); |
970 | 0 | as_stream(error).replaceStreamData(sdp, filter, decode_parms); |
971 | 0 | } |
972 | | |
973 | | JSON |
974 | | QPDFObjectHandle::getStreamJSON( |
975 | | int json_version, |
976 | | qpdf_json_stream_data_e json_data, |
977 | | qpdf_stream_decode_level_e decode_level, |
978 | | Pipeline* p, |
979 | | std::string const& data_filename) |
980 | 0 | { |
981 | 0 | return as_stream(error).getStreamJSON(json_version, json_data, decode_level, p, data_filename); |
982 | 0 | } |
983 | | |
984 | | QPDFObjectHandle |
985 | | QPDFObjectHandle::copyStream() |
986 | 0 | { |
987 | 0 | return as_stream(error).copy(); |
988 | 0 | } |