Coverage Report

Created: 2026-01-25 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_Stream.cc
Line
Count
Source
1
#include <qpdf/QPDFObjectHandle_private.hh>
2
3
#include <qpdf/ContentNormalizer.hh>
4
#include <qpdf/JSON_writer.hh>
5
#include <qpdf/Pipeline.hh>
6
#include <qpdf/Pipeline_private.hh>
7
#include <qpdf/Pl_Buffer.hh>
8
#include <qpdf/Pl_Count.hh>
9
#include <qpdf/Pl_Discard.hh>
10
#include <qpdf/Pl_Flate.hh>
11
#include <qpdf/Pl_QPDFTokenizer.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFExc.hh>
14
#include <qpdf/QPDF_private.hh>
15
#include <qpdf/QTC.hh>
16
#include <qpdf/QUtil.hh>
17
#include <qpdf/SF_ASCII85Decode.hh>
18
#include <qpdf/SF_ASCIIHexDecode.hh>
19
#include <qpdf/SF_DCTDecode.hh>
20
#include <qpdf/SF_FlateLzwDecode.hh>
21
#include <qpdf/SF_RunLengthDecode.hh>
22
23
#include <stdexcept>
24
25
using namespace std::literals;
26
using namespace qpdf;
27
28
using Streams = QPDF::Doc::Objects::Streams;
29
30
/// Stream data provider installed on streams whose data was copied from another stream.
///
/// For every registered destination stream the copier remembers either a snapshot of what is
/// needed to re-read the source stream's data from its input file (`copied_data`), or a handle to
/// the source stream itself when the source has its own stream data provider (`copied_streams`).
class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider
{
    /// Snapshot of the source stream's location, encryption parameters and destination dictionary,
    /// taken when the copy is registered so that the data can be piped later.
    class Data
    {
        friend class Streams;

      public:
        Data(Stream& source, Dictionary const& dest_dict) :
            encp(source.qpdf()->m->encp),
            file(source.qpdf()->m->file),
            source_og(source.id_gen()),
            offset(source.offset()),
            length(source.getLength()),
            dest_dict(dest_dict),
            is_root_metadata(source.isRootMetadata())
        {
        }

      private:
        std::shared_ptr<EncryptionParameters> encp;
        std::shared_ptr<InputSource> file;
        QPDFObjGen source_og;
        qpdf_offset_t offset;
        size_t length;
        QPDFObjectHandle dest_dict;
        bool is_root_metadata{false};
    };

  public:
    // A Copier holds a reference to its owning Streams object and must be neither copied nor
    // moved. The deleted members take Copier (not the base class) so that they really are the
    // copy/move special members.
    Copier() = delete;
    Copier(Copier const&) = delete;
    Copier(Copier&&) = delete;
    Copier& operator=(Copier const&) = delete;
    Copier& operator=(Copier&&) = delete;
    ~Copier() final = default;

    /// @param streams the Streams object whose QPDF is used when piping copied data.
    // NOTE(review): the `true` passed to the base class presumably selects the retry-aware
    // provideStreamData overload -- confirm against StreamDataProvider.
    Copier(Streams& streams) :
        QPDFObjectHandle::StreamDataProvider(true),
        streams(streams)
    {
    }

    /// Pipe the data registered for destination object `og` to `pipeline`.
    ///
    /// @return true if the data was piped successfully; false if piping failed or nothing is
    ///         registered for `og`.
    bool
    provideStreamData(
        QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final
    {
        // First choice: a snapshot of the source stream taken at registration time.
        auto data = copied_data.find(og);
        if (data != copied_data.end()) {
            auto& fd = data->second;
            QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1);
            if (streams.qpdf.pipeStreamData(
                    fd.encp,
                    fd.file,
                    streams.qpdf,
                    fd.source_og,
                    fd.offset,
                    fd.length,
                    fd.dest_dict,
                    fd.is_root_metadata,
                    pipeline,
                    suppress_warnings,
                    will_retry)) {
                return true; // for CI coverage
            } else {
                return false;
            }
        }
        // Otherwise delegate to the source stream, which supplies the data itself.
        auto stream = copied_streams.find(og);
        qpdf_invariant(stream == copied_streams.end() || stream->second);
        if (stream != copied_streams.end() &&
            stream->second.pipeStreamData(
                pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) {
            return true; // for CI coverage
        }
        return false;
    }

    /// Record that `dest` obtains its data from `source`.
    ///
    /// @param provider true if `source` has its own stream data provider; the source stream handle
    ///        is then retained. Otherwise a Data snapshot of `source` is stored.
    void
    register_copy(Stream& dest, Stream& source, bool provider)
    {
        qpdf_expect(source);
        qpdf_expect(dest);
        if (provider) {
            copied_streams.insert_or_assign(dest, source);
        } else {
            copied_data.insert_or_assign(dest, Data(source, dest.getDict()));
        }
    }

  private:
    Streams& streams;
    std::map<QPDFObjGen, Stream> copied_streams;
    std::map<QPDFObjGen, Data> copied_data;
};
124
125
Streams::Streams(Common& common) :
126
304k
    Common(common),
127
304k
    copier_(std::make_shared<Copier>(*this))
128
304k
{
129
304k
}
130
131
namespace
132
{
133
    class SF_Crypt final: public QPDFStreamFilter
134
    {
135
      public:
136
7.46k
        SF_Crypt() = default;
137
        ~SF_Crypt() final = default;
138
139
        bool
140
        setDecodeParms(QPDFObjectHandle decode_parms) final
141
5.24k
        {
142
            // we only validate here - processing happens in decryptStream
143
5.24k
            if (Dictionary dict = decode_parms) {
144
15.4k
                for (auto const& [key, value]: dict) {
145
15.4k
                    if (key == "/Type" &&
146
998
                        (value.null() || Name(value) == "/CryptFilterDecodeParms")) {
147
445
                        continue;
148
445
                    }
149
14.9k
                    if (key == "/Name") {
150
398
                        continue;
151
398
                    }
152
14.5k
                    if (!value.null()) {
153
1.14k
                        return false;
154
1.14k
                    }
155
14.5k
                }
156
2.12k
                return true;
157
3.26k
            }
158
1.97k
            return decode_parms.null();
159
5.24k
        }
160
161
        Pipeline*
162
        getDecodePipeline(Pipeline*) final
163
3.72k
        {
164
            // Not used -- handled by pipeStreamData
165
3.72k
            return nullptr;
166
3.72k
        }
167
    };
168
169
    class StreamBlobProvider
170
    {
171
      public:
172
        StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) :
173
0
            stream(stream),
174
0
            decode_level(decode_level)
175
0
        {
176
0
        }
177
        void
178
        operator()(Pipeline* p)
179
0
        {
180
0
            stream.pipeStreamData(p, nullptr, 0, decode_level, false, false);
181
0
        }
182
183
      private:
184
        Stream stream;
185
        qpdf_stream_decode_level_e decode_level;
186
    };
187
188
    /// User defined streamfilter factories
189
    std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
190
    bool filter_factories_registered = false;
191
} // namespace
192
193
std::string
194
QPDF_Stream::Members::expand_filter_name(std::string const& name) const
195
0
{
196
    // The PDF specification provides these filter abbreviations for use in inline images, but
197
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
198
    // accepts them for stream filters.
199
0
    if (name == "/AHx") {
200
0
        return "/ASCIIHexDecode";
201
0
    }
202
0
    if (name == "/A85") {
203
0
        return "/ASCII85Decode";
204
0
    }
205
0
    if (name == "/LZW") {
206
0
        return "/LZWDecode";
207
0
    }
208
0
    if (name == "/Fl") {
209
0
        return "/FlateDecode";
210
0
    }
211
0
    if (name == "/RL") {
212
0
        return "/RunLengthDecode";
213
0
    }
214
0
    if (name == "/CCF") {
215
0
        return "/CCITTFaxDecode";
216
0
    }
217
0
    if (name == "/DCT") {
218
0
        return "/DCTDecode";
219
0
    }
220
0
    return name;
221
0
};
222
223
std::function<std::shared_ptr<QPDFStreamFilter>()>
224
QPDF_Stream::Members::filter_factory(std::string const& name) const
225
317k
{
226
317k
    if (filter_factories_registered) [[unlikely]] {
227
        // We need to check user provided filters first as we allow users to replace qpdf provided
228
        // default filters. This will have a performance impact if the facility to register stream
229
        // filters is actually used. We can optimize this away if necessary.
230
0
        auto ff = filter_factories.find(expand_filter_name(name));
231
0
        if (ff != filter_factories.end()) {
232
0
            return ff->second;
233
0
        }
234
0
    }
235
317k
    if (name == "/FlateDecode") {
236
108k
        return SF_FlateLzwDecode::flate_factory;
237
108k
    }
238
209k
    if (name == "/Crypt") {
239
7.46k
        return []() { return std::make_shared<SF_Crypt>(); };
240
7.46k
    }
241
201k
    if (name == "/LZWDecode") {
242
7.47k
        return SF_FlateLzwDecode::lzw_factory;
243
7.47k
    }
244
194k
    if (name == "/RunLengthDecode") {
245
871
        return SF_RunLengthDecode::factory;
246
871
    }
247
193k
    if (name == "/DCTDecode") {
248
59.2k
        return SF_DCTDecode::factory;
249
59.2k
    }
250
134k
    if (name == "/ASCII85Decode") {
251
16.8k
        return SF_ASCII85Decode::factory;
252
16.8k
    }
253
117k
    if (name == "/ASCIIHexDecode") {
254
5.98k
        return SF_ASCIIHexDecode::factory;
255
5.98k
    }
256
    // The PDF specification provides these filter abbreviations for use in inline images, but
257
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader
258
    // also accepts them for stream filters.
259
260
111k
    if (name == "/Fl") {
261
23.7k
        return SF_FlateLzwDecode::flate_factory;
262
23.7k
    }
263
87.7k
    if (name == "/AHx") {
264
11.8k
        return SF_ASCIIHexDecode::factory;
265
11.8k
    }
266
75.9k
    if (name == "/A85") {
267
5.94k
        return SF_ASCII85Decode::factory;
268
5.94k
    }
269
69.9k
    if (name == "/LZW") {
270
30.0k
        return SF_FlateLzwDecode::lzw_factory;
271
30.0k
    }
272
39.9k
    if (name == "/RL") {
273
16.7k
        return SF_RunLengthDecode::factory;
274
16.7k
    }
275
23.2k
    if (name == "/DCT") {
276
7.05k
        return SF_DCTDecode::factory;
277
7.05k
    }
278
16.1k
    return nullptr;
279
23.2k
}
280
281
/// Create a new stream object `og` owned by `qpdf` with dictionary `stream_dict`, and attach an
/// object description built from the file name and object id to it and to its dictionary.
Stream::Stream(
    QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) :
    BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length))
{
    std::string label = qpdf.getFilename() + ", stream object " + og.unparse(' ');
    auto description = std::make_shared<QPDFObject::Description>(std::move(label));
    obj->setDescription(&qpdf, description, offset);
    setDictDescription();
}
290
291
/// Create a new stream in the same QPDF whose dictionary is a copy of this stream's dictionary
/// and whose data is taken from this stream.
Stream
Stream::copy()
{
    Stream duplicate = qpdf()->newStream();
    auto dict_copy = getDict().copy();
    duplicate.stream()->stream_dict = std::move(dict_copy);
    copy_data_to(duplicate);
    return duplicate;
}
299
300
void
301
Stream::copy_data_to(Stream& dest)
302
407
{
303
407
    qpdf_expect(dest);
304
407
    auto s = stream();
305
407
    auto& d_streams = dest.qpdf()->doc().objects().streams();
306
307
407
    auto dict = dest.getDict();
308
309
    // Copy information from the foreign stream so we can pipe its data later without keeping the
310
    // original QPDF object around.
311
407
    if (qpdf()->doc().config().immediate_copy_from() && !s->stream_data) {
312
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
313
        // source stream so that if the source stream is copied multiple times, we don't have to
314
        // keep duplicating the memory. Passing uninitialised object handles will preserve the
315
        // existing filters and decode parameters.
316
0
        replaceStreamData(getRawStreamData(), {}, {});
317
0
    }
318
407
    if (s->stream_data) {
319
14
        dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]);
320
393
    } else {
321
393
        d_streams.copier()->register_copy(dest, *this, s->stream_provider.get());
322
393
        dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]);
323
393
    }
324
407
}
325
326
void
327
Stream::registerStreamFilter(
328
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
329
0
{
330
0
    filter_factories[filter_name] = factory;
331
0
    filter_factories_registered = true;
332
0
}
333
334
/// Build the JSON representation of this stream by writing it to an in-memory buffer and parsing
/// the result. For inline data, a lazily evaluated "data" blob is added to the parsed object.
JSON
Stream::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    Pl_Buffer json_buffer{"streamjson"};
    JSON::Writer writer{&json_buffer, 0};
    // writeStreamJSON reports the decode level that was actually used; the blob must match it.
    qpdf_stream_decode_level_e const effective_level =
        writeStreamJSON(json_version, writer, json_data, decode_level, p, data_filename, true);
    json_buffer.finish();

    auto parsed = JSON::parse(json_buffer.getString());
    if (json_data == qpdf_sj_inline) {
        parsed.addDictionaryMember(
            "data", JSON::makeBlob(StreamBlobProvider(*this, effective_level)));
    }
    return parsed;
}
353
354
/// Write this stream as a JSON object to `jw`.
///
/// @param json_data  qpdf_sj_none writes only "dict"; qpdf_sj_file writes "datafile" and sends the
///                   stream data to `p`; qpdf_sj_inline writes base64 "data" unless `no_data_key`.
/// @param p          must be non-null exactly when json_data is qpdf_sj_file.
/// @return the decode level that was actually used (qpdf_dl_none after a fallback).
/// @throws std::logic_error on inconsistent arguments or if the stream data cannot be obtained.
qpdf_stream_decode_level_e
Stream::writeStreamJSON(
    int json_version,
    JSON::Writer& jw,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename,
    bool no_data_key)
{
    auto members = stream();

    // Check that the pipeline / file name arguments agree with the requested data mode.
    if (json_data == qpdf_sj_none || json_data == qpdf_sj_inline) {
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline should only be supplied "
                "when json_data is file");
        }
    } else if (json_data == qpdf_sj_file) {
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: data_filename must be supplied "
                "when json_data is file");
        }
    }

    jw.writeStart('{');

    if (json_data == qpdf_sj_none) {
        // No data requested: the dictionary is written unmodified.
        jw.writeNext();
        jw << R"("dict": )";
        members->stream_dict.writeJSON(json_version, jw);
        jw.writeEnd('}');
        return decode_level;
    }

    Pl_Discard discard;
    Pl_Buffer buf_pl{"stream data"};
    // When the caller adds the data key itself, the data is only piped to validate it.
    Pipeline* sink = (no_data_key && json_data == qpdf_sj_inline)
        ? static_cast<Pipeline*>(&discard)
        : static_cast<Pipeline*>(&buf_pl);

    // Becomes true once pipeStreamData has produced valid data.
    bool data_ready = false;
    bool filtered = false;
    bool want_filter = (decode_level != qpdf_dl_none);
    for (int attempt = 1; attempt <= 2 && !data_ready; ++attempt) {
        bool const will_retry = (attempt == 1);
        bool const piped = pipeStreamData(sink, &filtered, 0, decode_level, false, will_retry);
        if (piped && (!want_filter || filtered)) {
            data_ready = true;
        } else {
            // Fall back to the raw data for the next attempt.
            want_filter = false;
            decode_level = qpdf_dl_none;
            buf_pl.getString(); // reset buf_pl
        }
    }
    if (!data_ready) {
        throw std::logic_error("QPDF_Stream: failed to get stream data");
    }

    // We can use unsafeShallowCopy because we are only touching top-level keys.
    auto dict = members->stream_dict.unsafeShallowCopy();
    dict.removeKey("/Length");
    if (want_filter && filtered) {
        // The data is written decoded, so the filter entries no longer apply.
        dict.removeKey("/Filter");
        dict.removeKey("/DecodeParms");
    }

    if (json_data == qpdf_sj_file) {
        jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\"";
        p->writeString(buf_pl.getString());
    } else if (json_data == qpdf_sj_inline) {
        if (!no_data_key) {
            jw.writeNext() << R"("data": ")";
            jw.writeBase64(buf_pl.getString()) << "\"";
        }
    } else {
        throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data");
    }

    jw.writeNext() << R"("dict": )";
    dict.writeJSON(json_version, jw);
    jw.writeEnd('}');

    return decode_level;
}
448
449
void
450
qpdf::Stream::setDictDescription()
451
500k
{
452
500k
    auto s = stream();
453
500k
    if (!s->stream_dict.hasObjectDescription()) {
454
48.2k
        s->stream_dict.setObjectDescription(
455
48.2k
            obj->getQPDF(), obj->getDescription() + " -> stream dictionary");
456
48.2k
    }
457
500k
}
458
459
std::string
460
Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
461
119k
{
462
119k
    std::string result;
463
119k
    pl::String buf(result);
464
119k
    bool filtered;
465
119k
    pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
466
119k
    if (!filtered) {
467
21.5k
        throw QPDFExc(
468
21.5k
            qpdf_e_unsupported,
469
21.5k
            qpdf()->getFilename(),
470
21.5k
            "",
471
21.5k
            offset(),
472
21.5k
            "getStreamData called on unfilterable stream");
473
21.5k
    }
474
97.8k
    return result;
475
119k
}
476
477
std::string
478
Stream::getRawStreamData()
479
0
{
480
0
    std::string result;
481
0
    pl::String buf(result);
482
0
    if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) {
483
0
        throw QPDFExc(
484
0
            qpdf_e_unsupported,
485
0
            qpdf()->getFilename(),
486
0
            "",
487
0
            offset(),
488
0
            "error getting raw stream data");
489
0
    }
490
0
    return result;
491
0
}
492
493
bool
494
Stream::isRootMetadata() const
495
878k
{
496
878k
    if (!stream()->stream_dict.isDictionaryOfType("/Metadata", "/XML")) {
497
875k
        return false;
498
875k
    }
499
3.86k
    return qpdf()->getRoot()["/Metadata"].isSameObjectAs(obj);
500
878k
}
501
502
bool
503
Stream::filterable(
504
    qpdf_stream_decode_level_e decode_level,
505
    std::vector<std::shared_ptr<QPDFStreamFilter>>& filters)
506
402k
{
507
402k
    auto s = stream();
508
    // Check filters
509
510
402k
    auto const& filter_obj = s->stream_dict["/Filter"];
511
512
402k
    if (filter_obj.null()) {
513
        // No filters
514
160k
        return true;
515
160k
    }
516
242k
    if (filter_obj.size() > global::Limits::max_stream_filters()) {
517
1.26k
        global::Limits::error();
518
1.26k
        warn(
519
1.26k
            "limits error(max-stream-filters): too many filters for stream; treating stream as "
520
1.26k
            "not filterable");
521
1.26k
        return false;
522
1.26k
    }
523
240k
    if (filter_obj.isName()) {
524
        // One filter
525
149k
        auto ff = s->filter_factory(filter_obj.getName());
526
149k
        if (!ff) {
527
9.81k
            return false;
528
9.81k
        }
529
140k
        filters.emplace_back(ff());
530
140k
    } else if (Array array = filter_obj) {
531
        // Potentially multiple filters
532
169k
        for (Name item: array) {
533
169k
            if (!item) {
534
1.63k
                warn("stream filter type is not name or array");
535
1.63k
                return false;
536
1.63k
            }
537
167k
            auto ff = s->filter_factory(item);
538
167k
            if (!ff) {
539
6.38k
                filters.clear();
540
6.38k
                return false;
541
6.38k
            }
542
161k
            filters.emplace_back(ff());
543
161k
        }
544
90.2k
    } else {
545
561
        warn("stream filter type is not name or array");
546
561
        return false;
547
561
    }
548
549
    // filters now contains a list of filters to be applied in order. See which ones we can support.
550
    // See if we can support any decode parameters that are specified.
551
552
222k
    auto decode_obj = s->stream_dict.getKey("/DecodeParms");
553
554
222k
    auto can_filter = // linebreak
555
286k
        [](auto d_level, auto& filter, auto& d_obj) -> bool {
556
286k
        if (!filter.setDecodeParms(d_obj) ||
557
282k
            (d_level < qpdf_dl_all && filter.isLossyCompression()) ||
558
282k
            (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) {
559
4.17k
            return false;
560
4.17k
        }
561
282k
        return true;
562
286k
    };
563
564
222k
    auto decode_array = decode_obj.as_array(strict);
565
222k
    if (!decode_array || decode_array.size() == 0) {
566
218k
        if (decode_array) {
567
120
            decode_obj = QPDFObjectHandle::newNull();
568
120
        }
569
570
282k
        for (auto& filter: filters) {
571
282k
            if (!can_filter(decode_level, *filter, decode_obj)) {
572
3.58k
                return false;
573
3.58k
            }
574
282k
        }
575
218k
    } else {
576
        // Ignore /DecodeParms entirely if /Filters is empty.  At least one case of a file whose
577
        // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
578
3.85k
        if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) {
579
398
            warn("stream /DecodeParms length is inconsistent with filters");
580
398
            return false;
581
398
        }
582
583
3.45k
        int i = -1;
584
4.06k
        for (auto& filter: filters) {
585
4.06k
            auto d_obj = decode_array.get(++i);
586
4.06k
            if (!can_filter(decode_level, *filter, d_obj)) {
587
583
                return false;
588
583
            }
589
4.06k
        }
590
3.45k
    }
591
592
217k
    return true;
593
222k
}
594
595
bool
596
Stream::pipeStreamData(
597
    Pipeline* pipeline,
598
    bool* filterp,
599
    int encode_flags,
600
    qpdf_stream_decode_level_e decode_level,
601
    bool suppress_warnings,
602
    bool will_retry)
603
576k
{
604
576k
    auto s = stream();
605
576k
    std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
606
576k
    bool ignored;
607
576k
    if (!filterp) {
608
0
        filterp = &ignored;
609
0
    }
610
576k
    bool& filter = *filterp;
611
612
576k
    const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0;
613
576k
    const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0;
614
576k
    const bool empty = empty_stream || empty_stream_data;
615
616
576k
    if (empty_stream || empty_stream_data) {
617
21.8k
        filter = true;
618
21.8k
    }
619
620
576k
    filter = empty || encode_flags || decode_level != qpdf_dl_none;
621
576k
    if (filter) {
622
402k
        filter = filterable(decode_level, filters);
623
402k
    }
624
625
576k
    if (!pipeline) {
626
0
        QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
627
        // Return value is whether we can filter in this case.
628
0
        return filter;
629
0
    }
630
631
    // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
632
    // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
633
    // objects.
634
576k
    std::vector<std::unique_ptr<Pipeline>> to_delete;
635
636
576k
    ContentNormalizer normalizer;
637
576k
    if (filter) {
638
378k
        if (encode_flags & qpdf_ef_compress) {
639
174k
            auto new_pipeline =
640
174k
                std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate);
641
174k
            pipeline = new_pipeline.get();
642
174k
            to_delete.push_back(std::move(new_pipeline));
643
174k
        }
644
645
378k
        if (encode_flags & qpdf_ef_normalize) {
646
5.51k
            auto new_pipeline =
647
5.51k
                std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline);
648
5.51k
            pipeline = new_pipeline.get();
649
5.51k
            to_delete.push_back(std::move(new_pipeline));
650
5.51k
        }
651
652
378k
        for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) {
653
0
            auto new_pipeline =
654
0
                std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline);
655
0
            pipeline = new_pipeline.get();
656
0
            to_delete.push_back(std::move(new_pipeline));
657
0
        }
658
659
658k
        for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) {
660
280k
            if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) {
661
276k
                pipeline = decode_pipeline;
662
276k
            }
663
280k
            auto* flate = dynamic_cast<Pl_Flate*>(pipeline);
664
280k
            if (flate) {
665
129k
                flate->setWarnCallback([this](char const* msg, int code) { warn(msg); });
666
129k
            }
667
280k
        }
668
378k
    }
669
670
576k
    if (s->stream_data.get()) {
671
27.3k
        QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
672
27.3k
        pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize());
673
27.3k
        pipeline->finish();
674
549k
    } else if (s->stream_provider.get()) {
675
7.83k
        Pl_Count count("stream provider count", pipeline);
676
7.83k
        if (s->stream_provider->supportsRetry()) {
677
0
            if (!s->stream_provider->provideStreamData(
678
0
                    obj->getObjGen(), &count, suppress_warnings, will_retry)) {
679
0
                filter = false;
680
0
                return false;
681
0
            }
682
7.83k
        } else {
683
7.83k
            s->stream_provider->provideStreamData(obj->getObjGen(), &count);
684
7.83k
        }
685
7.83k
        qpdf_offset_t actual_length = count.getCount();
686
7.83k
        if (s->stream_dict.hasKey("/Length")) {
687
0
            auto desired_length = s->stream_dict.getKey("/Length").getIntValue();
688
0
            if (actual_length != desired_length) {
689
0
                QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
690
                // This would be caused by programmer error on the part of a library user, not by
691
                // invalid input data.
692
0
                throw std::runtime_error(
693
0
                    "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " +
694
0
                    std::to_string(actual_length) + " bytes instead of expected " +
695
0
                    std::to_string(desired_length) + " bytes");
696
0
            }
697
7.83k
        } else {
698
7.83k
            QTC::TC("qpdf", "QPDF_Stream provider length not provided");
699
7.83k
            s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length));
700
7.83k
        }
701
541k
    } else {
702
541k
        if (offset() == 0) {
703
0
            throw std::logic_error("pipeStreamData called for stream with no data");
704
0
        }
705
541k
        if (!Streams::pipeStreamData(
706
541k
                qpdf(),
707
541k
                id_gen(),
708
541k
                offset(),
709
541k
                s->length,
710
541k
                s->stream_dict,
711
541k
                isRootMetadata(),
712
541k
                pipeline,
713
541k
                suppress_warnings,
714
541k
                will_retry)) {
715
91.0k
            filter = false;
716
91.0k
            return false;
717
91.0k
        }
718
541k
    }
719
720
485k
    if (filter && !suppress_warnings && normalizer.anyBadTokens()) {
721
1.36k
        warn("content normalization encountered bad tokens");
722
1.36k
        if (normalizer.lastTokenWasBad()) {
723
718
            QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
724
718
            warn(
725
718
                "normalized content ended with a bad token; you may be able to resolve this by "
726
718
                "coalescing content streams in combination with normalizing content. From the "
727
718
                "command line, specify --coalesce-contents");
728
718
        }
729
1.36k
        warn(
730
1.36k
            "Resulting stream data may be corrupted but is may still useful for manual "
731
1.36k
            "inspection. For more information on this warning, search for content normalization "
732
1.36k
            "in the manual.");
733
1.36k
    }
734
735
485k
    return true;
736
576k
}
737
738
void
739
Stream::replaceStreamData(
740
    std::string&& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
741
45.1k
{
742
45.1k
    auto s = stream();
743
45.1k
    s->stream_data = std::make_shared<Buffer>(std::move(data));
744
45.1k
    s->stream_provider = nullptr;
745
45.1k
    replaceFilterData(filter, decode_parms, s->stream_data->getSize());
746
45.1k
}
747
748
void
749
Stream::replaceStreamData(
750
    std::shared_ptr<Buffer> data,
751
    QPDFObjectHandle const& filter,
752
    QPDFObjectHandle const& decode_parms)
753
14
{
754
14
    auto s = stream();
755
14
    s->stream_data = data;
756
14
    s->stream_provider = nullptr;
757
14
    replaceFilterData(filter, decode_parms, data->size());
758
14
}
759
760
void
761
Stream::replaceStreamData(
762
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider,
763
    QPDFObjectHandle const& filter,
764
    QPDFObjectHandle const& decode_parms)
765
19.7k
{
766
19.7k
    auto s = stream();
767
19.7k
    s->stream_provider = provider;
768
19.7k
    s->stream_data = nullptr;
769
19.7k
    replaceFilterData(filter, decode_parms, 0);
770
19.7k
}
771
772
void
773
Stream::replaceFilterData(
774
    QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length)
775
64.8k
{
776
64.8k
    auto s = stream();
777
64.8k
    if (filter) {
778
24.9k
        s->stream_dict.replaceKey("/Filter", filter);
779
24.9k
    }
780
64.8k
    if (decode_parms) {
781
24.6k
        s->stream_dict.replaceKey("/DecodeParms", decode_parms);
782
24.6k
    }
783
64.8k
    if (length == 0) {
784
25.5k
        QTC::TC("qpdf", "QPDF_Stream unknown stream length");
785
25.5k
        s->stream_dict.removeKey("/Length");
786
39.3k
    } else {
787
39.3k
        s->stream_dict.replaceKey(
788
39.3k
            "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length)));
789
39.3k
    }
790
64.8k
}
791
792
void
793
Stream::warn(std::string const& message)
794
42.1k
{
795
42.1k
    qpdf()->warn(qpdf_e_damaged_pdf, "", offset(), message);
796
42.1k
}
797
798
/// Return the stream dictionary; forwards to the Stream obtained via as_stream(error).
QPDFObjectHandle
QPDFObjectHandle::getDict() const
{
    auto target = as_stream(error);
    return target.getDict();
}
803
804
void
805
QPDFObjectHandle::setFilterOnWrite(bool val)
806
86.6k
{
807
86.6k
    as_stream(error).setFilterOnWrite(val);
808
86.6k
}
809
810
bool
811
QPDFObjectHandle::getFilterOnWrite()
812
338k
{
813
338k
    return as_stream(error).getFilterOnWrite();
814
338k
}
815
816
bool
817
QPDFObjectHandle::isDataModified()
818
372k
{
819
372k
    return as_stream(error).isDataModified();
820
372k
}
821
822
void
823
QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict)
824
4.58k
{
825
4.58k
    as_stream(error).replaceDict(new_dict);
826
4.58k
}
827
828
bool
829
QPDFObjectHandle::isRootMetadata() const
830
338k
{
831
338k
    return as_stream(error).isRootMetadata();
832
338k
}
833
834
std::shared_ptr<Buffer>
835
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
836
83.1k
{
837
83.1k
    return std::make_shared<Buffer>(as_stream(error).getStreamData(level));
838
83.1k
}
839
840
std::shared_ptr<Buffer>
841
QPDFObjectHandle::getRawStreamData()
842
0
{
843
0
    return std::make_shared<Buffer>(as_stream(error).getRawStreamData());
844
0
}
845
846
bool
847
QPDFObjectHandle::pipeStreamData(
848
    Pipeline* p,
849
    bool* filtering_attempted,
850
    int encode_flags,
851
    qpdf_stream_decode_level_e decode_level,
852
    bool suppress_warnings,
853
    bool will_retry)
854
0
{
855
0
    return as_stream(error).pipeStreamData(
856
0
        p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
857
0
}
858
859
bool
860
QPDFObjectHandle::pipeStreamData(
861
    Pipeline* p,
862
    int encode_flags,
863
    qpdf_stream_decode_level_e decode_level,
864
    bool suppress_warnings,
865
    bool will_retry)
866
457k
{
867
457k
    bool filtering_attempted;
868
457k
    as_stream(error).pipeStreamData(
869
457k
        p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
870
457k
    return filtering_attempted;
871
457k
}
872
873
bool
874
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress)
875
0
{
876
0
    int encode_flags = 0;
877
0
    qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
878
0
    if (filter) {
879
0
        decode_level = qpdf_dl_generalized;
880
0
        if (normalize) {
881
0
            encode_flags |= qpdf_ef_normalize;
882
0
        }
883
0
        if (compress) {
884
0
            encode_flags |= qpdf_ef_compress;
885
0
        }
886
0
    }
887
0
    return pipeStreamData(p, encode_flags, decode_level, false);
888
0
}
889
890
void
891
QPDFObjectHandle::replaceStreamData(
892
    std::shared_ptr<Buffer> data,
893
    QPDFObjectHandle const& filter,
894
    QPDFObjectHandle const& decode_parms)
895
0
{
896
0
    as_stream(error).replaceStreamData(data, filter, decode_parms);
897
0
}
898
899
void
900
QPDFObjectHandle::replaceStreamData(
901
    std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
902
28.2k
{
903
28.2k
    std::string s(data);
904
28.2k
    as_stream(error).replaceStreamData(std::move(s), filter, decode_parms);
905
28.2k
}
906
907
void
908
QPDFObjectHandle::replaceStreamData(
909
    std::shared_ptr<StreamDataProvider> provider,
910
    QPDFObjectHandle const& filter,
911
    QPDFObjectHandle const& decode_parms)
912
7.83k
{
913
7.83k
    as_stream(error).replaceStreamData(provider, filter, decode_parms);
914
7.83k
}
915
916
namespace
917
{
918
    class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
919
    {
920
      public:
921
        FunctionProvider(std::function<void(Pipeline*)> provider) :
922
11.4k
            StreamDataProvider(false),
923
11.4k
            p1(provider),
924
11.4k
            p2(nullptr)
925
11.4k
        {
926
11.4k
        }
927
        FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
928
0
            StreamDataProvider(true),
929
0
            p1(nullptr),
930
0
            p2(provider)
931
0
        {
932
0
        }
933
934
        void
935
        provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override
936
0
        {
937
0
            p1(pipeline);
938
0
        }
939
940
        bool
941
        provideStreamData(
942
            QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override
943
0
        {
944
0
            return p2(pipeline, suppress_warnings, will_retry);
945
0
        }
946
947
      private:
948
        std::function<void(Pipeline*)> p1;
949
        std::function<bool(Pipeline*, bool, bool)> p2;
950
    };
951
} // namespace
952
953
void
954
QPDFObjectHandle::replaceStreamData(
955
    std::function<void(Pipeline*)> provider,
956
    QPDFObjectHandle const& filter,
957
    QPDFObjectHandle const& decode_parms)
958
11.4k
{
959
11.4k
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
960
11.4k
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
961
11.4k
}
962
963
void
964
QPDFObjectHandle::replaceStreamData(
965
    std::function<bool(Pipeline*, bool, bool)> provider,
966
    QPDFObjectHandle const& filter,
967
    QPDFObjectHandle const& decode_parms)
968
0
{
969
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
970
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
971
0
}
972
973
// Produce the JSON representation of this stream by forwarding every argument unchanged to
// the stream view's getStreamJSON().
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    auto&& stream = as_stream(error);
    return stream.getStreamJSON(json_version, json_data, decode_level, p, data_filename);
}
983
984
// Return the result of copy() on the stream view of this handle.
QPDFObjectHandle
QPDFObjectHandle::copyStream()
{
    auto&& stream = as_stream(error);
    return stream.copy();
}