Coverage Report

Created: 2025-12-05 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_Stream.cc
Line
Count
Source
1
#include <qpdf/QPDFObjectHandle_private.hh>
2
3
#include <qpdf/ContentNormalizer.hh>
4
#include <qpdf/JSON_writer.hh>
5
#include <qpdf/Pipeline.hh>
6
#include <qpdf/Pipeline_private.hh>
7
#include <qpdf/Pl_Buffer.hh>
8
#include <qpdf/Pl_Count.hh>
9
#include <qpdf/Pl_Discard.hh>
10
#include <qpdf/Pl_Flate.hh>
11
#include <qpdf/Pl_QPDFTokenizer.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFExc.hh>
14
#include <qpdf/QPDF_private.hh>
15
#include <qpdf/QTC.hh>
16
#include <qpdf/QUtil.hh>
17
#include <qpdf/SF_ASCII85Decode.hh>
18
#include <qpdf/SF_ASCIIHexDecode.hh>
19
#include <qpdf/SF_DCTDecode.hh>
20
#include <qpdf/SF_FlateLzwDecode.hh>
21
#include <qpdf/SF_RunLengthDecode.hh>
22
23
#include <stdexcept>
24
25
using namespace std::literals;
26
using namespace qpdf;
27
28
using Streams = QPDF::Doc::Objects::Streams;
29
30
// Stream data provider used for streams whose data was copied from another stream by
// Stream::copy_data_to when the source had no in-memory buffer. One Copier is created per
// Streams object; it remembers, per destination object id, where the data has to come from.
class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider
{
    // Everything needed to pipe the source stream's data from its input source later on. The
    // values are captured when the copy is registered.
    class Data
    {
        friend class Streams;

      public:
        Data(Stream& source, Dictionary const& dest_dict) :
            encp(source.qpdf()->m->encp),
            file(source.qpdf()->m->file),
            source_og(source.id_gen()),
            offset(source.offset()),
            length(source.getLength()),
            dest_dict(dest_dict),
            is_root_metadata(source.isRootMetadata())
        {
        }

      private:
        std::shared_ptr<EncryptionParameters> encp;
        std::shared_ptr<InputSource> file;
        QPDFObjGen source_og;
        qpdf_offset_t offset;
        size_t length;
        QPDFObjectHandle dest_dict;
        bool is_root_metadata{false};
    };

  public:
    Copier() = delete;
    // A Copier holds a reference to its owning Streams object and must be neither copied nor
    // moved. The deleted operations have to name Copier itself: overloads taking
    // StreamDataProvider are not copy/move operations and would leave the implicit copy
    // constructor available.
    Copier(Copier const&) = delete;
    Copier(Copier&&) = delete;
    Copier& operator=(Copier const&) = delete;
    Copier& operator=(Copier&&) = delete;
    ~Copier() final = default;

    // The base class is constructed with `true`; presumably this is the "supports retry" flag
    // that Stream::pipeStreamData queries through supportsRetry() -- confirm against the header.
    Copier(Streams& streams) :
        QPDFObjectHandle::StreamDataProvider(true),
        streams(streams)
    {
    }

    // Write the data registered for `og` to `pipeline`. Returns true on success and false if the
    // data could not be piped or nothing was registered for `og`.
    bool
    provideStreamData(
        QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final
    {
        auto data = copied_data.find(og);
        if (data != copied_data.end()) {
            // Data lives in an input source: pipe it using the captured parameters.
            auto& fd = data->second;
            QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1);
            if (streams.qpdf.pipeStreamData(
                    fd.encp,
                    fd.file,
                    streams.qpdf,
                    fd.source_og,
                    fd.offset,
                    fd.length,
                    fd.dest_dict,
                    fd.is_root_metadata,
                    pipeline,
                    suppress_warnings,
                    will_retry)) {
                return true; // for CI coverage
            } else {
                return false;
            }
        }
        // Otherwise the source stream has its own provider: delegate to the source stream.
        auto stream = copied_streams.find(og);
        qpdf_invariant(stream == copied_streams.end() || stream->second);
        if (stream != copied_streams.end() &&
            stream->second.pipeStreamData(
                pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) {
            return true; // for CI coverage
        }
        return false;
    }

    // Record that the data of `dest` comes from `source`. If `provider` is true the source stream
    // itself is kept and asked for its data later; otherwise a Data snapshot is stored. A later
    // registration for the same destination replaces the earlier one.
    void
    register_copy(Stream& dest, Stream& source, bool provider)
    {
        qpdf_expect(source);
        qpdf_expect(dest);
        if (provider) {
            copied_streams.insert_or_assign(dest, source);
        } else {
            copied_data.insert_or_assign(dest, Data(source, dest.getDict()));
        }
    }

  private:
    Streams& streams;
    std::map<QPDFObjGen, Stream> copied_streams;
    std::map<QPDFObjGen, Data> copied_data;
};
124
125
// Build the Streams component on top of the shared Common state and create the one Copier that
// acts as stream data provider for streams registered through it.
Streams::Streams(Common& common) :
    Common(common),
    copier_(std::make_shared<Copier>(*this))
{
}
130
131
namespace
132
{
133
    class SF_Crypt final: public QPDFStreamFilter
134
    {
135
      public:
136
178
        SF_Crypt() = default;
137
        ~SF_Crypt() final = default;
138
139
        bool
140
        setDecodeParms(QPDFObjectHandle decode_parms) final
141
174
        {
142
            // we only validate here - processing happens in decryptStream
143
174
            if (Dictionary dict = decode_parms) {
144
1.82k
                for (auto const& [key, value]: dict) {
145
1.82k
                    if (key == "/Type" &&
146
14
                        (value.null() || Name(value) == "/CryptFilterDecodeParms")) {
147
4
                        continue;
148
4
                    }
149
1.81k
                    if (key == "/Name") {
150
3
                        continue;
151
3
                    }
152
1.81k
                    if (!value.null()) {
153
35
                        return false;
154
35
                    }
155
1.81k
                }
156
106
                return true;
157
141
            }
158
33
            return decode_parms.null();
159
174
        }
160
161
        Pipeline*
162
        getDecodePipeline(Pipeline*) final
163
136
        {
164
            // Not used -- handled by pipeStreamData
165
136
            return nullptr;
166
136
        }
167
    };
168
169
    class StreamBlobProvider
170
    {
171
      public:
172
        StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) :
173
0
            stream(stream),
174
0
            decode_level(decode_level)
175
0
        {
176
0
        }
177
        void
178
        operator()(Pipeline* p)
179
0
        {
180
0
            stream.pipeStreamData(p, nullptr, 0, decode_level, false, false);
181
0
        }
182
183
      private:
184
        Stream stream;
185
        qpdf_stream_decode_level_e decode_level;
186
    };
187
188
    /// User-defined stream filter factories, keyed by filter name.
189
    std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
190
    bool filter_factories_registered = false;
191
} // namespace
192
193
std::string
194
QPDF_Stream::Members::expand_filter_name(std::string const& name) const
195
0
{
196
    // The PDF specification provides these filter abbreviations for use in inline images, but
197
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
198
    // accepts them for stream filters.
199
0
    if (name == "/AHx") {
200
0
        return "/ASCIIHexDecode";
201
0
    }
202
0
    if (name == "/A85") {
203
0
        return "/ASCII85Decode";
204
0
    }
205
0
    if (name == "/LZW") {
206
0
        return "/LZWDecode";
207
0
    }
208
0
    if (name == "/Fl") {
209
0
        return "/FlateDecode";
210
0
    }
211
0
    if (name == "/RL") {
212
0
        return "/RunLengthDecode";
213
0
    }
214
0
    if (name == "/CCF") {
215
0
        return "/CCITTFaxDecode";
216
0
    }
217
0
    if (name == "/DCT") {
218
0
        return "/DCTDecode";
219
0
    }
220
0
    return name;
221
0
};
222
223
std::function<std::shared_ptr<QPDFStreamFilter>()>
224
QPDF_Stream::Members::filter_factory(std::string const& name) const
225
11.2k
{
226
11.2k
    if (filter_factories_registered) [[unlikely]] {
227
        // We need to check user provided filters first as we allow users to replace qpdf provided
228
        // default filters. This will have a performance impact if the facility to register stream
229
        // filters is actually used. We can optimize this away if necessary.
230
0
        auto ff = filter_factories.find(expand_filter_name(name));
231
0
        if (ff != filter_factories.end()) {
232
0
            return ff->second;
233
0
        }
234
0
    }
235
11.2k
    if (name == "/FlateDecode") {
236
2.71k
        return SF_FlateLzwDecode::flate_factory;
237
2.71k
    }
238
8.56k
    if (name == "/Crypt") {
239
178
        return []() { return std::make_shared<SF_Crypt>(); };
240
178
    }
241
8.38k
    if (name == "/LZWDecode") {
242
107
        return SF_FlateLzwDecode::lzw_factory;
243
107
    }
244
8.28k
    if (name == "/RunLengthDecode") {
245
41
        return SF_RunLengthDecode::factory;
246
41
    }
247
8.24k
    if (name == "/DCTDecode") {
248
96
        return SF_DCTDecode::factory;
249
96
    }
250
8.14k
    if (name == "/ASCII85Decode") {
251
532
        return SF_ASCII85Decode::factory;
252
532
    }
253
7.61k
    if (name == "/ASCIIHexDecode") {
254
53
        return SF_ASCIIHexDecode::factory;
255
53
    }
256
    // The PDF specification provides these filter abbreviations for use in inline images, but
257
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader
258
    // also accepts them for stream filters.
259
260
7.55k
    if (name == "/Fl") {
261
2.23k
        return SF_FlateLzwDecode::flate_factory;
262
2.23k
    }
263
5.32k
    if (name == "/AHx") {
264
392
        return SF_ASCIIHexDecode::factory;
265
392
    }
266
4.93k
    if (name == "/A85") {
267
42
        return SF_ASCII85Decode::factory;
268
42
    }
269
4.88k
    if (name == "/LZW") {
270
3.24k
        return SF_FlateLzwDecode::lzw_factory;
271
3.24k
    }
272
1.64k
    if (name == "/RL") {
273
1.31k
        return SF_RunLengthDecode::factory;
274
1.31k
    }
275
329
    if (name == "/DCT") {
276
59
        return SF_DCTDecode::factory;
277
59
    }
278
270
    return nullptr;
279
329
}
280
281
// Create a new stream object belonging to `qpdf` with id `og`, taking ownership of `stream_dict`.
// The object is described as "<filename>, stream object <og>" and the dictionary gets a derived
// description if it does not have one yet.
Stream::Stream(
    QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) :
    BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length))
{
    std::string text = qpdf.getFilename() + ", stream object " + og.unparse(' ');
    auto descr = std::make_shared<QPDFObject::Description>(std::move(text));
    obj->setDescription(&qpdf, descr, offset);
    setDictDescription();
}
290
291
// Create a new stream in the same QPDF whose dictionary is a copy of this stream's dictionary and
// whose data is taken from this stream.
Stream
Stream::copy()
{
    Stream duplicate = qpdf()->newStream();
    duplicate.stream()->stream_dict = getDict().copy();
    copy_data_to(duplicate);
    return duplicate;
}
299
300
void
301
Stream::copy_data_to(Stream& dest)
302
0
{
303
0
    qpdf_expect(dest);
304
0
    auto s = stream();
305
0
    auto& d_streams = dest.qpdf()->doc().objects().streams();
306
307
0
    auto dict = dest.getDict();
308
309
    // Copy information from the foreign stream so we can pipe its data later without keeping the
310
    // original QPDF object around.
311
0
    if (qpdf()->doc().config().immediate_copy_from() && !s->stream_data) {
312
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
313
        // source stream so that if the source stream is copied multiple times, we don't have to
314
        // keep duplicating the memory. Passing uninitialised object handles will preserve the
315
        // existing filters and decode parameters.
316
0
        replaceStreamData(getRawStreamData(), {}, {});
317
0
    }
318
0
    if (s->stream_data) {
319
0
        dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]);
320
0
    } else {
321
0
        d_streams.copier()->register_copy(dest, *this, s->stream_provider.get());
322
0
        dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]);
323
0
    }
324
0
}
325
326
void
327
Stream::registerStreamFilter(
328
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
329
0
{
330
0
    filter_factories[filter_name] = factory;
331
0
    filter_factories_registered = true;
332
0
}
333
334
// Build the JSON representation of this stream as a JSON object. The textual form is produced by
// writeStreamJSON and parsed back; for inline data a lazily evaluated "data" blob is then added,
// using the decode level that writeStreamJSON reported.
JSON
Stream::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    Pl_Buffer text{"streamjson"};
    JSON::Writer writer{&text, 0};
    // no_data_key is true: the "data" member is added below as a blob instead.
    decode_level =
        writeStreamJSON(json_version, writer, json_data, decode_level, p, data_filename, true);
    text.finish();

    auto json = JSON::parse(text.getString());
    if (json_data == qpdf_sj_inline) {
        json.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level)));
    }
    return json;
}
353
354
// Write this stream as a JSON object to `jw` and return the decode level that was actually used
// (qpdf_dl_none if decoding at the requested level failed and the raw data was used instead).
//
// json_data selects how data is reported: not at all (qpdf_sj_none), base64 in a "data" member
// (qpdf_sj_inline, unless no_data_key is set), or written to `p` with a "datafile" member naming
// `data_filename` (qpdf_sj_file). Throws std::logic_error if `p`/`data_filename` do not match
// json_data or if the stream data cannot be obtained.
qpdf_stream_decode_level_e
Stream::writeStreamJSON(
    int json_version,
    JSON::Writer& jw,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename,
    bool no_data_key)
{
    auto s = stream();

    // Validate the combination of arguments before writing anything.
    if (json_data == qpdf_sj_none || json_data == qpdf_sj_inline) {
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline should only be supplied "
                "when json_data is file");
        }
    } else if (json_data == qpdf_sj_file) {
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: data_filename must be supplied "
                "when json_data is file");
        }
    }

    jw.writeStart('{');

    if (json_data == qpdf_sj_none) {
        // Dictionary only; the stream data is not touched.
        jw.writeNext() << R"("dict": )";
        s->stream_dict.writeJSON(json_version, jw);
        jw.writeEnd('}');
        return decode_level;
    }

    // When the data is not going to be emitted here, it is still piped (into a discarding
    // pipeline) so that we learn whether it can be decoded at the requested level.
    Pl_Discard discard;
    Pl_Buffer buf_pl{"stream data"};
    Pipeline* data_pipeline = (no_data_key && json_data == qpdf_sj_inline)
        ? static_cast<Pipeline*>(&discard)
        : static_cast<Pipeline*>(&buf_pl);

    // pipeStreamData produced valid data.
    bool buf_pl_ready = false;
    bool filtered = false;
    bool filter = (decode_level != qpdf_dl_none);
    for (int attempt = 1; attempt <= 2 && !buf_pl_ready; ++attempt) {
        bool const succeeded =
            pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1));
        if (succeeded && (filtered || !filter)) {
            buf_pl_ready = true;
        } else {
            // Try again, this time without decoding.
            filter = false;
            decode_level = qpdf_dl_none;
            buf_pl.getString(); // reset buf_pl
        }
    }
    if (!buf_pl_ready) {
        throw std::logic_error("QPDF_Stream: failed to get stream data");
    }

    // We can use unsafeShallowCopy because we are only touching top-level keys.
    auto dict = s->stream_dict.unsafeShallowCopy();
    dict.removeKey("/Length");
    if (filter && filtered) {
        // The reported data is decoded, so the filter entries no longer apply.
        dict.removeKey("/Filter");
        dict.removeKey("/DecodeParms");
    }

    if (json_data == qpdf_sj_file) {
        jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\"";
        p->writeString(buf_pl.getString());
    } else if (json_data == qpdf_sj_inline) {
        if (!no_data_key) {
            jw.writeNext() << R"("data": ")";
            jw.writeBase64(buf_pl.getString()) << "\"";
        }
    } else {
        throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data");
    }

    jw.writeNext() << R"("dict": )";
    dict.writeJSON(json_version, jw);
    jw.writeEnd('}');

    return decode_level;
}
448
449
void
450
qpdf::Stream::setDictDescription()
451
19.3k
{
452
19.3k
    auto s = stream();
453
19.3k
    if (!s->stream_dict.hasObjectDescription()) {
454
0
        s->stream_dict.setObjectDescription(
455
0
            obj->getQPDF(), obj->getDescription() + " -> stream dictionary");
456
0
    }
457
19.3k
}
458
459
std::string
460
Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
461
9.42k
{
462
9.42k
    std::string result;
463
9.42k
    pl::String buf(result);
464
9.42k
    bool filtered;
465
9.42k
    pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
466
9.42k
    if (!filtered) {
467
2.02k
        throw QPDFExc(
468
2.02k
            qpdf_e_unsupported,
469
2.02k
            qpdf()->getFilename(),
470
2.02k
            "",
471
2.02k
            offset(),
472
2.02k
            "getStreamData called on unfilterable stream");
473
2.02k
    }
474
7.40k
    return result;
475
9.42k
}
476
477
std::string
478
Stream::getRawStreamData()
479
0
{
480
0
    std::string result;
481
0
    pl::String buf(result);
482
0
    if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) {
483
0
        throw QPDFExc(
484
0
            qpdf_e_unsupported,
485
0
            qpdf()->getFilename(),
486
0
            "",
487
0
            offset(),
488
0
            "error getting raw stream data");
489
0
    }
490
0
    return result;
491
0
}
492
493
bool
494
Stream::isRootMetadata() const
495
9.35k
{
496
9.35k
    if (!stream()->stream_dict.isDictionaryOfType("/Metadata", "/XML")) {
497
9.33k
        return false;
498
9.33k
    }
499
22
    return qpdf()->getRoot()["/Metadata"].isSameObjectAs(obj);
500
9.35k
}
501
502
bool
503
Stream::filterable(
504
    qpdf_stream_decode_level_e decode_level,
505
    std::vector<std::shared_ptr<QPDFStreamFilter>>& filters)
506
9.42k
{
507
9.42k
    auto s = stream();
508
    // Check filters
509
510
9.42k
    auto const& filter_obj = s->stream_dict["/Filter"];
511
512
9.42k
    if (filter_obj.null()) {
513
        // No filters
514
3.15k
        return true;
515
3.15k
    }
516
6.27k
    if (filter_obj.size() > global::Limits::max_stream_filters()) {
517
27
        global::Limits::error();
518
27
        warn(
519
27
            "limits error(max-stream-filters): too many filters for stream; treating stream as "
520
27
            "not filterable");
521
27
        return false;
522
27
    }
523
6.24k
    if (filter_obj.isName()) {
524
        // One filter
525
4.90k
        auto ff = s->filter_factory(filter_obj.getName());
526
4.90k
        if (!ff) {
527
140
            return false;
528
140
        }
529
4.76k
        filters.emplace_back(ff());
530
4.76k
    } else if (Array array = filter_obj) {
531
        // Potentially multiple filters
532
6.39k
        for (Name item: array) {
533
6.39k
            if (!item) {
534
19
                warn("stream filter type is not name or array");
535
19
                return false;
536
19
            }
537
6.37k
            auto ff = s->filter_factory(item);
538
6.37k
            if (!ff) {
539
130
                filters.clear();
540
130
                return false;
541
130
            }
542
6.24k
            filters.emplace_back(ff());
543
6.24k
        }
544
1.32k
    } else {
545
16
        warn("stream filter type is not name or array");
546
16
        return false;
547
16
    }
548
549
    // filters now contains a list of filters to be applied in order. See which ones we can support.
550
    // See if we can support any decode parameters that are specified.
551
552
5.93k
    auto decode_obj = s->stream_dict.getKey("/DecodeParms");
553
554
5.93k
    auto can_filter = // linebreak
555
10.2k
        [](auto d_level, auto& filter, auto& d_obj) -> bool {
556
10.2k
        if (!filter.setDecodeParms(d_obj) ||
557
10.0k
            (d_level < qpdf_dl_all && filter.isLossyCompression()) ||
558
10.0k
            (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) {
559
176
            return false;
560
176
        }
561
10.0k
        return true;
562
10.2k
    };
563
564
5.93k
    auto decode_array = decode_obj.as_array(strict);
565
5.93k
    if (!decode_array || decode_array.size() == 0) {
566
5.91k
        if (decode_array) {
567
4
            decode_obj = QPDFObjectHandle::newNull();
568
4
        }
569
570
10.1k
        for (auto& filter: filters) {
571
10.1k
            if (!can_filter(decode_level, *filter, decode_obj)) {
572
172
                return false;
573
172
            }
574
10.1k
        }
575
5.91k
    } else {
576
        // Ignore /DecodeParms entirely if /Filters is empty.  At least one case of a file whose
577
        // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
578
27
        if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) {
579
3
            warn("stream /DecodeParms length is inconsistent with filters");
580
3
            return false;
581
3
        }
582
583
24
        int i = -1;
584
42
        for (auto& filter: filters) {
585
42
            auto d_obj = decode_array.get(++i);
586
42
            if (!can_filter(decode_level, *filter, d_obj)) {
587
4
                return false;
588
4
            }
589
42
        }
590
24
    }
591
592
5.76k
    return true;
593
5.93k
}
594
595
bool
596
Stream::pipeStreamData(
597
    Pipeline* pipeline,
598
    bool* filterp,
599
    int encode_flags,
600
    qpdf_stream_decode_level_e decode_level,
601
    bool suppress_warnings,
602
    bool will_retry)
603
9.42k
{
604
9.42k
    auto s = stream();
605
9.42k
    std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
606
9.42k
    bool ignored;
607
9.42k
    if (!filterp) {
608
0
        filterp = &ignored;
609
0
    }
610
9.42k
    bool& filter = *filterp;
611
612
9.42k
    const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0;
613
9.42k
    const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0;
614
9.42k
    const bool empty = empty_stream || empty_stream_data;
615
616
9.42k
    if (empty_stream || empty_stream_data) {
617
630
        filter = true;
618
630
    }
619
620
9.42k
    filter = empty || encode_flags || decode_level != qpdf_dl_none;
621
9.42k
    if (filter) {
622
9.42k
        filter = filterable(decode_level, filters);
623
9.42k
    }
624
625
9.42k
    if (!pipeline) {
626
0
        QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
627
        // Return value is whether we can filter in this case.
628
0
        return filter;
629
0
    }
630
631
    // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
632
    // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
633
    // objects.
634
9.42k
    std::vector<std::unique_ptr<Pipeline>> to_delete;
635
636
9.42k
    ContentNormalizer normalizer;
637
9.42k
    if (filter) {
638
8.89k
        if (encode_flags & qpdf_ef_compress) {
639
0
            auto new_pipeline =
640
0
                std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate);
641
0
            pipeline = new_pipeline.get();
642
0
            to_delete.push_back(std::move(new_pipeline));
643
0
        }
644
645
8.89k
        if (encode_flags & qpdf_ef_normalize) {
646
0
            auto new_pipeline =
647
0
                std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline);
648
0
            pipeline = new_pipeline.get();
649
0
            to_delete.push_back(std::move(new_pipeline));
650
0
        }
651
652
8.89k
        for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) {
653
0
            auto new_pipeline =
654
0
                std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline);
655
0
            pipeline = new_pipeline.get();
656
0
            to_delete.push_back(std::move(new_pipeline));
657
0
        }
658
659
18.8k
        for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) {
660
9.98k
            if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) {
661
9.81k
                pipeline = decode_pipeline;
662
9.81k
            }
663
9.98k
            auto* flate = dynamic_cast<Pl_Flate*>(pipeline);
664
9.98k
            if (flate) {
665
4.85k
                flate->setWarnCallback([this](char const* msg, int code) { warn(msg); });
666
4.85k
            }
667
9.98k
        }
668
8.89k
    }
669
670
9.42k
    if (s->stream_data.get()) {
671
0
        QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
672
0
        pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize());
673
0
        pipeline->finish();
674
9.42k
    } else if (s->stream_provider.get()) {
675
0
        Pl_Count count("stream provider count", pipeline);
676
0
        if (s->stream_provider->supportsRetry()) {
677
0
            if (!s->stream_provider->provideStreamData(
678
0
                    obj->getObjGen(), &count, suppress_warnings, will_retry)) {
679
0
                filter = false;
680
0
                return false;
681
0
            }
682
0
        } else {
683
0
            s->stream_provider->provideStreamData(obj->getObjGen(), &count);
684
0
        }
685
0
        qpdf_offset_t actual_length = count.getCount();
686
0
        if (s->stream_dict.hasKey("/Length")) {
687
0
            auto desired_length = s->stream_dict.getKey("/Length").getIntValue();
688
0
            if (actual_length != desired_length) {
689
0
                QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
690
                // This would be caused by programmer error on the part of a library user, not by
691
                // invalid input data.
692
0
                throw std::runtime_error(
693
0
                    "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " +
694
0
                    std::to_string(actual_length) + " bytes instead of expected " +
695
0
                    std::to_string(desired_length) + " bytes");
696
0
            }
697
0
        } else {
698
0
            QTC::TC("qpdf", "QPDF_Stream provider length not provided");
699
0
            s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length));
700
0
        }
701
9.42k
    } else {
702
9.42k
        if (offset() == 0) {
703
0
            throw std::logic_error("pipeStreamData called for stream with no data");
704
0
        }
705
9.42k
        if (!Streams::pipeStreamData(
706
9.42k
                qpdf(),
707
9.42k
                id_gen(),
708
9.42k
                offset(),
709
9.42k
                s->length,
710
9.42k
                s->stream_dict,
711
9.42k
                isRootMetadata(),
712
9.42k
                pipeline,
713
9.42k
                suppress_warnings,
714
9.42k
                will_retry)) {
715
1.52k
            filter = false;
716
1.52k
            return false;
717
1.52k
        }
718
9.42k
    }
719
720
7.89k
    if (filter && !suppress_warnings && normalizer.anyBadTokens()) {
721
0
        warn("content normalization encountered bad tokens");
722
0
        if (normalizer.lastTokenWasBad()) {
723
0
            QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
724
0
            warn(
725
0
                "normalized content ended with a bad token; you may be able to resolve this by "
726
0
                "coalescing content streams in combination with normalizing content. From the "
727
0
                "command line, specify --coalesce-contents");
728
0
        }
729
0
        warn(
730
0
            "Resulting stream data may be corrupted but is may still useful for manual "
731
0
            "inspection. For more information on this warning, search for content normalization "
732
0
            "in the manual.");
733
0
    }
734
735
7.89k
    return true;
736
9.42k
}
737
738
void
739
Stream::replaceStreamData(
740
    std::string&& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
741
0
{
742
0
    auto s = stream();
743
0
    s->stream_data = std::make_shared<Buffer>(std::move(data));
744
0
    s->stream_provider = nullptr;
745
0
    replaceFilterData(filter, decode_parms, s->stream_data->getSize());
746
0
}
747
748
void
749
Stream::replaceStreamData(
750
    std::shared_ptr<Buffer> data,
751
    QPDFObjectHandle const& filter,
752
    QPDFObjectHandle const& decode_parms)
753
0
{
754
0
    auto s = stream();
755
0
    s->stream_data = data;
756
0
    s->stream_provider = nullptr;
757
0
    replaceFilterData(filter, decode_parms, data->size());
758
0
}
759
760
void
761
Stream::replaceStreamData(
762
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider,
763
    QPDFObjectHandle const& filter,
764
    QPDFObjectHandle const& decode_parms)
765
0
{
766
0
    auto s = stream();
767
0
    s->stream_provider = provider;
768
0
    s->stream_data = nullptr;
769
0
    replaceFilterData(filter, decode_parms, 0);
770
0
}
771
772
void
773
Stream::replaceFilterData(
774
    QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length)
775
0
{
776
0
    auto s = stream();
777
0
    if (filter) {
778
0
        s->stream_dict.replaceKey("/Filter", filter);
779
0
    }
780
0
    if (decode_parms) {
781
0
        s->stream_dict.replaceKey("/DecodeParms", decode_parms);
782
0
    }
783
0
    if (length == 0) {
784
0
        QTC::TC("qpdf", "QPDF_Stream unknown stream length");
785
0
        s->stream_dict.removeKey("/Length");
786
0
    } else {
787
0
        s->stream_dict.replaceKey(
788
0
            "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length)));
789
0
    }
790
0
}
791
792
void
793
Stream::warn(std::string const& message)
794
2.22k
{
795
2.22k
    qpdf()->warn(qpdf_e_damaged_pdf, "", offset(), message);
796
2.22k
}
797
798
// Return the dictionary of this stream. The object is accessed via as_stream(error), which
// presumably reports an error if it is not a stream -- confirm against as_stream.
QPDFObjectHandle
QPDFObjectHandle::getDict() const
{
    auto handle = as_stream(error);
    return handle.getDict();
}
803
804
void
805
QPDFObjectHandle::setFilterOnWrite(bool val)
806
0
{
807
0
    as_stream(error).setFilterOnWrite(val);
808
0
}
809
810
bool
811
QPDFObjectHandle::getFilterOnWrite()
812
0
{
813
0
    return as_stream(error).getFilterOnWrite();
814
0
}
815
816
bool
817
QPDFObjectHandle::isDataModified()
818
0
{
819
0
    return as_stream(error).isDataModified();
820
0
}
821
822
void
823
QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict)
824
0
{
825
0
    as_stream(error).replaceDict(new_dict);
826
0
}
827
828
bool
829
QPDFObjectHandle::isRootMetadata() const
830
0
{
831
0
    return as_stream(error).isRootMetadata();
832
0
}
833
834
std::shared_ptr<Buffer>
835
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
836
7.46k
{
837
7.46k
    return std::make_shared<Buffer>(as_stream(error).getStreamData(level));
838
7.46k
}
839
840
std::shared_ptr<Buffer>
841
QPDFObjectHandle::getRawStreamData()
842
0
{
843
0
    return std::make_shared<Buffer>(as_stream(error).getRawStreamData());
844
0
}
845
846
bool
847
QPDFObjectHandle::pipeStreamData(
848
    Pipeline* p,
849
    bool* filtering_attempted,
850
    int encode_flags,
851
    qpdf_stream_decode_level_e decode_level,
852
    bool suppress_warnings,
853
    bool will_retry)
854
0
{
855
0
    return as_stream(error).pipeStreamData(
856
0
        p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
857
0
}
858
859
bool
860
QPDFObjectHandle::pipeStreamData(
861
    Pipeline* p,
862
    int encode_flags,
863
    qpdf_stream_decode_level_e decode_level,
864
    bool suppress_warnings,
865
    bool will_retry)
866
0
{
867
0
    bool filtering_attempted;
868
0
    as_stream(error).pipeStreamData(
869
0
        p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
870
0
    return filtering_attempted;
871
0
}
872
873
bool
874
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress)
875
0
{
876
0
    int encode_flags = 0;
877
0
    qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
878
0
    if (filter) {
879
0
        decode_level = qpdf_dl_generalized;
880
0
        if (normalize) {
881
0
            encode_flags |= qpdf_ef_normalize;
882
0
        }
883
0
        if (compress) {
884
0
            encode_flags |= qpdf_ef_compress;
885
0
        }
886
0
    }
887
0
    return pipeStreamData(p, encode_flags, decode_level, false);
888
0
}
889
890
void
891
QPDFObjectHandle::replaceStreamData(
892
    std::shared_ptr<Buffer> data,
893
    QPDFObjectHandle const& filter,
894
    QPDFObjectHandle const& decode_parms)
895
0
{
896
0
    as_stream(error).replaceStreamData(data, filter, decode_parms);
897
0
}
898
899
void
900
QPDFObjectHandle::replaceStreamData(
901
    std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
902
0
{
903
0
    std::string s(data);
904
0
    as_stream(error).replaceStreamData(std::move(s), filter, decode_parms);
905
0
}
906
907
void
908
QPDFObjectHandle::replaceStreamData(
909
    std::shared_ptr<StreamDataProvider> provider,
910
    QPDFObjectHandle const& filter,
911
    QPDFObjectHandle const& decode_parms)
912
0
{
913
0
    as_stream(error).replaceStreamData(provider, filter, decode_parms);
914
0
}
915
916
namespace
917
{
918
    class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
919
    {
920
      public:
921
        FunctionProvider(std::function<void(Pipeline*)> provider) :
922
0
            StreamDataProvider(false),
923
0
            p1(provider),
924
0
            p2(nullptr)
925
0
        {
926
0
        }
927
        FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
928
0
            StreamDataProvider(true),
929
0
            p1(nullptr),
930
0
            p2(provider)
931
0
        {
932
0
        }
933
934
        void
935
        provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override
936
0
        {
937
0
            p1(pipeline);
938
0
        }
939
940
        bool
941
        provideStreamData(
942
            QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override
943
0
        {
944
0
            return p2(pipeline, suppress_warnings, will_retry);
945
0
        }
946
947
      private:
948
        std::function<void(Pipeline*)> p1;
949
        std::function<bool(Pipeline*, bool, bool)> p2;
950
    };
951
} // namespace
952
953
void
954
QPDFObjectHandle::replaceStreamData(
955
    std::function<void(Pipeline*)> provider,
956
    QPDFObjectHandle const& filter,
957
    QPDFObjectHandle const& decode_parms)
958
0
{
959
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
960
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
961
0
}
962
963
void
964
QPDFObjectHandle::replaceStreamData(
965
    std::function<bool(Pipeline*, bool, bool)> provider,
966
    QPDFObjectHandle const& filter,
967
    QPDFObjectHandle const& decode_parms)
968
0
{
969
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
970
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
971
0
}
972
973
// Forward all arguments to Stream::getStreamJSON on this object viewed as a stream.
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    auto handle = as_stream(error);
    return handle.getStreamJSON(json_version, json_data, decode_level, p, data_filename);
}
983
984
// Return a copy of this stream as produced by Stream::copy.
QPDFObjectHandle
QPDFObjectHandle::copyStream()
{
    auto handle = as_stream(error);
    return handle.copy();
}