Coverage Report

Created: 2025-11-09 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_Stream.cc
Line
Count
Source
1
#include <qpdf/QPDFObjectHandle_private.hh>
2
3
#include <qpdf/ContentNormalizer.hh>
4
#include <qpdf/JSON_writer.hh>
5
#include <qpdf/Pipeline.hh>
6
#include <qpdf/Pipeline_private.hh>
7
#include <qpdf/Pl_Buffer.hh>
8
#include <qpdf/Pl_Count.hh>
9
#include <qpdf/Pl_Discard.hh>
10
#include <qpdf/Pl_Flate.hh>
11
#include <qpdf/Pl_QPDFTokenizer.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFExc.hh>
14
#include <qpdf/QPDF_private.hh>
15
#include <qpdf/QTC.hh>
16
#include <qpdf/QUtil.hh>
17
#include <qpdf/SF_ASCII85Decode.hh>
18
#include <qpdf/SF_ASCIIHexDecode.hh>
19
#include <qpdf/SF_DCTDecode.hh>
20
#include <qpdf/SF_FlateLzwDecode.hh>
21
#include <qpdf/SF_RunLengthDecode.hh>
22
23
#include <stdexcept>
24
25
using namespace std::literals;
26
using namespace qpdf;
27
28
using Streams = QPDF::Doc::Objects::Streams;
29
30
/// Stream data provider that is installed on destination streams by Stream::copy_data_to. One
/// Copier is created per Streams object; for every registered destination stream it remembers
/// either the source stream itself or the details needed to read the source data from its input
/// source, keyed by the destination's object/generation.
class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider
{
    /// Details captured from a source stream at registration time: encryption parameters, input
    /// source, object id, offset and length of the source, plus the destination dictionary and
    /// whether the source is the document's root metadata stream.
    class Data
    {
        friend class Streams;

      public:
        Data(Stream& source, Dictionary const& dest_dict) :
            encp(source.qpdf()->m->encp),
            file(source.qpdf()->m->file),
            source_og(source.id_gen()),
            offset(source.offset()),
            length(source.getLength()),
            dest_dict(dest_dict),
            is_root_metadata(source.isRootMetadata())
        {
        }

      private:
        std::shared_ptr<EncryptionParameters> encp;
        std::shared_ptr<InputSource> file;
        QPDFObjGen source_og;
        qpdf_offset_t offset;
        size_t length;
        QPDFObjectHandle dest_dict;
        bool is_root_metadata{false};
    };

  public:
    // A Copier holds a reference to its owning Streams object and must be neither copied nor
    // moved. The deleted members name Copier itself so that the implicitly declared copy
    // constructor is actually suppressed.
    Copier() = delete;
    Copier(Copier const&) = delete;
    Copier(Copier&&) = delete;
    Copier& operator=(Copier const&) = delete;
    Copier& operator=(Copier&&) = delete;
    ~Copier() final = default;

    /// The base class is constructed with `true`; other providers in this file pass `true` when
    /// they implement the four-argument provideStreamData overload (presumably "supports retry").
    Copier(Streams& streams) :
        QPDFObjectHandle::StreamDataProvider(true),
        streams(streams)
    {
    }

    /// Pipe the data registered for destination stream `og` into `pipeline`.
    ///
    /// Streams registered with captured Data are read from the recorded input source; streams
    /// registered with a source Stream are piped through that stream undecoded (qpdf_dl_none).
    /// Returns false if `og` was never registered or if piping fails.
    bool
    provideStreamData(
        QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final
    {
        auto data = copied_data.find(og);
        if (data != copied_data.end()) {
            auto& fd = data->second;
            QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1);
            if (streams.qpdf.pipeStreamData(
                    fd.encp,
                    fd.file,
                    streams.qpdf,
                    fd.source_og,
                    fd.offset,
                    fd.length,
                    fd.dest_dict,
                    fd.is_root_metadata,
                    pipeline,
                    suppress_warnings,
                    will_retry)) {
                return true; // for CI coverage
            } else {
                return false;
            }
        }
        auto stream = copied_streams.find(og);
        qpdf_invariant(stream == copied_streams.end() || stream->second);
        if (stream != copied_streams.end() &&
            stream->second.pipeStreamData(
                pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) {
            return true; // for CI coverage
        }
        return false;
    }

    /// Remember how to obtain the data for `dest`. When `provider` is true the source stream
    /// itself is stored and later piped; otherwise the source's file-level details are captured.
    /// A later registration for the same destination replaces the earlier one.
    void
    register_copy(Stream& dest, Stream& source, bool provider)
    {
        qpdf_expect(source);
        qpdf_expect(dest);
        if (provider) {
            copied_streams.insert_or_assign(dest, source);
        } else {
            copied_data.insert_or_assign(dest, Data(source, dest.getDict()));
        }
    }

  private:
    Streams& streams;
    std::map<QPDFObjGen, Stream> copied_streams;
    std::map<QPDFObjGen, Data> copied_data;
};
124
125
Streams::Streams(Common& common) :
126
18.7k
    Common(common),
127
18.7k
    copier_(std::make_shared<Copier>(*this))
128
18.7k
{
129
18.7k
}
130
131
namespace
132
{
133
    class SF_Crypt final: public QPDFStreamFilter
134
    {
135
      public:
136
331
        SF_Crypt() = default;
137
        ~SF_Crypt() final = default;
138
139
        bool
140
        setDecodeParms(QPDFObjectHandle decode_parms) final
141
310
        {
142
            // we only validate here - processing happens in decryptStream
143
310
            if (Dictionary dict = decode_parms) {
144
1.56k
                for (auto const& [key, value]: dict) {
145
1.56k
                    if (key == "/Type" &&
146
14
                        (value.null() || Name(value) == "/CryptFilterDecodeParms")) {
147
4
                        continue;
148
4
                    }
149
1.55k
                    if (key == "/Name") {
150
3
                        continue;
151
3
                    }
152
1.55k
                    if (!value.null()) {
153
38
                        return false;
154
38
                    }
155
1.55k
                }
156
80
                return true;
157
118
            }
158
192
            return decode_parms.null();
159
310
        }
160
161
        Pipeline*
162
        getDecodePipeline(Pipeline*) final
163
269
        {
164
            // Not used -- handled by pipeStreamData
165
269
            return nullptr;
166
269
        }
167
    };
168
169
    class StreamBlobProvider
170
    {
171
      public:
172
        StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) :
173
0
            stream(stream),
174
0
            decode_level(decode_level)
175
0
        {
176
0
        }
177
        void
178
        operator()(Pipeline* p)
179
0
        {
180
0
            stream.pipeStreamData(p, nullptr, 0, decode_level, false, false);
181
0
        }
182
183
      private:
184
        Stream stream;
185
        qpdf_stream_decode_level_e decode_level;
186
    };
187
188
    /// User defined streamfilter factories
189
    std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
190
    bool filter_factories_registered = false;
191
} // namespace
192
193
std::string
194
QPDF_Stream::Members::expand_filter_name(std::string const& name) const
195
0
{
196
    // The PDF specification provides these filter abbreviations for use in inline images, but
197
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
198
    // accepts them for stream filters.
199
0
    if (name == "/AHx") {
200
0
        return "/ASCIIHexDecode";
201
0
    }
202
0
    if (name == "/A85") {
203
0
        return "/ASCII85Decode";
204
0
    }
205
0
    if (name == "/LZW") {
206
0
        return "/LZWDecode";
207
0
    }
208
0
    if (name == "/Fl") {
209
0
        return "/FlateDecode";
210
0
    }
211
0
    if (name == "/RL") {
212
0
        return "/RunLengthDecode";
213
0
    }
214
0
    if (name == "/CCF") {
215
0
        return "/CCITTFaxDecode";
216
0
    }
217
0
    if (name == "/DCT") {
218
0
        return "/DCTDecode";
219
0
    }
220
0
    return name;
221
0
};
222
223
std::function<std::shared_ptr<QPDFStreamFilter>()>
224
QPDF_Stream::Members::filter_factory(std::string const& name) const
225
37.2k
{
226
37.2k
    if (filter_factories_registered) [[unlikely]] {
227
        // We need to check user provided filters first as we allow users to replace qpdf provided
228
        // default filters. This will have a performance impact if the facility to register stream
229
        // filters is actually used. We can optimize this away if necessary.
230
0
        auto ff = filter_factories.find(expand_filter_name(name));
231
0
        if (ff != filter_factories.end()) {
232
0
            return ff->second;
233
0
        }
234
0
    }
235
37.2k
    if (name == "/FlateDecode") {
236
2.98k
        return SF_FlateLzwDecode::flate_factory;
237
2.98k
    }
238
34.2k
    if (name == "/Crypt") {
239
331
        return []() { return std::make_shared<SF_Crypt>(); };
240
331
    }
241
33.9k
    if (name == "/LZWDecode") {
242
496
        return SF_FlateLzwDecode::lzw_factory;
243
496
    }
244
33.4k
    if (name == "/RunLengthDecode") {
245
35
        return SF_RunLengthDecode::factory;
246
35
    }
247
33.4k
    if (name == "/DCTDecode") {
248
107
        return SF_DCTDecode::factory;
249
107
    }
250
33.3k
    if (name == "/ASCII85Decode") {
251
588
        return SF_ASCII85Decode::factory;
252
588
    }
253
32.7k
    if (name == "/ASCIIHexDecode") {
254
250
        return SF_ASCIIHexDecode::factory;
255
250
    }
256
    // The PDF specification provides these filter abbreviations for use in inline images, but
257
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader
258
    // also accepts them for stream filters.
259
260
32.4k
    if (name == "/Fl") {
261
2.69k
        return SF_FlateLzwDecode::flate_factory;
262
2.69k
    }
263
29.7k
    if (name == "/AHx") {
264
384
        return SF_ASCIIHexDecode::factory;
265
384
    }
266
29.4k
    if (name == "/A85") {
267
47
        return SF_ASCII85Decode::factory;
268
47
    }
269
29.3k
    if (name == "/LZW") {
270
26.6k
        return SF_FlateLzwDecode::lzw_factory;
271
26.6k
    }
272
2.73k
    if (name == "/RL") {
273
2.40k
        return SF_RunLengthDecode::factory;
274
2.40k
    }
275
321
    if (name == "/DCT") {
276
58
        return SF_DCTDecode::factory;
277
58
    }
278
263
    return nullptr;
279
321
}
280
281
/// Create a stream object owned by `qpdf` with the given id, dictionary, file offset and length.
/// The object is described as "<filename>, stream object <og>", and the dictionary receives a
/// derived description if it does not already have one.
Stream::Stream(
    QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) :
    BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length))
{
    std::string text = qpdf.getFilename();
    text += ", stream object ";
    text += og.unparse(' ');
    auto description = std::make_shared<QPDFObject::Description>(std::move(text));
    obj->setDescription(&qpdf, description, offset);
    setDictDescription();
}
290
291
/// Create a new stream in the same QPDF whose dictionary is a copy of this stream's dictionary
/// and whose data is taken from this stream (see copy_data_to).
Stream
Stream::copy()
{
    Stream duplicate = qpdf()->newStream();
    duplicate.stream()->stream_dict = getDict().copy();
    copy_data_to(duplicate);
    return duplicate;
}
299
300
void
301
Stream::copy_data_to(Stream& dest)
302
0
{
303
0
    qpdf_expect(dest);
304
0
    auto s = stream();
305
0
    auto& d_streams = dest.qpdf()->doc().objects().streams();
306
307
0
    auto dict = dest.getDict();
308
309
    // Copy information from the foreign stream so we can pipe its data later without keeping the
310
    // original QPDF object around.
311
0
    if (qpdf()->doc().config().immediate_copy_from() && !s->stream_data) {
312
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
313
        // source stream so that if the source stream is copied multiple times, we don't have to
314
        // keep duplicating the memory. Passing uninitialised object handles will preserve the
315
        // existing filters and decode parameters.
316
0
        replaceStreamData(getRawStreamData(), {}, {});
317
0
    }
318
0
    if (s->stream_data) {
319
0
        dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]);
320
0
    } else {
321
0
        d_streams.copier()->register_copy(dest, *this, s->stream_provider.get());
322
0
        dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]);
323
0
    }
324
0
}
325
326
void
327
Stream::registerStreamFilter(
328
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
329
0
{
330
0
    filter_factories[filter_name] = factory;
331
0
    filter_factories_registered = true;
332
0
}
333
334
/// Build the JSON representation of this stream as a JSON object.
///
/// The JSON text is produced by writeStreamJSON into a buffer and parsed back. For inline data
/// the "data" member is attached afterwards as a blob that pipes the stream at the decode level
/// writeStreamJSON ended up using.
JSON
Stream::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    Pl_Buffer json_text{"streamjson"};
    JSON::Writer writer{&json_text, 0};
    // writeStreamJSON may lower the decode level if the requested one could not be applied.
    decode_level =
        writeStreamJSON(json_version, writer, json_data, decode_level, p, data_filename, true);
    json_text.finish();

    auto parsed = JSON::parse(json_text.getString());
    if (json_data == qpdf_sj_inline) {
        parsed.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level)));
    }
    return parsed;
}
353
354
/// Write this stream as a JSON object to `jw` and return the decode level actually used.
///
/// - qpdf_sj_none: only "dict" is written; `p` must be null.
/// - qpdf_sj_inline: data is written base64-encoded under "data" unless `no_data_key` is set, in
///   which case the data is piped to a discarding pipeline; `p` must be null.
/// - qpdf_sj_file: "datafile" names `data_filename` and the data is written to `p`; both must be
///   supplied.
///
/// If piping at the requested decode level fails or does not filter, one retry is made with
/// qpdf_dl_none. Throws std::logic_error on invalid argument combinations or if no data could be
/// obtained.
qpdf_stream_decode_level_e
Stream::writeStreamJSON(
    int json_version,
    JSON::Writer& jw,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename,
    bool no_data_key)
{
    auto members = stream();

    // Validate the pipeline/filename arguments against the requested data mode.
    if (json_data == qpdf_sj_none || json_data == qpdf_sj_inline) {
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline should only be supplied "
                "when json_data is file");
        }
    } else if (json_data == qpdf_sj_file) {
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::writeStreamJSON: data_filename must be supplied "
                "when json_data is file");
        }
    }

    jw.writeStart('{');

    if (json_data == qpdf_sj_none) {
        jw.writeNext();
        jw << R"("dict": )";
        members->stream_dict.writeJSON(json_version, jw);
        jw.writeEnd('}');
        return decode_level;
    }

    Pl_Discard discard;
    Pl_Buffer buf_pl{"stream data"};
    // When the caller attaches the inline data itself, the bytes are not needed here.
    Pipeline* data_pipeline = (no_data_key && json_data == qpdf_sj_inline)
        ? static_cast<Pipeline*>(&discard)
        : static_cast<Pipeline*>(&buf_pl);

    bool have_data = false; // set once pipeStreamData produced valid data
    bool filtered = false;
    bool want_decode = (decode_level != qpdf_dl_none);
    for (int attempt = 1; attempt <= 2 && !have_data; ++attempt) {
        bool const will_retry = (attempt == 1);
        bool const piped =
            pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, will_retry);
        if (piped && (!want_decode || filtered)) {
            have_data = true;
        } else {
            // Fall back to the raw data for the next attempt.
            want_decode = false;
            decode_level = qpdf_dl_none;
            buf_pl.getString(); // reset buf_pl
        }
    }
    if (!have_data) {
        throw std::logic_error("QPDF_Stream: failed to get stream data");
    }

    // We can use unsafeShallowCopy because we are only touching top-level keys.
    auto dict = members->stream_dict.unsafeShallowCopy();
    dict.removeKey("/Length");
    if (want_decode && filtered) {
        // The emitted data is decoded, so the filter entries no longer apply.
        dict.removeKey("/Filter");
        dict.removeKey("/DecodeParms");
    }

    if (json_data == qpdf_sj_file) {
        jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\"";
        p->writeString(buf_pl.getString());
    } else if (json_data == qpdf_sj_inline) {
        if (!no_data_key) {
            jw.writeNext() << R"("data": ")";
            jw.writeBase64(buf_pl.getString()) << "\"";
        }
    } else {
        throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data");
    }

    jw.writeNext() << R"("dict": )";
    dict.writeJSON(json_version, jw);
    jw.writeEnd('}');

    return decode_level;
}
448
449
void
450
qpdf::Stream::setDictDescription()
451
18.9k
{
452
18.9k
    auto s = stream();
453
18.9k
    if (!s->stream_dict.hasObjectDescription()) {
454
0
        s->stream_dict.setObjectDescription(
455
0
            obj->getQPDF(), obj->getDescription() + " -> stream dictionary");
456
0
    }
457
18.9k
}
458
459
std::string
460
Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
461
9.26k
{
462
9.26k
    std::string result;
463
9.26k
    pl::String buf(result);
464
9.26k
    bool filtered;
465
9.26k
    pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
466
9.26k
    if (!filtered) {
467
2.08k
        throw QPDFExc(
468
2.08k
            qpdf_e_unsupported,
469
2.08k
            qpdf()->getFilename(),
470
2.08k
            "",
471
2.08k
            offset(),
472
2.08k
            "getStreamData called on unfilterable stream");
473
2.08k
    }
474
7.18k
    return result;
475
9.26k
}
476
477
std::string
478
Stream::getRawStreamData()
479
0
{
480
0
    std::string result;
481
0
    pl::String buf(result);
482
0
    if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) {
483
0
        throw QPDFExc(
484
0
            qpdf_e_unsupported,
485
0
            qpdf()->getFilename(),
486
0
            "",
487
0
            offset(),
488
0
            "error getting raw stream data");
489
0
    }
490
0
    return result;
491
0
}
492
493
bool
494
Stream::isRootMetadata() const
495
9.21k
{
496
9.21k
    if (!stream()->stream_dict.isDictionaryOfType("/Metadata", "/XML")) {
497
9.19k
        return false;
498
9.19k
    }
499
20
    return qpdf()->getRoot()["/Metadata"].isSameObjectAs(obj);
500
9.21k
}
501
502
bool
503
Stream::filterable(
504
    qpdf_stream_decode_level_e decode_level,
505
    std::vector<std::shared_ptr<QPDFStreamFilter>>& filters)
506
9.26k
{
507
9.26k
    auto s = stream();
508
    // Check filters
509
510
9.26k
    auto const& filter_obj = s->stream_dict["/Filter"];
511
512
9.26k
    if (filter_obj.null()) {
513
        // No filters
514
3.02k
        return true;
515
3.02k
    }
516
6.24k
    if (filter_obj.isName()) {
517
        // One filter
518
4.76k
        auto ff = s->filter_factory(filter_obj.getName());
519
4.76k
        if (!ff) {
520
113
            return false;
521
113
        }
522
4.65k
        filters.emplace_back(ff());
523
4.65k
    } else if (Array array = filter_obj) {
524
        // Potentially multiple filters
525
32.5k
        for (Name item: array) {
526
32.5k
            if (!item) {
527
24
                warn("stream filter type is not name or array");
528
24
                return false;
529
24
            }
530
32.5k
            auto ff = s->filter_factory(item);
531
32.5k
            if (!ff) {
532
150
                filters.clear();
533
150
                return false;
534
150
            }
535
32.3k
            filters.emplace_back(ff());
536
32.3k
        }
537
1.47k
    } else {
538
7
        warn("stream filter type is not name or array");
539
7
        return false;
540
7
    }
541
542
    // filters now contains a list of filters to be applied in order. See which ones we can support.
543
    // See if we can support any decode parameters that are specified.
544
545
5.95k
    auto decode_obj = s->stream_dict.getKey("/DecodeParms");
546
547
5.95k
    auto can_filter = // linebreak
548
33.6k
        [](auto d_level, auto& filter, auto& d_obj) -> bool {
549
33.6k
        if (!filter.setDecodeParms(d_obj) ||
550
33.5k
            (d_level < qpdf_dl_all && filter.isLossyCompression()) ||
551
33.5k
            (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) {
552
143
            return false;
553
143
        }
554
33.5k
        return true;
555
33.6k
    };
556
557
5.95k
    auto decode_array = decode_obj.as_array(strict);
558
5.95k
    if (!decode_array || decode_array.size() == 0) {
559
5.91k
        if (decode_array) {
560
4
            decode_obj = QPDFObjectHandle::newNull();
561
4
        }
562
563
33.4k
        for (auto& filter: filters) {
564
33.4k
            if (!can_filter(decode_level, *filter, decode_obj)) {
565
139
                return false;
566
139
            }
567
33.4k
        }
568
5.91k
    } else {
569
        // Ignore /DecodeParms entirely if /Filters is empty.  At least one case of a file whose
570
        // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
571
33
        if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) {
572
8
            warn("stream /DecodeParms length is inconsistent with filters");
573
8
            return false;
574
8
        }
575
576
25
        int i = -1;
577
207
        for (auto& filter: filters) {
578
207
            auto d_obj = decode_array.get(++i);
579
207
            if (!can_filter(decode_level, *filter, d_obj)) {
580
4
                return false;
581
4
            }
582
207
        }
583
25
    }
584
585
5.80k
    return true;
586
5.95k
}
587
588
bool
589
Stream::pipeStreamData(
590
    Pipeline* pipeline,
591
    bool* filterp,
592
    int encode_flags,
593
    qpdf_stream_decode_level_e decode_level,
594
    bool suppress_warnings,
595
    bool will_retry)
596
9.26k
{
597
9.26k
    auto s = stream();
598
9.26k
    std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
599
9.26k
    bool ignored;
600
9.26k
    if (!filterp) {
601
0
        filterp = &ignored;
602
0
    }
603
9.26k
    bool& filter = *filterp;
604
605
9.26k
    const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0;
606
9.26k
    const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0;
607
9.26k
    const bool empty = empty_stream || empty_stream_data;
608
609
9.26k
    if (empty_stream || empty_stream_data) {
610
537
        filter = true;
611
537
    }
612
613
9.26k
    filter = empty || encode_flags || decode_level != qpdf_dl_none;
614
9.26k
    if (filter) {
615
9.26k
        filter = filterable(decode_level, filters);
616
9.26k
    }
617
618
9.26k
    if (!pipeline) {
619
0
        QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
620
        // Return value is whether we can filter in this case.
621
0
        return filter;
622
0
    }
623
624
    // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
625
    // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
626
    // objects.
627
9.26k
    std::vector<std::unique_ptr<Pipeline>> to_delete;
628
629
9.26k
    ContentNormalizer normalizer;
630
9.26k
    if (filter) {
631
8.81k
        if (encode_flags & qpdf_ef_compress) {
632
0
            auto new_pipeline =
633
0
                std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate);
634
0
            pipeline = new_pipeline.get();
635
0
            to_delete.push_back(std::move(new_pipeline));
636
0
        }
637
638
8.81k
        if (encode_flags & qpdf_ef_normalize) {
639
0
            auto new_pipeline =
640
0
                std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline);
641
0
            pipeline = new_pipeline.get();
642
0
            to_delete.push_back(std::move(new_pipeline));
643
0
        }
644
645
8.81k
        for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) {
646
0
            auto new_pipeline =
647
0
                std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline);
648
0
            pipeline = new_pipeline.get();
649
0
            to_delete.push_back(std::move(new_pipeline));
650
0
        }
651
652
42.3k
        for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) {
653
33.5k
            if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) {
654
33.2k
                pipeline = decode_pipeline;
655
33.2k
            }
656
33.5k
            auto* flate = dynamic_cast<Pl_Flate*>(pipeline);
657
33.5k
            if (flate) {
658
5.38k
                flate->setWarnCallback([this](char const* msg, int code) { warn(msg); });
659
5.38k
            }
660
33.5k
        }
661
8.81k
    }
662
663
9.26k
    if (s->stream_data.get()) {
664
0
        QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
665
0
        pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize());
666
0
        pipeline->finish();
667
9.26k
    } else if (s->stream_provider.get()) {
668
0
        Pl_Count count("stream provider count", pipeline);
669
0
        if (s->stream_provider->supportsRetry()) {
670
0
            if (!s->stream_provider->provideStreamData(
671
0
                    obj->getObjGen(), &count, suppress_warnings, will_retry)) {
672
0
                filter = false;
673
0
                return false;
674
0
            }
675
0
        } else {
676
0
            s->stream_provider->provideStreamData(obj->getObjGen(), &count);
677
0
        }
678
0
        qpdf_offset_t actual_length = count.getCount();
679
0
        if (s->stream_dict.hasKey("/Length")) {
680
0
            auto desired_length = s->stream_dict.getKey("/Length").getIntValue();
681
0
            if (actual_length != desired_length) {
682
0
                QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
683
                // This would be caused by programmer error on the part of a library user, not by
684
                // invalid input data.
685
0
                throw std::runtime_error(
686
0
                    "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " +
687
0
                    std::to_string(actual_length) + " bytes instead of expected " +
688
0
                    std::to_string(desired_length) + " bytes");
689
0
            }
690
0
        } else {
691
0
            QTC::TC("qpdf", "QPDF_Stream provider length not provided");
692
0
            s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length));
693
0
        }
694
9.26k
    } else {
695
9.26k
        if (offset() == 0) {
696
0
            throw std::logic_error("pipeStreamData called for stream with no data");
697
0
        }
698
9.26k
        if (!Streams::pipeStreamData(
699
9.26k
                qpdf(),
700
9.26k
                id_gen(),
701
9.26k
                offset(),
702
9.26k
                s->length,
703
9.26k
                s->stream_dict,
704
9.26k
                isRootMetadata(),
705
9.26k
                pipeline,
706
9.26k
                suppress_warnings,
707
9.26k
                will_retry)) {
708
1.65k
            filter = false;
709
1.65k
            return false;
710
1.65k
        }
711
9.26k
    }
712
713
7.61k
    if (filter && !suppress_warnings && normalizer.anyBadTokens()) {
714
0
        warn("content normalization encountered bad tokens");
715
0
        if (normalizer.lastTokenWasBad()) {
716
0
            QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
717
0
            warn(
718
0
                "normalized content ended with a bad token; you may be able to resolve this by "
719
0
                "coalescing content streams in combination with normalizing content. From the "
720
0
                "command line, specify --coalesce-contents");
721
0
        }
722
0
        warn(
723
0
            "Resulting stream data may be corrupted but is may still useful for manual "
724
0
            "inspection. For more information on this warning, search for content normalization "
725
0
            "in the manual.");
726
0
    }
727
728
7.61k
    return true;
729
9.26k
}
730
731
void
732
Stream::replaceStreamData(
733
    std::string&& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
734
0
{
735
0
    auto s = stream();
736
0
    s->stream_data = std::make_shared<Buffer>(std::move(data));
737
0
    s->stream_provider = nullptr;
738
0
    replaceFilterData(filter, decode_parms, s->stream_data->getSize());
739
0
}
740
741
void
742
Stream::replaceStreamData(
743
    std::shared_ptr<Buffer> data,
744
    QPDFObjectHandle const& filter,
745
    QPDFObjectHandle const& decode_parms)
746
0
{
747
0
    auto s = stream();
748
0
    s->stream_data = data;
749
0
    s->stream_provider = nullptr;
750
0
    replaceFilterData(filter, decode_parms, data->size());
751
0
}
752
753
void
754
Stream::replaceStreamData(
755
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider,
756
    QPDFObjectHandle const& filter,
757
    QPDFObjectHandle const& decode_parms)
758
0
{
759
0
    auto s = stream();
760
0
    s->stream_provider = provider;
761
0
    s->stream_data = nullptr;
762
0
    replaceFilterData(filter, decode_parms, 0);
763
0
}
764
765
void
766
Stream::replaceFilterData(
767
    QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length)
768
0
{
769
0
    auto s = stream();
770
0
    if (filter) {
771
0
        s->stream_dict.replaceKey("/Filter", filter);
772
0
    }
773
0
    if (decode_parms) {
774
0
        s->stream_dict.replaceKey("/DecodeParms", decode_parms);
775
0
    }
776
0
    if (length == 0) {
777
0
        QTC::TC("qpdf", "QPDF_Stream unknown stream length");
778
0
        s->stream_dict.removeKey("/Length");
779
0
    } else {
780
0
        s->stream_dict.replaceKey(
781
0
            "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length)));
782
0
    }
783
0
}
784
785
void
786
Stream::warn(std::string const& message)
787
2.11k
{
788
2.11k
    qpdf()->warn(qpdf_e_damaged_pdf, "", offset(), message);
789
2.11k
}
790
791
/// Return the stream dictionary. The handle is first viewed as a stream via as_stream(error).
QPDFObjectHandle
QPDFObjectHandle::getDict() const
{
    auto target = as_stream(error);
    return target.getDict();
}
796
797
void
798
QPDFObjectHandle::setFilterOnWrite(bool val)
799
0
{
800
0
    as_stream(error).setFilterOnWrite(val);
801
0
}
802
803
bool
804
QPDFObjectHandle::getFilterOnWrite()
805
0
{
806
0
    return as_stream(error).getFilterOnWrite();
807
0
}
808
809
bool
810
QPDFObjectHandle::isDataModified()
811
0
{
812
0
    return as_stream(error).isDataModified();
813
0
}
814
815
void
816
QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict)
817
0
{
818
0
    as_stream(error).replaceDict(new_dict);
819
0
}
820
821
bool
822
QPDFObjectHandle::isRootMetadata() const
823
0
{
824
0
    return as_stream(error).isRootMetadata();
825
0
}
826
827
std::shared_ptr<Buffer>
828
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
829
7.35k
{
830
7.35k
    return std::make_shared<Buffer>(as_stream(error).getStreamData(level));
831
7.35k
}
832
833
std::shared_ptr<Buffer>
834
QPDFObjectHandle::getRawStreamData()
835
0
{
836
0
    return std::make_shared<Buffer>(as_stream(error).getRawStreamData());
837
0
}
838
839
bool
840
QPDFObjectHandle::pipeStreamData(
841
    Pipeline* p,
842
    bool* filtering_attempted,
843
    int encode_flags,
844
    qpdf_stream_decode_level_e decode_level,
845
    bool suppress_warnings,
846
    bool will_retry)
847
0
{
848
0
    return as_stream(error).pipeStreamData(
849
0
        p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
850
0
}
851
852
bool
853
QPDFObjectHandle::pipeStreamData(
854
    Pipeline* p,
855
    int encode_flags,
856
    qpdf_stream_decode_level_e decode_level,
857
    bool suppress_warnings,
858
    bool will_retry)
859
0
{
860
0
    bool filtering_attempted;
861
0
    as_stream(error).pipeStreamData(
862
0
        p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
863
0
    return filtering_attempted;
864
0
}
865
866
bool
867
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress)
868
0
{
869
0
    int encode_flags = 0;
870
0
    qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
871
0
    if (filter) {
872
0
        decode_level = qpdf_dl_generalized;
873
0
        if (normalize) {
874
0
            encode_flags |= qpdf_ef_normalize;
875
0
        }
876
0
        if (compress) {
877
0
            encode_flags |= qpdf_ef_compress;
878
0
        }
879
0
    }
880
0
    return pipeStreamData(p, encode_flags, decode_level, false);
881
0
}
882
883
void
884
QPDFObjectHandle::replaceStreamData(
885
    std::shared_ptr<Buffer> data,
886
    QPDFObjectHandle const& filter,
887
    QPDFObjectHandle const& decode_parms)
888
0
{
889
0
    as_stream(error).replaceStreamData(data, filter, decode_parms);
890
0
}
891
892
void
893
QPDFObjectHandle::replaceStreamData(
894
    std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
895
0
{
896
0
    std::string s(data);
897
0
    as_stream(error).replaceStreamData(std::move(s), filter, decode_parms);
898
0
}
899
900
void
901
QPDFObjectHandle::replaceStreamData(
902
    std::shared_ptr<StreamDataProvider> provider,
903
    QPDFObjectHandle const& filter,
904
    QPDFObjectHandle const& decode_parms)
905
0
{
906
0
    as_stream(error).replaceStreamData(provider, filter, decode_parms);
907
0
}
908
909
namespace
910
{
911
    class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
912
    {
913
      public:
914
        FunctionProvider(std::function<void(Pipeline*)> provider) :
915
0
            StreamDataProvider(false),
916
0
            p1(provider),
917
0
            p2(nullptr)
918
0
        {
919
0
        }
920
        FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
921
0
            StreamDataProvider(true),
922
0
            p1(nullptr),
923
0
            p2(provider)
924
0
        {
925
0
        }
926
927
        void
928
        provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override
929
0
        {
930
0
            p1(pipeline);
931
0
        }
932
933
        bool
934
        provideStreamData(
935
            QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override
936
0
        {
937
0
            return p2(pipeline, suppress_warnings, will_retry);
938
0
        }
939
940
      private:
941
        std::function<void(Pipeline*)> p1;
942
        std::function<bool(Pipeline*, bool, bool)> p2;
943
    };
944
} // namespace
945
946
void
947
QPDFObjectHandle::replaceStreamData(
948
    std::function<void(Pipeline*)> provider,
949
    QPDFObjectHandle const& filter,
950
    QPDFObjectHandle const& decode_parms)
951
0
{
952
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
953
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
954
0
}
955
956
void
957
QPDFObjectHandle::replaceStreamData(
958
    std::function<bool(Pipeline*, bool, bool)> provider,
959
    QPDFObjectHandle const& filter,
960
    QPDFObjectHandle const& decode_parms)
961
0
{
962
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
963
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
964
0
}
965
966
/// Forward to Stream::getStreamJSON on the stream obtained via as_stream(error).
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    auto target = as_stream(error);
    return target.getStreamJSON(json_version, json_data, decode_level, p, data_filename);
}
976
977
/// Return a copy of this stream as made by Stream::copy, reached via as_stream(error).
QPDFObjectHandle
QPDFObjectHandle::copyStream()
{
    auto target = as_stream(error);
    return target.copy();
}