Coverage Report

Created: 2025-10-10 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_Stream.cc
Line
Count
Source
1
#include <qpdf/QPDFObjectHandle_private.hh>
2
3
#include <qpdf/ContentNormalizer.hh>
4
#include <qpdf/JSON_writer.hh>
5
#include <qpdf/Pipeline.hh>
6
#include <qpdf/Pipeline_private.hh>
7
#include <qpdf/Pl_Buffer.hh>
8
#include <qpdf/Pl_Count.hh>
9
#include <qpdf/Pl_Discard.hh>
10
#include <qpdf/Pl_Flate.hh>
11
#include <qpdf/Pl_QPDFTokenizer.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFExc.hh>
14
#include <qpdf/QPDF_private.hh>
15
#include <qpdf/QTC.hh>
16
#include <qpdf/QUtil.hh>
17
#include <qpdf/SF_ASCII85Decode.hh>
18
#include <qpdf/SF_ASCIIHexDecode.hh>
19
#include <qpdf/SF_DCTDecode.hh>
20
#include <qpdf/SF_FlateLzwDecode.hh>
21
#include <qpdf/SF_RunLengthDecode.hh>
22
23
#include <stdexcept>
24
25
using namespace std::literals;
26
using namespace qpdf;
27
28
using Streams = QPDF::Doc::Objects::Streams;
29
30
bool
31
Streams::immediate_copy_from() const
32
0
{
33
0
    return qpdf_.m->immediate_copy_from;
34
0
}
35
36
class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider
37
{
38
    class Data
39
    {
40
        friend class Streams;
41
42
      public:
43
        Data(Stream& source, Dictionary const& dest_dict) :
44
0
            encp(source.qpdf()->m->encp),
45
0
            file(source.qpdf()->m->file),
46
0
            source_og(source.id_gen()),
47
0
            offset(source.offset()),
48
0
            length(source.getLength()),
49
0
            dest_dict(dest_dict),
50
0
            is_root_metadata(source.isRootMetadata())
51
0
        {
52
0
        }
53
54
      private:
55
        std::shared_ptr<EncryptionParameters> encp;
56
        std::shared_ptr<InputSource> file;
57
        QPDFObjGen source_og;
58
        qpdf_offset_t offset;
59
        size_t length;
60
        QPDFObjectHandle dest_dict;
61
        bool is_root_metadata{false};
62
    };
63
64
  public:
65
    Copier() = delete;
66
    Copier(StreamDataProvider const&) = delete;
67
    Copier(StreamDataProvider&&) = delete;
68
    Copier& operator=(StreamDataProvider const&) = delete;
69
    Copier& operator=(StreamDataProvider&&) = delete;
70
24.3k
    ~Copier() final = default;
71
72
    Copier(Streams& streams) :
73
24.3k
        QPDFObjectHandle::StreamDataProvider(true),
74
24.3k
        streams(streams)
75
24.3k
    {
76
24.3k
    }
77
78
    bool
79
    provideStreamData(
80
        QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final
81
0
    {
82
0
        auto data = copied_data.find(og);
83
0
        if (data != copied_data.end()) {
84
0
            auto& fd = data->second;
85
0
            QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1);
86
0
            if (streams.qpdf().pipeStreamData(
87
0
                    fd.encp,
88
0
                    fd.file,
89
0
                    streams.qpdf(),
90
0
                    fd.source_og,
91
0
                    fd.offset,
92
0
                    fd.length,
93
0
                    fd.dest_dict,
94
0
                    fd.is_root_metadata,
95
0
                    pipeline,
96
0
                    suppress_warnings,
97
0
                    will_retry)) {
98
0
                return true; // for CI coverage
99
0
            } else {
100
0
                return false;
101
0
            }
102
0
        }
103
0
        auto stream = copied_streams.find(og);
104
0
        qpdf_invariant(stream == copied_streams.end() || stream->second);
105
0
        if (stream != copied_streams.end() &&
106
0
            stream->second.pipeStreamData(
107
0
                pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) {
108
0
            return true; // for CI coverage
109
0
        }
110
0
        return false;
111
0
    }
112
113
    void
114
    register_copy(Stream& dest, Stream& source, bool provider)
115
0
    {
116
0
        qpdf_expect(source);
117
0
        qpdf_expect(dest);
118
0
        if (provider) {
119
0
            copied_streams.insert_or_assign(dest, source);
120
0
        } else {
121
0
            copied_data.insert_or_assign(dest, Data(source, dest.getDict()));
122
0
        }
123
0
    }
124
125
  private:
126
    Streams& streams;
127
    std::map<QPDFObjGen, Stream> copied_streams;
128
    std::map<QPDFObjGen, Data> copied_data;
129
};
130
131
Streams::Streams(QPDF& qpdf) :
132
24.3k
    qpdf_(qpdf),
133
24.3k
    copier_(std::make_shared<Copier>(*this))
134
24.3k
{
135
24.3k
}
136
137
namespace
138
{
139
    class SF_Crypt final: public QPDFStreamFilter
140
    {
141
      public:
142
2.00k
        SF_Crypt() = default;
143
        ~SF_Crypt() final = default;
144
145
        bool
146
        setDecodeParms(QPDFObjectHandle decode_parms) final
147
1.21k
        {
148
            // we only validate here - processing happens in decryptStream
149
1.21k
            if (Dictionary dict = decode_parms) {
150
642
                for (auto const& [key, value]: dict) {
151
642
                    if (key == "/Type" &&
152
45
                        (value.null() || Name(value) == "/CryptFilterDecodeParms")) {
153
10
                        continue;
154
10
                    }
155
632
                    if (key == "/Name") {
156
10
                        continue;
157
10
                    }
158
622
                    if (!value.null()) {
159
93
                        return false;
160
93
                    }
161
622
                }
162
82
                return true;
163
175
            }
164
1.04k
            return decode_parms.null();
165
1.21k
        }
166
167
        Pipeline*
168
        getDecodePipeline(Pipeline*) final
169
1.04k
        {
170
            // Not used -- handled by pipeStreamData
171
1.04k
            return nullptr;
172
1.04k
        }
173
    };
174
175
    class StreamBlobProvider
176
    {
177
      public:
178
        StreamBlobProvider(Stream stream, qpdf_stream_decode_level_e decode_level) :
179
0
            stream(stream),
180
0
            decode_level(decode_level)
181
0
        {
182
0
        }
183
        void
184
        operator()(Pipeline* p)
185
0
        {
186
0
            stream.pipeStreamData(p, nullptr, 0, decode_level, false, false);
187
0
        }
188
189
      private:
190
        Stream stream;
191
        qpdf_stream_decode_level_e decode_level;
192
    };
193
194
    /// User defined streamfilter factories
195
    std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
196
    bool filter_factories_registered = false;
197
} // namespace
198
199
std::string
200
QPDF_Stream::Members::expand_filter_name(std::string const& name) const
201
0
{
202
    // The PDF specification provides these filter abbreviations for use in inline images, but
203
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
204
    // accepts them for stream filters.
205
0
    if (name == "/AHx") {
206
0
        return "/ASCIIHexDecode";
207
0
    }
208
0
    if (name == "/A85") {
209
0
        return "/ASCII85Decode";
210
0
    }
211
0
    if (name == "/LZW") {
212
0
        return "/LZWDecode";
213
0
    }
214
0
    if (name == "/Fl") {
215
0
        return "/FlateDecode";
216
0
    }
217
0
    if (name == "/RL") {
218
0
        return "/RunLengthDecode";
219
0
    }
220
0
    if (name == "/CCF") {
221
0
        return "/CCITTFaxDecode";
222
0
    }
223
0
    if (name == "/DCT") {
224
0
        return "/DCTDecode";
225
0
    }
226
0
    return name;
227
0
};
228
229
std::function<std::shared_ptr<QPDFStreamFilter>()>
230
QPDF_Stream::Members::filter_factory(std::string const& name) const
231
47.2k
{
232
47.2k
    if (filter_factories_registered) [[unlikely]] {
233
        // We need to check user provided filters first as we allow users to replace qpdf provided
234
        // default filters. This will have a performance impact if the facility to register stream
235
        // filters is actually used. We can optimize this away if necessary.
236
0
        auto ff = filter_factories.find(expand_filter_name(name));
237
0
        if (ff != filter_factories.end()) {
238
0
            return ff->second;
239
0
        }
240
0
    }
241
47.2k
    if (name == "/FlateDecode") {
242
13.7k
        return SF_FlateLzwDecode::flate_factory;
243
13.7k
    }
244
33.4k
    if (name == "/Crypt") {
245
2.00k
        return []() { return std::make_shared<SF_Crypt>(); };
246
2.00k
    }
247
31.4k
    if (name == "/LZWDecode") {
248
2.20k
        return SF_FlateLzwDecode::lzw_factory;
249
2.20k
    }
250
29.2k
    if (name == "/RunLengthDecode") {
251
73
        return SF_RunLengthDecode::factory;
252
73
    }
253
29.2k
    if (name == "/DCTDecode") {
254
7.65k
        return SF_DCTDecode::factory;
255
7.65k
    }
256
21.5k
    if (name == "/ASCII85Decode") {
257
2.62k
        return SF_ASCII85Decode::factory;
258
2.62k
    }
259
18.9k
    if (name == "/ASCIIHexDecode") {
260
843
        return SF_ASCIIHexDecode::factory;
261
843
    }
262
    // The PDF specification provides these filter abbreviations for use in inline images, but
263
    // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader
264
    // also accepts them for stream filters.
265
266
18.0k
    if (name == "/Fl") {
267
1.70k
        return SF_FlateLzwDecode::flate_factory;
268
1.70k
    }
269
16.3k
    if (name == "/AHx") {
270
3.41k
        return SF_ASCIIHexDecode::factory;
271
3.41k
    }
272
12.9k
    if (name == "/A85") {
273
703
        return SF_ASCII85Decode::factory;
274
703
    }
275
12.2k
    if (name == "/LZW") {
276
1.86k
        return SF_FlateLzwDecode::lzw_factory;
277
1.86k
    }
278
10.3k
    if (name == "/RL") {
279
6.79k
        return SF_RunLengthDecode::factory;
280
6.79k
    }
281
3.60k
    if (name == "/DCT") {
282
1.58k
        return SF_DCTDecode::factory;
283
1.58k
    }
284
2.02k
    return nullptr;
285
3.60k
}
286
287
Stream::Stream(
288
    QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) :
289
46.0k
    BaseHandle(QPDFObject::create<QPDF_Stream>(&qpdf, og, std::move(stream_dict), length))
290
46.0k
{
291
46.0k
    auto descr = std::make_shared<QPDFObject::Description>(
292
46.0k
        qpdf.getFilename() + ", stream object " + og.unparse(' '));
293
46.0k
    obj->setDescription(&qpdf, descr, offset);
294
46.0k
    setDictDescription();
295
46.0k
}
296
297
Stream
298
Stream::copy()
299
0
{
300
0
    Stream result = qpdf()->newStream();
301
0
    result.stream()->stream_dict = getDict().copy();
302
0
    copy_data_to(result);
303
0
    return result;
304
0
}
305
306
void
307
Stream::copy_data_to(Stream& dest)
308
0
{
309
0
    qpdf_expect(dest);
310
0
    auto s = stream();
311
0
    auto& streams = qpdf()->doc().objects().streams();
312
0
    auto& d_streams = dest.qpdf()->doc().objects().streams();
313
314
0
    auto dict = dest.getDict();
315
316
    // Copy information from the foreign stream so we can pipe its data later without keeping the
317
    // original QPDF object around.
318
0
    if (streams.immediate_copy_from() && !s->stream_data) {
319
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
320
        // source stream so that if the source stream is copied multiple times, we don't have to
321
        // keep duplicating the memory.
322
0
        replaceStreamData(
323
0
            getRawStreamData(), s->stream_dict["/Filter"], s->stream_dict["/DecodeParms"]);
324
0
    }
325
0
    if (s->stream_data) {
326
0
        dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]);
327
0
    } else {
328
0
        d_streams.copier()->register_copy(dest, *this, s->stream_provider.get());
329
0
        dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]);
330
0
    }
331
0
}
332
333
void
334
Stream::registerStreamFilter(
335
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
336
0
{
337
0
    filter_factories[filter_name] = factory;
338
0
    filter_factories_registered = true;
339
0
}
340
341
JSON
342
Stream::getStreamJSON(
343
    int json_version,
344
    qpdf_json_stream_data_e json_data,
345
    qpdf_stream_decode_level_e decode_level,
346
    Pipeline* p,
347
    std::string const& data_filename)
348
0
{
349
0
    Pl_Buffer pb{"streamjson"};
350
0
    JSON::Writer jw{&pb, 0};
351
0
    decode_level =
352
0
        writeStreamJSON(json_version, jw, json_data, decode_level, p, data_filename, true);
353
0
    pb.finish();
354
0
    auto result = JSON::parse(pb.getString());
355
0
    if (json_data == qpdf_sj_inline) {
356
0
        result.addDictionaryMember("data", JSON::makeBlob(StreamBlobProvider(*this, decode_level)));
357
0
    }
358
0
    return result;
359
0
}
360
361
qpdf_stream_decode_level_e
362
Stream::writeStreamJSON(
363
    int json_version,
364
    JSON::Writer& jw,
365
    qpdf_json_stream_data_e json_data,
366
    qpdf_stream_decode_level_e decode_level,
367
    Pipeline* p,
368
    std::string const& data_filename,
369
    bool no_data_key)
370
0
{
371
0
    auto s = stream();
372
0
    switch (json_data) {
373
0
    case qpdf_sj_none:
374
0
    case qpdf_sj_inline:
375
0
        if (p != nullptr) {
376
0
            throw std::logic_error(
377
0
                "QPDF_Stream::writeStreamJSON: pipeline should only be supplied "
378
0
                "when json_data is file");
379
0
        }
380
0
        break;
381
0
    case qpdf_sj_file:
382
0
        if (p == nullptr) {
383
0
            throw std::logic_error(
384
0
                "QPDF_Stream::writeStreamJSON: pipeline must be supplied when json_data is file");
385
0
        }
386
0
        if (data_filename.empty()) {
387
0
            throw std::logic_error(
388
0
                "QPDF_Stream::writeStreamJSON: data_filename must be supplied "
389
0
                "when json_data is file");
390
0
        }
391
0
        break;
392
0
    }
393
394
0
    jw.writeStart('{');
395
396
0
    if (json_data == qpdf_sj_none) {
397
0
        jw.writeNext();
398
0
        jw << R"("dict": )";
399
0
        s->stream_dict.writeJSON(json_version, jw);
400
0
        jw.writeEnd('}');
401
0
        return decode_level;
402
0
    }
403
404
0
    Pl_Discard discard;
405
0
    Pl_Buffer buf_pl{"stream data"};
406
0
    Pipeline* data_pipeline = &buf_pl;
407
0
    if (no_data_key && json_data == qpdf_sj_inline) {
408
0
        data_pipeline = &discard;
409
0
    }
410
    // pipeStreamData produced valid data.
411
0
    bool buf_pl_ready = false;
412
0
    bool filtered = false;
413
0
    bool filter = (decode_level != qpdf_dl_none);
414
0
    for (int attempt = 1; attempt <= 2; ++attempt) {
415
0
        bool succeeded =
416
0
            pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1));
417
0
        if (!succeeded || (filter && !filtered)) {
418
            // Try again
419
0
            filter = false;
420
0
            decode_level = qpdf_dl_none;
421
0
            buf_pl.getString(); // reset buf_pl
422
0
        } else {
423
0
            buf_pl_ready = true;
424
0
            break;
425
0
        }
426
0
    }
427
0
    if (!buf_pl_ready) {
428
0
        throw std::logic_error("QPDF_Stream: failed to get stream data");
429
0
    }
430
    // We can use unsafeShallowCopy because we are only touching top-level keys.
431
0
    auto dict = s->stream_dict.unsafeShallowCopy();
432
0
    dict.removeKey("/Length");
433
0
    if (filter && filtered) {
434
0
        dict.removeKey("/Filter");
435
0
        dict.removeKey("/DecodeParms");
436
0
    }
437
0
    if (json_data == qpdf_sj_file) {
438
0
        jw.writeNext() << R"("datafile": ")" << JSON::Writer::encode_string(data_filename) << "\"";
439
0
        p->writeString(buf_pl.getString());
440
0
    } else if (json_data == qpdf_sj_inline) {
441
0
        if (!no_data_key) {
442
0
            jw.writeNext() << R"("data": ")";
443
0
            jw.writeBase64(buf_pl.getString()) << "\"";
444
0
        }
445
0
    } else {
446
0
        throw std::logic_error("QPDF_Stream::writeStreamJSON : unexpected value of json_data");
447
0
    }
448
449
0
    jw.writeNext() << R"("dict": )";
450
0
    dict.writeJSON(json_version, jw);
451
0
    jw.writeEnd('}');
452
453
0
    return decode_level;
454
0
}
455
456
void
457
qpdf::Stream::setDictDescription()
458
46.0k
{
459
46.0k
    auto s = stream();
460
46.0k
    if (!s->stream_dict.hasObjectDescription()) {
461
0
        s->stream_dict.setObjectDescription(
462
0
            obj->getQPDF(), obj->getDescription() + " -> stream dictionary");
463
0
    }
464
46.0k
}
465
466
std::string
467
Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
468
7.54k
{
469
7.54k
    std::string result;
470
7.54k
    pl::String buf(result);
471
7.54k
    bool filtered;
472
7.54k
    pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
473
7.54k
    if (!filtered) {
474
1.49k
        throw QPDFExc(
475
1.49k
            qpdf_e_unsupported,
476
1.49k
            qpdf()->getFilename(),
477
1.49k
            "",
478
1.49k
            offset(),
479
1.49k
            "getStreamData called on unfilterable stream");
480
1.49k
    }
481
6.04k
    return result;
482
7.54k
}
483
484
std::string
485
Stream::getRawStreamData()
486
0
{
487
0
    std::string result;
488
0
    pl::String buf(result);
489
0
    if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) {
490
0
        throw QPDFExc(
491
0
            qpdf_e_unsupported,
492
0
            qpdf()->getFilename(),
493
0
            "",
494
0
            offset(),
495
0
            "error getting raw stream data");
496
0
    }
497
0
    return result;
498
0
}
499
500
bool
501
Stream::isRootMetadata() const
502
157k
{
503
157k
    if (!stream()->stream_dict.isDictionaryOfType("/Metadata", "/XML")) {
504
156k
        return false;
505
156k
    }
506
933
    return qpdf()->getRoot()["/Metadata"].isSameObjectAs(obj);
507
157k
}
508
509
bool
510
Stream::filterable(
511
    qpdf_stream_decode_level_e decode_level,
512
    std::vector<std::shared_ptr<QPDFStreamFilter>>& filters)
513
60.4k
{
514
60.4k
    auto s = stream();
515
    // Check filters
516
517
60.4k
    auto const& filter_obj = s->stream_dict["/Filter"];
518
519
60.4k
    if (filter_obj.null()) {
520
        // No filters
521
30.8k
        return true;
522
30.8k
    }
523
29.6k
    if (filter_obj.isName()) {
524
        // One filter
525
15.7k
        auto ff = s->filter_factory(filter_obj.getName());
526
15.7k
        if (!ff) {
527
1.05k
            return false;
528
1.05k
        }
529
14.7k
        filters.emplace_back(ff());
530
14.7k
    } else if (Array array = filter_obj) {
531
        // Potentially multiple filters
532
31.5k
        for (Name item: array) {
533
31.5k
            if (!item) {
534
115
                warn("stream filter type is not name or array");
535
115
                return false;
536
115
            }
537
31.4k
            auto ff = s->filter_factory(item);
538
31.4k
            if (!ff) {
539
965
                filters.clear();
540
965
                return false;
541
965
            }
542
30.5k
            filters.emplace_back(ff());
543
30.5k
        }
544
13.7k
    } else {
545
35
        warn("stream filter type is not name or array");
546
35
        return false;
547
35
    }
548
549
    // filters now contains a list of filters to be applied in order. See which ones we can support.
550
    // See if we can support any decode parameters that are specified.
551
552
27.4k
    auto decode_obj = s->stream_dict.getKey("/DecodeParms");
553
554
27.4k
    auto can_filter = // linebreak
555
40.2k
        [](auto d_level, auto& filter, auto& d_obj) -> bool {
556
40.2k
        if (!filter.setDecodeParms(d_obj) ||
557
39.8k
            (d_level < qpdf_dl_all && filter.isLossyCompression()) ||
558
39.8k
            (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) {
559
398
            return false;
560
398
        }
561
39.8k
        return true;
562
40.2k
    };
563
564
27.4k
    auto decode_array = decode_obj.as_array(strict);
565
27.4k
    if (!decode_array || decode_array.size() == 0) {
566
26.6k
        if (decode_array) {
567
10
            decode_obj = QPDFObjectHandle::newNull();
568
10
        }
569
570
39.4k
        for (auto& filter: filters) {
571
39.4k
            if (!can_filter(decode_level, *filter, decode_obj)) {
572
330
                return false;
573
330
            }
574
39.4k
        }
575
26.6k
    } else {
576
        // Ignore /DecodeParms entirely if /Filters is empty.  At least one case of a file whose
577
        // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
578
784
        if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) {
579
78
            warn("stream /DecodeParms length is inconsistent with filters");
580
78
            return false;
581
78
        }
582
583
706
        int i = -1;
584
706
        for (auto& filter: filters) {
585
705
            auto d_obj = decode_array.get(++i);
586
705
            if (!can_filter(decode_level, *filter, d_obj)) {
587
68
                return false;
588
68
            }
589
705
        }
590
706
    }
591
592
26.9k
    return true;
593
27.4k
}
594
595
bool
596
Stream::pipeStreamData(
597
    Pipeline* pipeline,
598
    bool* filterp,
599
    int encode_flags,
600
    qpdf_stream_decode_level_e decode_level,
601
    bool suppress_warnings,
602
    bool will_retry)
603
87.5k
{
604
87.5k
    auto s = stream();
605
87.5k
    std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
606
87.5k
    bool ignored;
607
87.5k
    if (!filterp) {
608
0
        filterp = &ignored;
609
0
    }
610
87.5k
    bool& filter = *filterp;
611
612
87.5k
    const bool empty_stream = !s->stream_provider && !s->stream_data && s->length == 0;
613
87.5k
    const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0;
614
87.5k
    const bool empty = empty_stream || empty_stream_data;
615
616
87.5k
    if (empty_stream || empty_stream_data) {
617
3.03k
        filter = true;
618
3.03k
    }
619
620
87.5k
    filter = empty || encode_flags || decode_level != qpdf_dl_none;
621
87.5k
    if (filter) {
622
60.4k
        filter = filterable(decode_level, filters);
623
60.4k
    }
624
625
87.5k
    if (!pipeline) {
626
0
        QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
627
        // Return value is whether we can filter in this case.
628
0
        return filter;
629
0
    }
630
631
    // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
632
    // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
633
    // objects.
634
87.5k
    std::vector<std::unique_ptr<Pipeline>> to_delete;
635
636
87.5k
    ContentNormalizer normalizer;
637
87.5k
    if (filter) {
638
57.7k
        if (encode_flags & qpdf_ef_compress) {
639
45.8k
            auto new_pipeline =
640
45.8k
                std::make_unique<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate);
641
45.8k
            pipeline = new_pipeline.get();
642
45.8k
            to_delete.push_back(std::move(new_pipeline));
643
45.8k
        }
644
645
57.7k
        if (encode_flags & qpdf_ef_normalize) {
646
0
            auto new_pipeline =
647
0
                std::make_unique<Pl_QPDFTokenizer>("normalizer", &normalizer, pipeline);
648
0
            pipeline = new_pipeline.get();
649
0
            to_delete.push_back(std::move(new_pipeline));
650
0
        }
651
652
57.7k
        for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) {
653
0
            auto new_pipeline =
654
0
                std::make_unique<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline);
655
0
            pipeline = new_pipeline.get();
656
0
            to_delete.push_back(std::move(new_pipeline));
657
0
        }
658
659
97.5k
        for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) {
660
39.7k
            if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) {
661
38.6k
                pipeline = decode_pipeline;
662
38.6k
            }
663
39.7k
            auto* flate = dynamic_cast<Pl_Flate*>(pipeline);
664
39.7k
            if (flate) {
665
15.8k
                flate->setWarnCallback([this](char const* msg, int code) { warn(msg); });
666
15.8k
            }
667
39.7k
        }
668
57.7k
    }
669
670
87.5k
    if (s->stream_data.get()) {
671
0
        QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
672
0
        pipeline->write(s->stream_data->getBuffer(), s->stream_data->getSize());
673
0
        pipeline->finish();
674
87.5k
    } else if (s->stream_provider.get()) {
675
0
        Pl_Count count("stream provider count", pipeline);
676
0
        if (s->stream_provider->supportsRetry()) {
677
0
            if (!s->stream_provider->provideStreamData(
678
0
                    obj->getObjGen(), &count, suppress_warnings, will_retry)) {
679
0
                filter = false;
680
0
                return false;
681
0
            }
682
0
        } else {
683
0
            s->stream_provider->provideStreamData(obj->getObjGen(), &count);
684
0
        }
685
0
        qpdf_offset_t actual_length = count.getCount();
686
0
        if (s->stream_dict.hasKey("/Length")) {
687
0
            auto desired_length = s->stream_dict.getKey("/Length").getIntValue();
688
0
            if (actual_length != desired_length) {
689
0
                QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
690
                // This would be caused by programmer error on the part of a library user, not by
691
                // invalid input data.
692
0
                throw std::runtime_error(
693
0
                    "stream data provider for " + obj->getObjGen().unparse(' ') + " provided " +
694
0
                    std::to_string(actual_length) + " bytes instead of expected " +
695
0
                    std::to_string(desired_length) + " bytes");
696
0
            }
697
0
        } else {
698
0
            QTC::TC("qpdf", "QPDF_Stream provider length not provided");
699
0
            s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length));
700
0
        }
701
87.5k
    } else {
702
87.5k
        if (offset() == 0) {
703
0
            throw std::logic_error("pipeStreamData called for stream with no data");
704
0
        }
705
87.5k
        if (!Streams::pipeStreamData(
706
87.5k
                qpdf(),
707
87.5k
                id_gen(),
708
87.5k
                offset(),
709
87.5k
                s->length,
710
87.5k
                s->stream_dict,
711
87.5k
                isRootMetadata(),
712
87.5k
                pipeline,
713
87.5k
                suppress_warnings,
714
87.5k
                will_retry)) {
715
9.72k
            filter = false;
716
9.72k
            return false;
717
9.72k
        }
718
87.5k
    }
719
720
77.8k
    if (filter && !suppress_warnings && normalizer.anyBadTokens()) {
721
0
        warn("content normalization encountered bad tokens");
722
0
        if (normalizer.lastTokenWasBad()) {
723
0
            QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
724
0
            warn(
725
0
                "normalized content ended with a bad token; you may be able to resolve this by "
726
0
                "coalescing content streams in combination with normalizing content. From the "
727
0
                "command line, specify --coalesce-contents");
728
0
        }
729
0
        warn(
730
0
            "Resulting stream data may be corrupted but is may still useful for manual "
731
0
            "inspection. For more information on this warning, search for content normalization "
732
0
            "in the manual.");
733
0
    }
734
735
77.8k
    return true;
736
87.5k
}
737
738
void
739
Stream::replaceStreamData(
740
    std::string&& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
741
0
{
742
0
    auto s = stream();
743
0
    s->stream_data = std::make_shared<Buffer>(std::move(data));
744
0
    s->stream_provider = nullptr;
745
0
    replaceFilterData(filter, decode_parms, s->stream_data->getSize());
746
0
}
747
748
void
749
Stream::replaceStreamData(
750
    std::shared_ptr<Buffer> data,
751
    QPDFObjectHandle const& filter,
752
    QPDFObjectHandle const& decode_parms)
753
0
{
754
0
    auto s = stream();
755
0
    s->stream_data = data;
756
0
    s->stream_provider = nullptr;
757
0
    replaceFilterData(filter, decode_parms, data->size());
758
0
}
759
760
void
761
Stream::replaceStreamData(
762
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider,
763
    QPDFObjectHandle const& filter,
764
    QPDFObjectHandle const& decode_parms)
765
0
{
766
0
    auto s = stream();
767
0
    s->stream_provider = provider;
768
0
    s->stream_data = nullptr;
769
0
    replaceFilterData(filter, decode_parms, 0);
770
0
}
771
772
void
773
Stream::replaceFilterData(
774
    QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length)
775
0
{
776
0
    auto s = stream();
777
0
    if (filter) {
778
0
        s->stream_dict.replaceKey("/Filter", filter);
779
0
    }
780
0
    if (decode_parms) {
781
0
        s->stream_dict.replaceKey("/DecodeParms", decode_parms);
782
0
    }
783
0
    if (length == 0) {
784
0
        QTC::TC("qpdf", "QPDF_Stream unknown stream length");
785
0
        s->stream_dict.removeKey("/Length");
786
0
    } else {
787
0
        s->stream_dict.replaceKey(
788
0
            "/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length)));
789
0
    }
790
0
}
791
792
void
793
Stream::warn(std::string const& message)
794
3.19k
{
795
3.19k
    qpdf()->warn(qpdf_e_damaged_pdf, "", offset(), message);
796
3.19k
}
797
798
QPDFObjectHandle
799
QPDFObjectHandle::getDict() const
800
268k
{
801
268k
    return as_stream(error).getDict();
802
268k
}
803
804
void
805
QPDFObjectHandle::setFilterOnWrite(bool val)
806
10.0k
{
807
10.0k
    as_stream(error).setFilterOnWrite(val);
808
10.0k
}
809
810
bool
811
QPDFObjectHandle::getFilterOnWrite()
812
70.0k
{
813
70.0k
    return as_stream(error).getFilterOnWrite();
814
70.0k
}
815
816
bool
817
QPDFObjectHandle::isDataModified()
818
75.0k
{
819
75.0k
    return as_stream(error).isDataModified();
820
75.0k
}
821
822
void
823
QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict)
824
0
{
825
0
    as_stream(error).replaceDict(new_dict);
826
0
}
827
828
bool
829
QPDFObjectHandle::isRootMetadata() const
830
70.0k
{
831
70.0k
    return as_stream(error).isRootMetadata();
832
70.0k
}
833
834
std::shared_ptr<Buffer>
835
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
836
5.98k
{
837
5.98k
    return std::make_shared<Buffer>(as_stream(error).getStreamData(level));
838
5.98k
}
839
840
std::shared_ptr<Buffer>
841
QPDFObjectHandle::getRawStreamData()
842
0
{
843
0
    return std::make_shared<Buffer>(as_stream(error).getRawStreamData());
844
0
}
845
846
bool
847
QPDFObjectHandle::pipeStreamData(
848
    Pipeline* p,
849
    bool* filtering_attempted,
850
    int encode_flags,
851
    qpdf_stream_decode_level_e decode_level,
852
    bool suppress_warnings,
853
    bool will_retry)
854
0
{
855
0
    return as_stream(error).pipeStreamData(
856
0
        p, filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
857
0
}
858
859
bool
860
QPDFObjectHandle::pipeStreamData(
861
    Pipeline* p,
862
    int encode_flags,
863
    qpdf_stream_decode_level_e decode_level,
864
    bool suppress_warnings,
865
    bool will_retry)
866
80.0k
{
867
80.0k
    bool filtering_attempted;
868
80.0k
    as_stream(error).pipeStreamData(
869
80.0k
        p, &filtering_attempted, encode_flags, decode_level, suppress_warnings, will_retry);
870
80.0k
    return filtering_attempted;
871
80.0k
}
872
873
bool
874
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, bool normalize, bool compress)
875
0
{
876
0
    int encode_flags = 0;
877
0
    qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
878
0
    if (filter) {
879
0
        decode_level = qpdf_dl_generalized;
880
0
        if (normalize) {
881
0
            encode_flags |= qpdf_ef_normalize;
882
0
        }
883
0
        if (compress) {
884
0
            encode_flags |= qpdf_ef_compress;
885
0
        }
886
0
    }
887
0
    return pipeStreamData(p, encode_flags, decode_level, false);
888
0
}
889
890
void
891
QPDFObjectHandle::replaceStreamData(
892
    std::shared_ptr<Buffer> data,
893
    QPDFObjectHandle const& filter,
894
    QPDFObjectHandle const& decode_parms)
895
0
{
896
0
    as_stream(error).replaceStreamData(data, filter, decode_parms);
897
0
}
898
899
void
900
QPDFObjectHandle::replaceStreamData(
901
    std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms)
902
0
{
903
0
    std::string s(data);
904
0
    as_stream(error).replaceStreamData(std::move(s), filter, decode_parms);
905
0
}
906
907
void
908
QPDFObjectHandle::replaceStreamData(
909
    std::shared_ptr<StreamDataProvider> provider,
910
    QPDFObjectHandle const& filter,
911
    QPDFObjectHandle const& decode_parms)
912
0
{
913
0
    as_stream(error).replaceStreamData(provider, filter, decode_parms);
914
0
}
915
916
namespace
917
{
918
    class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
919
    {
920
      public:
921
        FunctionProvider(std::function<void(Pipeline*)> provider) :
922
0
            StreamDataProvider(false),
923
0
            p1(provider),
924
0
            p2(nullptr)
925
0
        {
926
0
        }
927
        FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
928
0
            StreamDataProvider(true),
929
0
            p1(nullptr),
930
0
            p2(provider)
931
0
        {
932
0
        }
933
934
        void
935
        provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override
936
0
        {
937
0
            p1(pipeline);
938
0
        }
939
940
        bool
941
        provideStreamData(
942
            QPDFObjGen const&, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override
943
0
        {
944
0
            return p2(pipeline, suppress_warnings, will_retry);
945
0
        }
946
947
      private:
948
        std::function<void(Pipeline*)> p1;
949
        std::function<bool(Pipeline*, bool, bool)> p2;
950
    };
951
} // namespace
952
953
void
954
QPDFObjectHandle::replaceStreamData(
955
    std::function<void(Pipeline*)> provider,
956
    QPDFObjectHandle const& filter,
957
    QPDFObjectHandle const& decode_parms)
958
0
{
959
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
960
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
961
0
}
962
963
void
964
QPDFObjectHandle::replaceStreamData(
965
    std::function<bool(Pipeline*, bool, bool)> provider,
966
    QPDFObjectHandle const& filter,
967
    QPDFObjectHandle const& decode_parms)
968
0
{
969
0
    auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider));
970
0
    as_stream(error).replaceStreamData(sdp, filter, decode_parms);
971
0
}
972
973
JSON
974
QPDFObjectHandle::getStreamJSON(
975
    int json_version,
976
    qpdf_json_stream_data_e json_data,
977
    qpdf_stream_decode_level_e decode_level,
978
    Pipeline* p,
979
    std::string const& data_filename)
980
0
{
981
0
    return as_stream(error).getStreamJSON(json_version, json_data, decode_level, p, data_filename);
982
0
}
983
984
QPDFObjectHandle
985
QPDFObjectHandle::copyStream()
986
0
{
987
0
    return as_stream(error).copy();
988
0
}