Coverage Report

Created: 2025-12-05 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source
1
#include <qpdf/qpdf-config.h> // include early for large file support
2
3
#include <qpdf/QPDFWriter_private.hh>
4
5
#include <qpdf/MD5.hh>
6
#include <qpdf/Pl_AES_PDF.hh>
7
#include <qpdf/Pl_Flate.hh>
8
#include <qpdf/Pl_MD5.hh>
9
#include <qpdf/Pl_PNGFilter.hh>
10
#include <qpdf/Pl_RC4.hh>
11
#include <qpdf/Pl_StdioFile.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFObjectHandle_private.hh>
14
#include <qpdf/QPDFObject_private.hh>
15
#include <qpdf/QPDF_private.hh>
16
#include <qpdf/QTC.hh>
17
#include <qpdf/QUtil.hh>
18
#include <qpdf/RC4.hh>
19
#include <qpdf/Util.hh>
20
21
#include <algorithm>
22
#include <concepts>
23
#include <cstdlib>
24
#include <stdexcept>
25
#include <tuple>
26
27
using namespace std::literals;
28
using namespace qpdf;
29
30
using Encryption = impl::Doc::Encryption;
31
using Config = Writer::Config;
32
33
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // Intentionally written out of line with an explicit empty body instead of `= default` --
    // see QPDF_DLL_CLASS in README-maintainer.
}
37
38
// Store the callback that reportProgress() will invoke. The parameter is taken by value, so it
// is moved into the member rather than copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
42
43
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
    // Intentionally written out of line with an explicit empty body instead of `= default` --
    // see QPDF_DLL_CLASS in README-maintainer.
}
48
49
void
50
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
51
0
{
52
0
    handler(progress);
53
0
}
54
55
namespace
56
{
57
    class Pl_stack
58
    {
59
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
60
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
61
        // scope, the pipeline stack is popped. This causes finish to be called on the current
62
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
63
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
64
        // pops.
65
        class Popper
66
        {
67
            friend class Pl_stack;
68
69
          public:
70
            Popper() = default;
71
            Popper(Popper const&) = delete;
72
            Popper(Popper&& other) noexcept
73
0
            {
74
0
                // For MSVC, default pops the stack
75
0
                if (this != &other) {
76
0
                    stack = other.stack;
77
0
                    stack_id = other.stack_id;
78
0
                    other.stack = nullptr;
79
0
                    other.stack_id = 0;
80
0
                };
81
0
            }
82
            Popper& operator=(Popper const&) = delete;
83
            Popper&
84
            operator=(Popper&& other) noexcept
85
0
            {
86
0
                // For MSVC, default pops the stack
87
0
                if (this != &other) {
88
0
                    stack = other.stack;
89
0
                    stack_id = other.stack_id;
90
0
                    other.stack = nullptr;
91
0
                    other.stack_id = 0;
92
0
                };
93
0
                return *this;
94
0
            }
95
96
            ~Popper();
97
98
            // Manually pop pipeline from the pipeline stack.
99
            void pop();
100
101
          private:
102
            Popper(Pl_stack& stack) :
103
115k
                stack(&stack)
104
115k
            {
105
115k
            }
106
107
            Pl_stack* stack{nullptr};
108
            unsigned long stack_id{0};
109
        };
110
111
      public:
112
        Pl_stack(pl::Count*& top) :
113
9.56k
            top(top)
114
9.56k
        {
115
9.56k
        }
116
117
        Popper
118
        popper()
119
16.8k
        {
120
16.8k
            return {*this};
121
16.8k
        }
122
123
        void
124
        initialize(Pipeline* p)
125
9.56k
        {
126
9.56k
            auto c = std::make_unique<pl::Count>(++last_id, p);
127
9.56k
            top = c.get();
128
9.56k
            stack.emplace_back(std::move(c));
129
9.56k
        }
130
131
        Popper
132
        activate(std::string& str)
133
93.3k
        {
134
93.3k
            Popper pp{*this};
135
93.3k
            activate(pp, str);
136
93.3k
            return pp;
137
93.3k
        }
138
139
        void
140
        activate(Popper& pp, std::string& str)
141
93.3k
        {
142
93.3k
            activate(pp, false, &str, nullptr);
143
93.3k
        }
144
145
        void
146
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
147
0
        {
148
0
            count_buffer.clear();
149
0
            activate(pp, false, &count_buffer, std::move(next));
150
0
        }
151
152
        Popper
153
        activate(
154
            bool discard = false,
155
            std::string* str = nullptr,
156
            std::unique_ptr<Pipeline> next = nullptr)
157
4.99k
        {
158
4.99k
            Popper pp{*this};
159
4.99k
            activate(pp, discard, str, std::move(next));
160
4.99k
            return pp;
161
4.99k
        }
162
163
        void
164
        activate(
165
            Popper& pp,
166
            bool discard = false,
167
            std::string* str = nullptr,
168
            std::unique_ptr<Pipeline> next = nullptr)
169
106k
        {
170
106k
            std::unique_ptr<pl::Count> c;
171
106k
            if (next) {
172
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
173
106k
            } else if (discard) {
174
13.4k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
175
93.3k
            } else if (!str) {
176
0
                c = std::make_unique<pl::Count>(++last_id, top);
177
93.3k
            } else {
178
93.3k
                c = std::make_unique<pl::Count>(++last_id, *str);
179
93.3k
            }
180
106k
            pp.stack_id = last_id;
181
106k
            top = c.get();
182
106k
            stack.emplace_back(std::move(c));
183
106k
        }
184
        void
185
        activate_md5(Popper& pp)
186
0
        {
187
0
            qpdf_assert_debug(!md5_pipeline);
188
0
            qpdf_assert_debug(md5_id == 0);
189
0
            qpdf_assert_debug(top->getCount() == 0);
190
0
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
191
0
            md5_pipeline->persistAcrossFinish(true);
192
            // Special case code in pop clears m->md5_pipeline upon deletion.
193
0
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
194
0
            pp.stack_id = last_id;
195
0
            md5_id = last_id;
196
0
            top = c.get();
197
0
            stack.emplace_back(std::move(c));
198
0
        }
199
200
        // Return the hex digest and disable the MD5 pipeline.
201
        std::string
202
        hex_digest()
203
0
        {
204
0
            qpdf_assert_debug(md5_pipeline);
205
0
            auto digest = md5_pipeline->getHexDigest();
206
0
            md5_pipeline->enable(false);
207
0
            return digest;
208
0
        }
209
210
        void
211
        clear_buffer()
212
0
        {
213
0
            count_buffer.clear();
214
0
        }
215
216
      private:
217
        void
218
        pop(unsigned long stack_id)
219
115k
        {
220
115k
            if (!stack_id) {
221
8.43k
                return;
222
8.43k
            }
223
106k
            qpdf_assert_debug(stack.size() >= 2);
224
106k
            top->finish();
225
106k
            qpdf_assert_debug(stack.back().get() == top);
226
            // It used to be possible for this assertion to fail if writeLinearized exits by
227
            // exception when deterministic ID. There are no longer any cases in which two
228
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
229
            // assertion will fail if they get popped out of order from automatic destruction.
230
106k
            qpdf_assert_debug(top->id() == stack_id);
231
106k
            if (stack_id == md5_id) {
232
0
                md5_pipeline = nullptr;
233
0
                md5_id = 0;
234
0
            }
235
106k
            stack.pop_back();
236
106k
            top = stack.back().get();
237
106k
        }
238
239
        std::vector<std::unique_ptr<pl::Count>> stack;
240
        pl::Count*& top;
241
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
242
        unsigned long last_id{0};
243
        unsigned long md5_id{0};
244
        std::string count_buffer;
245
    };
246
} // namespace
247
248
// Pop the associated pipeline, if this Popper is still attached to a stack.
Pl_stack::Popper::~Popper()
{
    if (stack == nullptr) {
        return;
    }
    stack->pop(stack_id);
}
254
255
void
256
Pl_stack::Popper::pop()
257
7.96k
{
258
7.96k
    if (stack) {
259
7.96k
        stack->pop(stack_id);
260
7.96k
    }
261
7.96k
    stack_id = 0;
262
7.96k
    stack = nullptr;
263
7.96k
}
264
265
namespace qpdf::impl
266
{
267
    // Writer class is restricted to QPDFWriter so that only it can call certain methods.
268
    class Writer: protected Doc::Common
269
    {
270
      public:
271
        // flags used by unparseObject
272
        static int const f_stream = 1 << 0;
273
        static int const f_filtered = 1 << 1;
274
        static int const f_in_ostream = 1 << 2;
275
        static int const f_hex_string = 1 << 3;
276
        static int const f_no_encryption = 1 << 4;
277
278
        enum trailer_e { t_normal, t_lin_first, t_lin_second };
279
280
        Writer() = delete;
281
        Writer(Writer const&) = delete;
282
        Writer(Writer&&) = delete;
283
        Writer& operator=(Writer const&) = delete;
284
        Writer& operator=(Writer&&) = delete;
285
        ~Writer()
286
9.56k
        {
287
9.56k
            if (file && close_file) {
288
0
                fclose(file);
289
0
            }
290
9.56k
            delete output_buffer;
291
9.56k
        }
292
        Writer(QPDF& qpdf, QPDFWriter& w) :
293
9.78k
            Common(qpdf.doc()),
294
9.78k
            lin(qpdf.doc().linearization()),
295
9.78k
            cfg(true),
296
9.78k
            root_og(qpdf.getRoot().indirect() ? qpdf.getRoot().id_gen() : QPDFObjGen(-1, 0)),
297
9.78k
            pipeline_stack(pipeline)
298
9.78k
        {
299
9.78k
        }
300
301
        void write();
302
        std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
303
        void setMinimumPDFVersion(std::string const& version, int extension_level = 0);
304
        void copyEncryptionParameters(QPDF&);
305
        void doWriteSetup();
306
        void prepareFileForWrite();
307
308
        void disableIncompatibleEncryption(int major, int minor, int extension_level);
309
        void interpretR3EncryptionParameters(
310
            bool allow_accessibility,
311
            bool allow_extract,
312
            bool allow_assemble,
313
            bool allow_annotate_and_form,
314
            bool allow_form_filling,
315
            bool allow_modify_other,
316
            qpdf_r3_print_e print,
317
            qpdf_r3_modify_e modify);
318
        void setEncryptionParameters(char const* user_password, char const* owner_password);
319
        void setEncryptionMinimumVersion();
320
        void parseVersion(std::string const& version, int& major, int& minor) const;
321
        int compareVersions(int major1, int minor1, int major2, int minor2) const;
322
        void generateID(bool encrypted);
323
        std::string getOriginalID1();
324
        void initializeTables(size_t extra = 0);
325
        void preserveObjectStreams();
326
        void generateObjectStreams();
327
        void initializeSpecialStreams();
328
        void enqueue(QPDFObjectHandle const& object);
329
        void enqueueObjectsStandard();
330
        void enqueueObjectsPCLm();
331
        void enqueuePart(std::vector<QPDFObjectHandle>& part);
332
        void assignCompressedObjectNumbers(QPDFObjGen og);
333
        Dictionary trimmed_trailer();
334
335
        // Returns tuple<filter, compress_stream, is_root_metadata>
336
        std::tuple<const bool, const bool, const bool>
337
        will_filter_stream(QPDFObjectHandle stream, std::string* stream_data);
338
339
        // Test whether stream would be filtered if it were written.
340
        bool will_filter_stream(QPDFObjectHandle stream);
341
        unsigned int bytesNeeded(long long n);
342
        void writeBinary(unsigned long long val, unsigned int bytes);
343
        Writer& write(std::string_view str);
344
        Writer& write(size_t count, char c);
345
        Writer& write(std::integral auto val);
346
        Writer& write_name(std::string const& str);
347
        Writer& write_string(std::string const& str, bool force_binary = false);
348
        Writer& write_encrypted(std::string_view str);
349
350
        template <typename... Args>
351
        Writer& write_qdf(Args&&... args);
352
        template <typename... Args>
353
        Writer& write_no_qdf(Args&&... args);
354
        void writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj);
355
        void writeObjectStream(QPDFObjectHandle object);
356
        void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
357
        void writeTrailer(
358
            trailer_e which,
359
            int size,
360
            bool xref_stream,
361
            qpdf_offset_t prev,
362
            int linearization_pass);
363
        void unparseObject(
364
            QPDFObjectHandle object,
365
            size_t level,
366
            int flags,
367
            // for stream dictionaries
368
            size_t stream_length = 0,
369
            bool compress = false);
370
        void unparseChild(QPDFObjectHandle const& child, size_t level, int flags);
371
        int openObject(int objid = 0);
372
        void closeObject(int objid);
373
        void writeStandard();
374
        void writeLinearized();
375
        void writeEncryptionDictionary();
376
        void writeHeader();
377
        void writeHintStream(int hint_id);
378
        qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size);
379
        qpdf_offset_t writeXRefTable(
380
            trailer_e which,
381
            int first,
382
            int last,
383
            int size,
384
            // for linearization
385
            qpdf_offset_t prev,
386
            bool suppress_offsets,
387
            int hint_id,
388
            qpdf_offset_t hint_offset,
389
            qpdf_offset_t hint_length,
390
            int linearization_pass);
391
        qpdf_offset_t writeXRefStream(
392
            int objid,
393
            int max_id,
394
            qpdf_offset_t max_offset,
395
            trailer_e which,
396
            int first,
397
            int last,
398
            int size);
399
        qpdf_offset_t writeXRefStream(
400
            int objid,
401
            int max_id,
402
            qpdf_offset_t max_offset,
403
            trailer_e which,
404
            int first,
405
            int last,
406
            int size,
407
            // for linearization
408
            qpdf_offset_t prev,
409
            int hint_id,
410
            qpdf_offset_t hint_offset,
411
            qpdf_offset_t hint_length,
412
            bool skip_compression,
413
            int linearization_pass);
414
415
        void setDataKey(int objid);
416
        void indicateProgress(bool decrement, bool finished);
417
        size_t calculateXrefStreamPadding(qpdf_offset_t xref_bytes);
418
419
        void adjustAESStreamLength(size_t& length);
420
        void computeDeterministicIDData();
421
422
      protected:
423
        Doc::Linearization& lin;
424
425
        qpdf::Writer::Config cfg;
426
427
        QPDFObjGen root_og{-1, 0};
428
        char const* filename{"unspecified"};
429
        FILE* file{nullptr};
430
        bool close_file{false};
431
        std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
432
        Buffer* output_buffer{nullptr};
433
434
        std::unique_ptr<QPDF::Doc::Encryption> encryption;
435
        std::string encryption_key;
436
437
        std::string id1; // for /ID key of
438
        std::string id2; // trailer dictionary
439
        std::string final_pdf_version;
440
        int final_extension_level{0};
441
        std::string min_pdf_version;
442
        int min_extension_level{0};
443
        int encryption_dict_objid{0};
444
        std::string cur_data_key;
445
        std::unique_ptr<Pipeline> file_pl;
446
        qpdf::pl::Count* pipeline{nullptr};
447
        std::vector<QPDFObjectHandle> object_queue;
448
        size_t object_queue_front{0};
449
        QPDFWriter::ObjTable obj;
450
        QPDFWriter::NewObjTable new_obj;
451
        int next_objid{1};
452
        int cur_stream_length_id{0};
453
        size_t cur_stream_length{0};
454
        bool added_newline{false};
455
        size_t max_ostream_index{0};
456
        std::set<QPDFObjGen> normalized_streams;
457
        std::map<QPDFObjGen, int> page_object_to_seq;
458
        std::map<QPDFObjGen, int> contents_to_page_seq;
459
        std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
460
        Pl_stack pipeline_stack;
461
        std::string deterministic_id_data;
462
        bool did_write_setup{false};
463
464
        // For progress reporting
465
        std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
466
        int events_expected{0};
467
        int events_seen{0};
468
        int next_progress_report{0};
469
    }; // class qpdf::impl::Writer
470
471
} // namespace qpdf::impl
472
473
// Private implementation object of QPDFWriter. It inherits impl::Writer privately and grants
// access to QPDFWriter and qpdf::Writer through friendship.
class QPDFWriter::Members: private impl::Writer
{
    friend class QPDFWriter;
    friend class qpdf::Writer;

  public:
    // Note that the base class takes its arguments in the opposite order (qpdf, w).
    Members(QPDFWriter& w, QPDF& qpdf) :
        impl::Writer(qpdf, w)
    {
    }
};
484
485
// Construct a writer for qpdf and replace the default configuration with cfg. The configuration
// is received by value, so it is moved into place instead of being copied again.
qpdf::Writer::Writer(QPDF& qpdf, Config cfg) :
    QPDFWriter(qpdf)
{
    m->cfg = std::move(cfg);
}
490
QPDFWriter::QPDFWriter(QPDF& pdf) :
491
9.78k
    m(std::make_shared<Members>(*this, pdf))
492
9.78k
{
493
9.78k
}
494
495
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
496
0
    m(std::make_shared<Members>(*this, pdf))
497
0
{
498
0
    setOutputFilename(filename);
499
0
}
500
501
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
502
0
    m(std::make_shared<Members>(*this, pdf))
503
0
{
504
0
    setOutputFile(description, file, close_file);
505
0
}
506
507
void
508
QPDFWriter::setOutputFilename(char const* filename)
509
0
{
510
0
    char const* description = filename;
511
0
    FILE* f = nullptr;
512
0
    bool close_file = false;
513
0
    if (filename == nullptr) {
514
0
        description = "standard output";
515
0
        f = stdout;
516
0
        QUtil::binary_stdout();
517
0
    } else {
518
0
        f = QUtil::safe_fopen(filename, "wb+");
519
0
        close_file = true;
520
0
    }
521
0
    setOutputFile(description, f, close_file);
522
0
}
523
524
void
525
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
526
0
{
527
0
    m->filename = description;
528
0
    m->file = file;
529
0
    m->close_file = close_file;
530
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
531
0
    m->pipeline_stack.initialize(m->file_pl.get());
532
0
}
533
534
void
535
QPDFWriter::setOutputMemory()
536
0
{
537
0
    m->filename = "memory buffer";
538
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
539
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
540
0
}
541
542
// Hand the stored output buffer pointer to the caller and forget it, so that the writer's
// destructor no longer deletes it. Returns nullptr if no buffer is stored.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
549
550
std::shared_ptr<Buffer>
551
QPDFWriter::getBufferSharedPointer()
552
0
{
553
0
    return std::shared_ptr<Buffer>(getBuffer());
554
0
}
555
556
void
557
QPDFWriter::setOutputPipeline(Pipeline* p)
558
9.56k
{
559
9.56k
    m->filename = "custom pipeline";
560
9.56k
    m->pipeline_stack.initialize(p);
561
9.56k
}
562
563
void
564
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
565
0
{
566
0
    m->cfg.object_streams(mode);
567
0
}
568
569
void
570
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
571
0
{
572
0
    m->cfg.stream_data(mode);
573
0
}
574
575
// Translate a stream data mode into decode level and stream compression settings:
//   qpdf_s_preserve   -> decode level none, no compression
//   qpdf_s_uncompress -> decode level at least generalized, no compression
//   qpdf_s_compress   -> decode level at least generalized, compression on
// Any other value leaves the configuration untouched.
Config&
Config::stream_data(qpdf_stream_data_e mode)
{
    if (mode == qpdf_s_preserve) {
        decode_level(qpdf_dl_none);
        compress_streams(false);
    } else if (mode == qpdf_s_uncompress || mode == qpdf_s_compress) {
        // Never lower a decode level that is already above generalized.
        decode_level(std::max(qpdf_dl_generalized, decode_level_));
        compress_streams(mode == qpdf_s_compress);
    }
    return *this;
}
595
596
void
597
QPDFWriter::setCompressStreams(bool val)
598
0
{
599
0
    m->cfg.compress_streams(val);
600
0
}
601
602
// Set the stream compression flag and remember that it was set explicitly. When pclm is set the
// call is reported through usage() and, if that returns, the setting is left unchanged.
Config&
Config::compress_streams(bool val)
{
    if (!pclm_) {
        compress_streams_set_ = true;
        compress_streams_ = val;
    } else {
        usage("compress_streams cannot be set when pclm is set");
    }
    return *this;
}
613
614
void
615
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
616
9.56k
{
617
9.56k
    m->cfg.decode_level(val);
618
9.56k
}
619
620
// Set the stream decode level and remember that it was set explicitly. When pclm is set the call
// is reported through usage() and, if that returns, the setting is left unchanged.
Config&
Config::decode_level(qpdf_stream_decode_level_e val)
{
    if (!pclm_) {
        decode_level_set_ = true;
        decode_level_ = val;
    } else {
        usage("stream_decode_level cannot be set when pclm is set");
    }
    return *this;
}
631
632
void
633
QPDFWriter::setRecompressFlate(bool val)
634
0
{
635
0
    m->cfg.recompress_flate(val);
636
0
}
637
638
void
639
QPDFWriter::setContentNormalization(bool val)
640
0
{
641
0
    m->cfg.normalize_content(val);
642
0
}
643
644
void
645
QPDFWriter::setQDFMode(bool val)
646
0
{
647
0
    m->cfg.qdf(val);
648
0
}
649
650
// Set QDF mode. Conflicts with linearize/pclm and with preserve_encryption are reported through
// usage(), but processing continues afterwards if usage() returns. Turning QDF on also supplies
// defaults for any of content normalization, stream compression and decode level that have not
// been set explicitly, and switches off preserve_encryption and direct stream lengths.
// NOTE(review): unlike linearize() and pclm(), there is no early return after usage() and the
// checks do not depend on val -- presumably intentional; confirm.
Config&
Config::qdf(bool val)
{
    if (pclm_ || linearize_) {
        usage("qdf cannot be set when linearize or pclm are set");
    }
    if (preserve_encryption_) {
        usage("preserve_encryption cannot be set when qdf is set");
    }
    qdf_ = val;
    if (!val) {
        return *this;
    }

    if (!normalize_content_set_) {
        normalize_content(true);
    }
    if (!compress_streams_set_) {
        compress_streams(false);
    }
    if (!decode_level_set_) {
        decode_level(qpdf_dl_generalized);
    }
    preserve_encryption_ = false;
    // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
    // recomputed stream length data. Certain streams such as object streams, xref streams, and
    // hint streams always get direct stream lengths.
    direct_stream_lengths_ = false;
    return *this;
}
678
679
void
680
QPDFWriter::setPreserveUnreferencedObjects(bool val)
681
0
{
682
0
    m->cfg.preserve_unreferenced(val);
683
0
}
684
685
void
686
QPDFWriter::setNewlineBeforeEndstream(bool val)
687
0
{
688
0
    m->cfg.newline_before_endstream(val);
689
0
}
690
691
void
692
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
693
0
{
694
0
    m->setMinimumPDFVersion(version, extension_level);
695
0
}
696
697
void
698
impl::Writer::setMinimumPDFVersion(std::string const& version, int extension_level)
699
19.7k
{
700
19.7k
    bool set_version = false;
701
19.7k
    bool set_extension_level = false;
702
19.7k
    if (min_pdf_version.empty()) {
703
9.54k
        set_version = true;
704
9.54k
        set_extension_level = true;
705
10.2k
    } else {
706
10.2k
        int old_major = 0;
707
10.2k
        int old_minor = 0;
708
10.2k
        int min_major = 0;
709
10.2k
        int min_minor = 0;
710
10.2k
        parseVersion(version, old_major, old_minor);
711
10.2k
        parseVersion(min_pdf_version, min_major, min_minor);
712
10.2k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
713
10.2k
        if (compare > 0) {
714
355
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
715
355
            set_version = true;
716
355
            set_extension_level = true;
717
9.89k
        } else if (compare == 0) {
718
1.32k
            if (extension_level > min_extension_level) {
719
1
                set_extension_level = true;
720
1
            }
721
1.32k
        }
722
10.2k
    }
723
724
19.7k
    if (set_version) {
725
9.90k
        min_pdf_version = version;
726
9.90k
    }
727
19.7k
    if (set_extension_level) {
728
9.90k
        min_extension_level = extension_level;
729
9.90k
    }
730
19.7k
}
731
732
void
733
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
734
0
{
735
0
    std::string version;
736
0
    int extension_level;
737
0
    v.getVersion(version, extension_level);
738
0
    setMinimumPDFVersion(version, extension_level);
739
0
}
740
741
void
742
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
743
0
{
744
0
    m->cfg.forced_pdf_version(version, extension_level);
745
0
}
746
747
void
748
QPDFWriter::setExtraHeaderText(std::string const& text)
749
0
{
750
0
    m->cfg.extra_header_text(text);
751
0
}
752
753
// Store the extra header text, appending a newline when the text is non-empty and does not
// already end with one.
Config&
Config::extra_header_text(std::string const& val)
{
    extra_header_text_ = val;
    if (extra_header_text_.empty() || extra_header_text_.back() == '\n') {
        // Nothing to append: the text is empty or already newline-terminated.
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
    } else {
        extra_header_text_ += "\n";
    }
    return *this;
}
764
765
void
766
QPDFWriter::setStaticID(bool val)
767
9.56k
{
768
9.56k
    m->cfg.static_id(val);
769
9.56k
}
770
771
void
772
QPDFWriter::setDeterministicID(bool val)
773
0
{
774
0
    m->cfg.deterministic_id(val);
775
0
}
776
777
void
778
QPDFWriter::setStaticAesIV(bool val)
779
0
{
780
0
    if (val) {
781
0
        Pl_AES_PDF::useStaticIV();
782
0
    }
783
0
}
784
785
void
786
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
787
0
{
788
0
    m->cfg.no_original_object_ids(val);
789
0
}
790
791
void
792
QPDFWriter::setPreserveEncryption(bool val)
793
0
{
794
0
    m->cfg.preserve_encryption(val);
795
0
}
796
797
void
798
QPDFWriter::setLinearization(bool val)
799
9.56k
{
800
9.56k
    m->cfg.linearize(val);
801
9.56k
}
802
803
// Set the linearization flag. When pclm or qdf is set the call is reported through usage() and,
// if that returns, the flag is left unchanged.
Config&
Config::linearize(bool val)
{
    if (!(pclm_ || qdf_)) {
        linearize_ = val;
    } else {
        usage("linearize cannot be set when qdf or pclm are set");
    }
    return *this;
}
813
814
void
815
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
816
0
{
817
0
    m->cfg.linearize_pass1(filename);
818
0
}
819
820
void
821
QPDFWriter::setPCLm(bool val)
822
0
{
823
0
    m->cfg.pclm(val);
824
0
}
825
826
// Set the PCLm flag. If a decode level or stream compression has been set explicitly, or
// linearization is on, the call is reported through usage() and, if that returns, nothing is
// changed. Turning PCLm on also forces decode level none, no stream compression and no
// linearization.
// NOTE(review): the message mentions qdf but the condition does not test qdf_ directly --
// presumably covered by the *_set_ flags that qdf(true) triggers; confirm.
Config&
Config::pclm(bool val)
{
    bool const conflicting = decode_level_set_ || compress_streams_set_ || linearize_;
    if (conflicting) {
        usage(
            "pclm cannot be set when stream_decode_level, compress_streams, linearize or qdf are "
            "set");
        return *this;
    }

    pclm_ = val;
    if (pclm_) {
        decode_level_ = qpdf_dl_none;
        compress_streams_ = false;
        linearize_ = false;
    }
    return *this;
}
844
845
void
846
QPDFWriter::setR2EncryptionParametersInsecure(
847
    char const* user_password,
848
    char const* owner_password,
849
    bool allow_print,
850
    bool allow_modify,
851
    bool allow_extract,
852
    bool allow_annotate)
853
0
{
854
0
    m->encryption = std::make_unique<Encryption>(1, 2, 5, true);
855
0
    if (!allow_print) {
856
0
        m->encryption->setP(3, false);
857
0
    }
858
0
    if (!allow_modify) {
859
0
        m->encryption->setP(4, false);
860
0
    }
861
0
    if (!allow_extract) {
862
0
        m->encryption->setP(5, false);
863
0
    }
864
0
    if (!allow_annotate) {
865
0
        m->encryption->setP(6, false);
866
0
    }
867
0
    m->setEncryptionParameters(user_password, owner_password);
868
0
}
869
870
void
871
QPDFWriter::setR3EncryptionParametersInsecure(
872
    char const* user_password,
873
    char const* owner_password,
874
    bool allow_accessibility,
875
    bool allow_extract,
876
    bool allow_assemble,
877
    bool allow_annotate_and_form,
878
    bool allow_form_filling,
879
    bool allow_modify_other,
880
    qpdf_r3_print_e print)
881
0
{
882
0
    m->encryption = std::make_unique<Encryption>(2, 3, 16, true);
883
0
    m->interpretR3EncryptionParameters(
884
0
        allow_accessibility,
885
0
        allow_extract,
886
0
        allow_assemble,
887
0
        allow_annotate_and_form,
888
0
        allow_form_filling,
889
0
        allow_modify_other,
890
0
        print,
891
0
        qpdf_r3m_all);
892
0
    m->setEncryptionParameters(user_password, owner_password);
893
0
}
894
895
void
896
QPDFWriter::setR4EncryptionParametersInsecure(
897
    char const* user_password,
898
    char const* owner_password,
899
    bool allow_accessibility,
900
    bool allow_extract,
901
    bool allow_assemble,
902
    bool allow_annotate_and_form,
903
    bool allow_form_filling,
904
    bool allow_modify_other,
905
    qpdf_r3_print_e print,
906
    bool encrypt_metadata,
907
    bool use_aes)
908
0
{
909
0
    m->encryption = std::make_unique<Encryption>(4, 4, 16, encrypt_metadata);
910
0
    m->cfg.encrypt_use_aes(use_aes);
911
0
    m->interpretR3EncryptionParameters(
912
0
        allow_accessibility,
913
0
        allow_extract,
914
0
        allow_assemble,
915
0
        allow_annotate_and_form,
916
0
        allow_form_filling,
917
0
        allow_modify_other,
918
0
        print,
919
0
        qpdf_r3m_all);
920
0
    m->setEncryptionParameters(user_password, owner_password);
921
0
}
922
923
void
924
QPDFWriter::setR5EncryptionParameters(
925
    char const* user_password,
926
    char const* owner_password,
927
    bool allow_accessibility,
928
    bool allow_extract,
929
    bool allow_assemble,
930
    bool allow_annotate_and_form,
931
    bool allow_form_filling,
932
    bool allow_modify_other,
933
    qpdf_r3_print_e print,
934
    bool encrypt_metadata)
935
0
{
936
0
    m->encryption = std::make_unique<Encryption>(5, 5, 32, encrypt_metadata);
937
0
    m->cfg.encrypt_use_aes(true);
938
0
    m->interpretR3EncryptionParameters(
939
0
        allow_accessibility,
940
0
        allow_extract,
941
0
        allow_assemble,
942
0
        allow_annotate_and_form,
943
0
        allow_form_filling,
944
0
        allow_modify_other,
945
0
        print,
946
0
        qpdf_r3m_all);
947
0
    m->setEncryptionParameters(user_password, owner_password);
948
0
}
949
950
void
951
QPDFWriter::setR6EncryptionParameters(
952
    char const* user_password,
953
    char const* owner_password,
954
    bool allow_accessibility,
955
    bool allow_extract,
956
    bool allow_assemble,
957
    bool allow_annotate_and_form,
958
    bool allow_form_filling,
959
    bool allow_modify_other,
960
    qpdf_r3_print_e print,
961
    bool encrypt_metadata)
962
9.56k
{
963
9.56k
    m->encryption = std::make_unique<Encryption>(5, 6, 32, encrypt_metadata);
964
9.56k
    m->interpretR3EncryptionParameters(
965
9.56k
        allow_accessibility,
966
9.56k
        allow_extract,
967
9.56k
        allow_assemble,
968
9.56k
        allow_annotate_and_form,
969
9.56k
        allow_form_filling,
970
9.56k
        allow_modify_other,
971
9.56k
        print,
972
9.56k
        qpdf_r3m_all);
973
9.56k
    m->cfg.encrypt_use_aes(true);
974
9.56k
    m->setEncryptionParameters(user_password, owner_password);
975
9.56k
}
976
977
void
978
impl::Writer::interpretR3EncryptionParameters(
979
    bool allow_accessibility,
980
    bool allow_extract,
981
    bool allow_assemble,
982
    bool allow_annotate_and_form,
983
    bool allow_form_filling,
984
    bool allow_modify_other,
985
    qpdf_r3_print_e print,
986
    qpdf_r3_modify_e modify)
987
9.56k
{
988
    // Acrobat 5 security options:
989
990
    // Checkboxes:
991
    //   Enable Content Access for the Visually Impaired
992
    //   Allow Content Copying and Extraction
993
994
    // Allowed changes menu:
995
    //   None
996
    //   Only Document Assembly
997
    //   Only Form Field Fill-in or Signing
998
    //   Comment Authoring, Form Field Fill-in or Signing
999
    //   General Editing, Comment and Form Field Authoring
1000
1001
    // Allowed printing menu:
1002
    //   None
1003
    //   Low Resolution
1004
    //   Full printing
1005
1006
    // Meanings of bits in P when R >= 3
1007
    //
1008
    //  3: low-resolution printing
1009
    //  4: document modification except as controlled by 6, 9, and 11
1010
    //  5: extraction
1011
    //  6: add/modify annotations (comment), fill in forms
1012
    //     if 4+6 are set, also allows modification of form fields
1013
    //  9: fill in forms even if 6 is clear
1014
    // 10: accessibility; ignored by readers, should always be set
1015
    // 11: document assembly even if 4 is clear
1016
    // 12: high-resolution printing
1017
9.56k
    if (!allow_accessibility && encryption->getR() <= 3) {
1018
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
1019
        // is no way to disable accessibility with R > 3.
1020
0
        encryption->setP(10, false);
1021
0
    }
1022
9.56k
    if (!allow_extract) {
1023
0
        encryption->setP(5, false);
1024
0
    }
1025
1026
9.56k
    switch (print) {
1027
0
    case qpdf_r3p_none:
1028
0
        encryption->setP(3, false); // any printing
1029
0
        [[fallthrough]];
1030
0
    case qpdf_r3p_low:
1031
0
        encryption->setP(12, false); // high resolution printing
1032
0
        [[fallthrough]];
1033
9.56k
    case qpdf_r3p_full:
1034
9.56k
        break;
1035
        // no default so gcc warns for missing cases
1036
9.56k
    }
1037
1038
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
1039
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
1040
    // stuck with it. See also allow checks below to control the bits individually.
1041
1042
    // NOT EXERCISED IN TEST SUITE
1043
9.56k
    switch (modify) {
1044
0
    case qpdf_r3m_none:
1045
0
        encryption->setP(11, false); // document assembly
1046
0
        [[fallthrough]];
1047
0
    case qpdf_r3m_assembly:
1048
0
        encryption->setP(9, false); // filling in form fields
1049
0
        [[fallthrough]];
1050
0
    case qpdf_r3m_form:
1051
0
        encryption->setP(6, false); // modify annotations, fill in form fields
1052
0
        [[fallthrough]];
1053
0
    case qpdf_r3m_annotate:
1054
0
        encryption->setP(4, false); // other modifications
1055
0
        [[fallthrough]];
1056
9.56k
    case qpdf_r3m_all:
1057
9.56k
        break;
1058
        // no default so gcc warns for missing cases
1059
9.56k
    }
1060
    // END NOT EXERCISED IN TEST SUITE
1061
1062
9.56k
    if (!allow_assemble) {
1063
0
        encryption->setP(11, false);
1064
0
    }
1065
9.56k
    if (!allow_annotate_and_form) {
1066
0
        encryption->setP(6, false);
1067
0
    }
1068
9.56k
    if (!allow_form_filling) {
1069
0
        encryption->setP(9, false);
1070
0
    }
1071
9.56k
    if (!allow_modify_other) {
1072
0
        encryption->setP(4, false);
1073
0
    }
1074
9.56k
}
1075
1076
void
1077
impl::Writer::setEncryptionParameters(char const* user_password, char const* owner_password)
1078
9.56k
{
1079
9.56k
    generateID(true);
1080
9.56k
    encryption->setId1(id1);
1081
9.56k
    encryption_key = encryption->compute_parameters(user_password, owner_password);
1082
9.56k
    setEncryptionMinimumVersion();
1083
9.56k
}
1084
1085
void
1086
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
1087
0
{
1088
0
    m->copyEncryptionParameters(qpdf);
1089
0
}
1090
1091
void
1092
impl::Writer::copyEncryptionParameters(QPDF& qpdf)
1093
0
{
1094
0
    cfg.preserve_encryption(false);
1095
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
1096
0
    if (trailer.hasKey("/Encrypt")) {
1097
0
        generateID(true);
1098
0
        id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
1099
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
1100
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
1101
0
        int key_len = 5;
1102
0
        if (V > 1) {
1103
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
1104
0
        }
1105
0
        const bool encrypt_metadata =
1106
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
1107
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
1108
0
            : true;
1109
0
        if (V >= 4) {
1110
            // When copying encryption parameters, use AES even if the original file did not.
1111
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
1112
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
1113
            // all potentially having different values.
1114
0
            cfg.encrypt_use_aes(true);
1115
0
        }
1116
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
1117
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", cfg.encrypt_use_aes() ? 0 : 1);
1118
1119
0
        encryption = std::make_unique<Encryption>(
1120
0
            V,
1121
0
            encrypt.getKey("/R").getIntValueAsInt(),
1122
0
            key_len,
1123
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
1124
0
            encrypt.getKey("/O").getStringValue(),
1125
0
            encrypt.getKey("/U").getStringValue(),
1126
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
1127
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
1128
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
1129
0
            id1, // id1 == the other file's id1
1130
0
            encrypt_metadata);
1131
0
        encryption_key = V >= 5 ? qpdf.getEncryptionKey()
1132
0
                                : encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
1133
0
        setEncryptionMinimumVersion();
1134
0
    }
1135
0
}
1136
1137
void
1138
impl::Writer::disableIncompatibleEncryption(int major, int minor, int extension_level)
1139
0
{
1140
0
    if (!encryption) {
1141
0
        return;
1142
0
    }
1143
0
    if (compareVersions(major, minor, 1, 3) < 0) {
1144
0
        encryption = nullptr;
1145
0
        return;
1146
0
    }
1147
0
    int V = encryption->getV();
1148
0
    int R = encryption->getR();
1149
0
    if (compareVersions(major, minor, 1, 4) < 0) {
1150
0
        if (V > 1 || R > 2) {
1151
0
            encryption = nullptr;
1152
0
        }
1153
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
1154
0
        if (V > 2 || R > 3) {
1155
0
            encryption = nullptr;
1156
0
        }
1157
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
1158
0
        if (cfg.encrypt_use_aes()) {
1159
0
            encryption = nullptr;
1160
0
        }
1161
0
    } else if (
1162
0
        (compareVersions(major, minor, 1, 7) < 0) ||
1163
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
1164
0
        if (V >= 5 || R >= 5) {
1165
0
            encryption = nullptr;
1166
0
        }
1167
0
    }
1168
1169
0
    if (!encryption) {
1170
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
1171
0
    }
1172
0
}
1173
1174
void
1175
impl::Writer::parseVersion(std::string const& version, int& major, int& minor) const
1176
20.4k
{
1177
20.4k
    major = QUtil::string_to_int(version.c_str());
1178
20.4k
    minor = 0;
1179
20.4k
    size_t p = version.find('.');
1180
20.4k
    if ((p != std::string::npos) && (version.length() > p)) {
1181
20.4k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
1182
20.4k
    }
1183
20.4k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
1184
20.4k
    if (tmp != version) {
1185
        // The version number in the input is probably invalid. This happens with some files that
1186
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
1187
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
1188
44
    }
1189
20.4k
}
1190
1191
int
1192
impl::Writer::compareVersions(int major1, int minor1, int major2, int minor2) const
1193
10.2k
{
1194
10.2k
    if (major1 < major2) {
1195
187
        return -1;
1196
187
    }
1197
10.0k
    if (major1 > major2) {
1198
241
        return 1;
1199
241
    }
1200
9.81k
    if (minor1 < minor2) {
1201
8.37k
        return -1;
1202
8.37k
    }
1203
1.44k
    return minor1 > minor2 ? 1 : 0;
1204
9.81k
}
1205
1206
void
1207
impl::Writer::setEncryptionMinimumVersion()
1208
9.54k
{
1209
9.54k
    auto const R = encryption->getR();
1210
9.54k
    if (R >= 6) {
1211
9.54k
        setMinimumPDFVersion("1.7", 8);
1212
9.54k
    } else if (R == 5) {
1213
0
        setMinimumPDFVersion("1.7", 3);
1214
0
    } else if (R == 4) {
1215
0
        setMinimumPDFVersion(cfg.encrypt_use_aes() ? "1.6" : "1.5");
1216
0
    } else if (R == 3) {
1217
0
        setMinimumPDFVersion("1.4");
1218
0
    } else {
1219
0
        setMinimumPDFVersion("1.3");
1220
0
    }
1221
9.54k
}
1222
1223
void
1224
impl::Writer::setDataKey(int objid)
1225
214k
{
1226
214k
    if (encryption) {
1227
214k
        cur_data_key = QPDF::compute_data_key(
1228
214k
            encryption_key,
1229
214k
            objid,
1230
214k
            0,
1231
214k
            cfg.encrypt_use_aes(),
1232
214k
            encryption->getV(),
1233
214k
            encryption->getR());
1234
214k
    }
1235
214k
}
1236
1237
unsigned int
1238
impl::Writer::bytesNeeded(long long n)
1239
5.30k
{
1240
5.30k
    unsigned int bytes = 0;
1241
13.1k
    while (n) {
1242
7.83k
        ++bytes;
1243
7.83k
        n >>= 8;
1244
7.83k
    }
1245
5.30k
    return bytes;
1246
5.30k
}
1247
1248
void
1249
impl::Writer::writeBinary(unsigned long long val, unsigned int bytes)
1250
366k
{
1251
366k
    if (bytes > sizeof(unsigned long long)) {
1252
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1253
0
    }
1254
366k
    unsigned char data[sizeof(unsigned long long)];
1255
925k
    for (unsigned int i = 0; i < bytes; ++i) {
1256
559k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1257
559k
        val >>= 8;
1258
559k
    }
1259
366k
    pipeline->write(data, bytes);
1260
366k
}
1261
1262
// Write str to the current pipeline. Returns *this so that writes can be chained.
impl::Writer&
impl::Writer::write(std::string_view str)
{
    auto& self = *this;
    pipeline->write(str);
    return self;
}
1268
1269
// Write the decimal text form of an integral value to the current pipeline. Returns *this so
// that writes can be chained.
impl::Writer&
impl::Writer::write(std::integral auto val)
{
    auto const digits = std::to_string(val);
    pipeline->write(digits);
    return *this;
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEiEERS1_T_
Line
Count
Source
1271
808k
{
1272
808k
    pipeline->write(std::to_string(val));
1273
808k
    return *this;
1274
808k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralExEERS1_T_
Line
Count
Source
1271
211k
{
1272
211k
    pipeline->write(std::to_string(val));
1273
211k
    return *this;
1274
211k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEmEERS1_T_
Line
Count
Source
1271
68.6k
{
1272
68.6k
    pipeline->write(std::to_string(val));
1273
68.6k
    return *this;
1274
68.6k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEjEERS1_T_
Line
Count
Source
1271
5.30k
{
1272
5.30k
    pipeline->write(std::to_string(val));
1273
5.30k
    return *this;
1274
5.30k
}
1275
1276
// Forward (count, c) to the pipeline's write overload of the same shape -- used in this file
// to emit runs of a single padding character. Returns *this so that writes can be chained.
impl::Writer&
impl::Writer::write(size_t count, char c)
{
    auto& self = *this;
    pipeline->write(count, c);
    return self;
}
1282
1283
// Write a PDF name after passing it through Name::normalize. Returns *this so that writes can
// be chained.
impl::Writer&
impl::Writer::write_name(std::string const& str)
{
    auto& self = *this;
    pipeline->write(Name::normalize(str));
    return self;
}
1289
1290
// Write str as a PDF string object, using QPDF_String::unparse to produce its textual form.
// force_binary is handed through to unparse unchanged. Returns *this so that writes can be
// chained.
impl::Writer&
impl::Writer::write_string(std::string const& str, bool force_binary)
{
    auto& self = *this;
    pipeline->write(QPDF_String(str).unparse(force_binary));
    return self;
}
1296
1297
template <typename... Args>
1298
impl::Writer&
1299
impl::Writer::write_qdf(Args&&... args)
1300
596k
{
1301
596k
    if (cfg.qdf()) {
1302
0
        pipeline->write(std::forward<Args>(args)...);
1303
0
    }
1304
596k
    return *this;
1305
596k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1300
486k
{
1301
486k
    if (cfg.qdf()) {
1302
0
        pipeline->write(std::forward<Args>(args)...);
1303
0
    }
1304
486k
    return *this;
1305
486k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1300
61.5k
{
1301
61.5k
    if (cfg.qdf()) {
1302
0
        pipeline->write(std::forward<Args>(args)...);
1303
0
    }
1304
61.5k
    return *this;
1305
61.5k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1300
32.3k
{
1301
32.3k
    if (cfg.qdf()) {
1302
0
        pipeline->write(std::forward<Args>(args)...);
1303
0
    }
1304
32.3k
    return *this;
1305
32.3k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1300
16.3k
{
1301
16.3k
    if (cfg.qdf()) {
1302
0
        pipeline->write(std::forward<Args>(args)...);
1303
0
    }
1304
16.3k
    return *this;
1305
16.3k
}
1306
1307
template <typename... Args>
1308
impl::Writer&
1309
impl::Writer::write_no_qdf(Args&&... args)
1310
206k
{
1311
206k
    if (!cfg.qdf()) {
1312
206k
        pipeline->write(std::forward<Args>(args)...);
1313
206k
    }
1314
206k
    return *this;
1315
206k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1310
173k
{
1311
173k
    if (!cfg.qdf()) {
1312
173k
        pipeline->write(std::forward<Args>(args)...);
1313
173k
    }
1314
173k
    return *this;
1315
173k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1310
32.3k
{
1311
32.3k
    if (!cfg.qdf()) {
1312
32.3k
        pipeline->write(std::forward<Args>(args)...);
1313
32.3k
    }
1314
32.3k
    return *this;
1315
32.3k
}
1316
1317
void
1318
impl::Writer::adjustAESStreamLength(size_t& length)
1319
54.2k
{
1320
54.2k
    if (encryption && !cur_data_key.empty() && cfg.encrypt_use_aes()) {
1321
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1322
        // also be prepended by 16 bits of random data.
1323
54.2k
        length += 32 - (length & 0xf);
1324
54.2k
    }
1325
54.2k
}
1326
1327
// Write str, encrypting it with the current data key when encryption is active. AES is used
// when configured, RC4 otherwise. Without encryption, or without a data key, str is written
// as is. Returns *this so that writes can be chained.
impl::Writer&
impl::Writer::write_encrypted(std::string_view str)
{
    bool const encrypting = encryption && !cur_data_key.empty();
    if (!encrypting) {
        return write(str);
    }
    if (cfg.encrypt_use_aes()) {
        return write(pl::pipe<Pl_AES_PDF>(str, true, cur_data_key));
    }
    return write(pl::pipe<Pl_RC4>(str, cur_data_key));
}
1340
1341
void
1342
impl::Writer::computeDeterministicIDData()
1343
0
{
1344
0
    if (!id2.empty()) {
1345
        // Can't happen in the code
1346
0
        throw std::logic_error(
1347
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1348
0
    }
1349
0
    qpdf_assert_debug(deterministic_id_data.empty());
1350
0
    deterministic_id_data = pipeline_stack.hex_digest();
1351
0
}
1352
1353
int
1354
impl::Writer::openObject(int objid)
1355
248k
{
1356
248k
    if (objid == 0) {
1357
0
        objid = next_objid++;
1358
0
    }
1359
248k
    new_obj[objid].xref = QPDFXRefEntry(pipeline->getCount());
1360
248k
    write(objid).write(" 0 obj\n");
1361
248k
    return objid;
1362
248k
}
1363
1364
void
1365
impl::Writer::closeObject(int objid)
1366
248k
{
1367
    // Write a newline before endobj as it makes the file easier to repair.
1368
248k
    write("\nendobj\n").write_qdf("\n");
1369
248k
    auto& no = new_obj[objid];
1370
248k
    no.length = pipeline->getCount() - no.xref.getOffset();
1371
248k
}
1372
1373
void
1374
impl::Writer::assignCompressedObjectNumbers(QPDFObjGen og)
1375
119k
{
1376
119k
    int objid = og.getObj();
1377
119k
    if (og.getGen() != 0 || !object_stream_to_objects.contains(objid)) {
1378
        // This is not an object stream.
1379
116k
        return;
1380
116k
    }
1381
1382
    // Reserve numbers for the objects that belong to this object stream.
1383
42.9k
    for (auto const& iter: object_stream_to_objects[objid]) {
1384
42.9k
        obj[iter].renumber = next_objid++;
1385
42.9k
    }
1386
3.22k
}
1387
1388
void
1389
impl::Writer::enqueue(QPDFObjectHandle const& object)
1390
118k
{
1391
118k
    if (object.indirect()) {
1392
118k
        util::assertion(
1393
            // This owner check can only be done for indirect objects. It is possible for a direct
1394
            // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle
1395
            // from one file was insert into another file without copying. Doing that is safe even
1396
            // if the original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from
1397
            // its owner.
1398
118k
            object.qpdf() == &qpdf,
1399
118k
            "QPDFObjectHandle from different QPDF found while writing.  "
1400
118k
            "Use QPDF::copyForeignObject to add objects from another file." //
1401
118k
        );
1402
1403
118k
        if (cfg.qdf() && object.isStreamOfType("/XRef")) {
1404
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1405
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1406
            // file. This case can occur when creating a QDF from a file with object streams when
1407
            // preserving unreferenced objects since the old cross reference streams are not
1408
            // actually referenced by object number.
1409
0
            return;
1410
0
        }
1411
1412
118k
        QPDFObjGen og = object.getObjGen();
1413
118k
        auto& o = obj[og];
1414
1415
118k
        if (o.renumber == 0) {
1416
118k
            if (o.object_stream > 0) {
1417
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1418
                // stream.  Object streams always have generation 0.
1419
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1420
32
                o.renumber = -1;
1421
32
                enqueue(qpdf.getObject(o.object_stream, 0));
1422
118k
            } else {
1423
118k
                object_queue.emplace_back(object);
1424
118k
                o.renumber = next_objid++;
1425
1426
118k
                if (og.getGen() == 0 && object_stream_to_objects.contains(og.getObj())) {
1427
                    // For linearized files, uncompressed objects go at end, and we take care of
1428
                    // assigning numbers to them elsewhere.
1429
3.15k
                    if (!cfg.linearize()) {
1430
0
                        assignCompressedObjectNumbers(og);
1431
0
                    }
1432
114k
                } else if (!cfg.direct_stream_lengths() && object.isStream()) {
1433
                    // reserve next object ID for length
1434
0
                    ++next_objid;
1435
0
                }
1436
118k
            }
1437
118k
        }
1438
118k
        return;
1439
118k
    }
1440
1441
227
    if (cfg.linearize()) {
1442
227
        return;
1443
227
    }
1444
1445
0
    if (Array array = object) {
1446
0
        for (auto& item: array) {
1447
0
            enqueue(item);
1448
0
        }
1449
0
        return;
1450
0
    }
1451
1452
0
    for (auto const& item: Dictionary(object)) {
1453
0
        if (!item.second.null()) {
1454
0
            enqueue(item.second);
1455
0
        }
1456
0
    }
1457
0
}
1458
1459
void
1460
impl::Writer::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1461
1.75M
{
1462
1.75M
    if (!cfg.linearize()) {
1463
0
        enqueue(child);
1464
0
    }
1465
1.75M
    if (child.indirect()) {
1466
379k
        write(obj[child].renumber).write(" 0 R");
1467
1.37M
    } else {
1468
1.37M
        unparseObject(child, level, flags);
1469
1.37M
    }
1470
1.75M
}
1471
1472
void
1473
impl::Writer::writeTrailer(
1474
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1475
32.3k
{
1476
32.3k
    auto trailer = trimmed_trailer();
1477
32.3k
    if (xref_stream) {
1478
1.76k
        cur_data_key.clear();
1479
30.5k
    } else {
1480
30.5k
        write("trailer <<");
1481
30.5k
    }
1482
32.3k
    write_qdf("\n");
1483
32.3k
    if (which == t_lin_second) {
1484
15.9k
        write(" /Size ").write(size);
1485
16.3k
    } else {
1486
44.8k
        for (auto const& [key, value]: trailer) {
1487
44.8k
            if (value.null()) {
1488
12.5k
                continue;
1489
12.5k
            }
1490
32.2k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1491
32.2k
            if (key == "/Size") {
1492
3.98k
                write(size);
1493
3.98k
                if (which == t_lin_first) {
1494
3.98k
                    write(" /Prev ");
1495
3.98k
                    qpdf_offset_t pos = pipeline->getCount();
1496
3.98k
                    write(prev).write(QIntC::to_size(pos - pipeline->getCount() + 21), ' ');
1497
3.98k
                }
1498
28.2k
            } else {
1499
28.2k
                unparseChild(value, 1, 0);
1500
28.2k
            }
1501
32.2k
            write_qdf("\n");
1502
32.2k
        }
1503
16.3k
    }
1504
1505
    // Write ID
1506
32.3k
    write_qdf(" ").write(" /ID [");
1507
32.3k
    if (linearization_pass == 1) {
1508
16.4k
        std::string original_id1 = getOriginalID1();
1509
16.4k
        if (original_id1.empty()) {
1510
14.8k
            write("<00000000000000000000000000000000>");
1511
14.8k
        } else {
1512
            // Write a string of zeroes equal in length to the representation of the original ID.
1513
            // While writing the original ID would have the same number of bytes, it would cause a
1514
            // change to the deterministic ID generated by older versions of the software that
1515
            // hard-coded the length of the ID to 16 bytes.
1516
1.58k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1517
1.58k
            write("<").write(len, '0').write(">");
1518
1.58k
        }
1519
16.4k
        write("<00000000000000000000000000000000>");
1520
16.4k
    } else {
1521
15.9k
        if (linearization_pass == 0 && cfg.deterministic_id()) {
1522
0
            computeDeterministicIDData();
1523
0
        }
1524
15.9k
        generateID(encryption.get());
1525
15.9k
        write_string(id1, true).write_string(id2, true);
1526
15.9k
    }
1527
32.3k
    write("]");
1528
1529
32.3k
    if (which != t_lin_second) {
1530
        // Write reference to encryption dictionary
1531
16.3k
        if (encryption) {
1532
16.3k
            write(" /Encrypt ").write(encryption_dict_objid).write(" 0 R");
1533
16.3k
        }
1534
16.3k
    }
1535
1536
32.3k
    write_qdf("\n>>").write_no_qdf(" >>");
1537
32.3k
}
1538
1539
bool
1540
impl::Writer::will_filter_stream(QPDFObjectHandle stream)
1541
22.3k
{
1542
22.3k
    std::string s;
1543
22.3k
    [[maybe_unused]] auto [filter, ignore1, ignore2] = will_filter_stream(stream, &s);
1544
22.3k
    return filter;
1545
22.3k
}
1546
1547
std::tuple<const bool, const bool, const bool>
1548
impl::Writer::will_filter_stream(QPDFObjectHandle stream, std::string* stream_data)
1549
63.6k
{
1550
63.6k
    const bool is_root_metadata = stream.isRootMetadata();
1551
63.6k
    bool filter = false;
1552
63.6k
    auto decode_level = cfg.decode_level();
1553
63.6k
    int encode_flags = 0;
1554
63.6k
    Dictionary stream_dict = stream.getDict();
1555
1556
63.6k
    if (stream.getFilterOnWrite()) {
1557
48.6k
        filter = stream.isDataModified() || cfg.compress_streams() || decode_level != qpdf_dl_none;
1558
48.6k
        if (cfg.compress_streams()) {
1559
            // Don't filter if the stream is already compressed with FlateDecode. This way we don't
1560
            // make it worse if the original file used a better Flate algorithm, and we don't spend
1561
            // time and CPU cycles uncompressing and recompressing stuff. This can be overridden
1562
            // with setRecompressFlate(true).
1563
48.6k
            Name Filter = stream_dict["/Filter"];
1564
48.6k
            if (Filter && !cfg.recompress_flate() && !stream.isDataModified() &&
1565
19.4k
                (Filter == "/FlateDecode" || Filter == "/Fl")) {
1566
11.0k
                filter = false;
1567
11.0k
            }
1568
48.6k
        }
1569
48.6k
        if (is_root_metadata && (!encryption || !encryption->getEncryptMetadata())) {
1570
0
            filter = true;
1571
0
            decode_level = qpdf_dl_all;
1572
48.6k
        } else if (cfg.normalize_content() && normalized_streams.contains(stream)) {
1573
0
            encode_flags = qpdf_ef_normalize;
1574
0
            filter = true;
1575
48.6k
        } else if (filter && cfg.compress_streams()) {
1576
37.6k
            encode_flags = qpdf_ef_compress;
1577
37.6k
        }
1578
48.6k
    }
1579
1580
    // Disable compression for empty streams to improve compatibility
1581
63.6k
    if (Integer(stream_dict["/Length"]) == 0) {
1582
3.15k
        filter = true;
1583
3.15k
        encode_flags = 0;
1584
3.15k
    }
1585
1586
73.6k
    for (bool first_attempt: {true, false}) {
1587
73.6k
        auto pp_stream_data =
1588
73.6k
            stream_data ? pipeline_stack.activate(*stream_data) : pipeline_stack.activate(true);
1589
1590
73.6k
        try {
1591
73.6k
            if (stream.pipeStreamData(
1592
73.6k
                    pipeline,
1593
73.6k
                    filter ? encode_flags : 0,
1594
73.6k
                    filter ? decode_level : qpdf_dl_none,
1595
73.6k
                    false,
1596
73.6k
                    first_attempt)) {
1597
29.8k
                return {true, encode_flags & qpdf_ef_compress, is_root_metadata};
1598
29.8k
            }
1599
43.7k
            if (!filter) {
1600
33.7k
                break;
1601
33.7k
            }
1602
43.7k
        } catch (std::runtime_error& e) {
1603
79
            if (!(filter && first_attempt)) {
1604
12
                throw std::runtime_error(
1605
12
                    "error while getting stream data for " + stream.unparse() + ": " + e.what());
1606
12
            }
1607
67
            stream.warn("error while getting stream data: "s + e.what());
1608
67
            stream.warn("qpdf will attempt to write the damaged stream unchanged");
1609
67
        }
1610
        // Try again
1611
9.99k
        filter = false;
1612
9.99k
        stream.setFilterOnWrite(false);
1613
9.99k
        if (stream_data) {
1614
9.99k
            stream_data->clear();
1615
9.99k
        }
1616
9.99k
    }
1617
33.7k
    return {false, false, is_root_metadata};
1618
63.6k
}
1619
1620
void
1621
impl::Writer::unparseObject(
1622
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1623
1.69M
{
1624
1.69M
    QPDFObjGen old_og = object.getObjGen();
1625
1.69M
    int child_flags = flags & ~f_stream;
1626
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1627
    // include the preceding newline.
1628
1.69M
    std::string indent_large = " ";
1629
1.69M
    if (cfg.qdf()) {
1630
0
        indent_large.append(2 * (level + 1), ' ');
1631
0
        indent_large[0] = '\n';
1632
0
    }
1633
1.69M
    std::string_view indent{indent_large.data(), cfg.qdf() ? indent_large.size() - 2 : 1};
1634
1635
1.69M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1636
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1637
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1638
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1639
126k
        write("[");
1640
906k
        for (auto const& item: object.as_array()) {
1641
906k
            write(indent_large);
1642
906k
            unparseChild(item, level + 1, child_flags);
1643
906k
        }
1644
126k
        write(indent).write("]");
1645
1.56M
    } else if (tc == ::ot_dictionary) {
1646
        // Handle special cases for specific dictionaries.
1647
1648
275k
        if (old_og == root_og) {
1649
            // Extensions dictionaries.
1650
1651
            // We have one of several cases:
1652
            //
1653
            // * We need ADBE
1654
            //    - We already have Extensions
1655
            //       - If it has the right ADBE, preserve it
1656
            //       - Otherwise, replace ADBE
1657
            //    - We don't have Extensions: create one from scratch
1658
            // * We don't want ADBE
1659
            //    - We already have Extensions
1660
            //       - If it only has ADBE, remove it
1661
            //       - If it has other things, keep those and remove ADBE
1662
            //    - We have no extensions: no action required
1663
            //
1664
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1665
            // dictionary, so we can modify in place.
1666
1667
16.3k
            auto extensions = object.getKey("/Extensions");
1668
16.3k
            const bool has_extensions = extensions.isDictionary();
1669
16.3k
            const bool need_extensions_adbe = final_extension_level > 0;
1670
1671
16.3k
            if (has_extensions || need_extensions_adbe) {
1672
                // Make a shallow copy of this object so we can modify it safely without affecting
1673
                // the original. This code has logic to skip certain keys in agreement with
1674
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1675
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1676
                // here because all we are doing is removing or replacing top-level keys.
1677
15.7k
                object = object.unsafeShallowCopy();
1678
15.7k
                if (!has_extensions) {
1679
15.1k
                    extensions = QPDFObjectHandle();
1680
15.1k
                }
1681
1682
15.7k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1683
15.7k
                const bool have_extensions_other =
1684
15.7k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1685
1686
15.7k
                if (need_extensions_adbe) {
1687
15.7k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1688
                        // We need Extensions and don't have it.  Create it here.
1689
15.1k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", cfg.qdf() ? 0 : 1);
1690
15.1k
                        extensions = object.replaceKeyAndGetNew(
1691
15.1k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1692
15.1k
                    }
1693
15.7k
                } else if (!have_extensions_other) {
1694
                    // We have Extensions dictionary and don't want one.
1695
6
                    if (have_extensions_adbe) {
1696
3
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1697
3
                        object.removeKey("/Extensions");
1698
3
                        extensions = QPDFObjectHandle(); // uninitialized
1699
3
                    }
1700
6
                }
1701
1702
15.7k
                if (extensions) {
1703
15.7k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1704
15.7k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1705
15.7k
                    if (adbe.isDictionary() &&
1706
415
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + final_pdf_version) &&
1707
289
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1708
288
                        (adbe.getKey("/ExtensionLevel").getIntValue() == final_extension_level)) {
1709
15.4k
                    } else {
1710
15.4k
                        if (need_extensions_adbe) {
1711
15.4k
                            extensions.replaceKey(
1712
15.4k
                                "/ADBE",
1713
15.4k
                                QPDFObjectHandle::parse(
1714
15.4k
                                    "<< /BaseVersion /" + final_pdf_version + " /ExtensionLevel " +
1715
15.4k
                                    std::to_string(final_extension_level) + " >>"));
1716
15.4k
                        } else {
1717
22
                            extensions.removeKey("/ADBE");
1718
22
                        }
1719
15.4k
                    }
1720
15.7k
                }
1721
15.7k
            }
1722
16.3k
        }
1723
1724
        // Stream dictionaries.
1725
1726
275k
        if (flags & f_stream) {
1727
            // Suppress /Length since we will write it manually
1728
1729
            // Make a shallow copy of this object so we can modify it safely without affecting the
1730
            // original. This code has logic to skip certain keys in agreement with
1731
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1732
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1733
            // all we are doing is removing or replacing top-level keys.
1734
41.2k
            object = object.unsafeShallowCopy();
1735
1736
41.2k
            object.removeKey("/Length");
1737
1738
            // If /DecodeParms is an empty list, remove it.
1739
41.2k
            if (object.getKey("/DecodeParms").empty()) {
1740
38.9k
                object.removeKey("/DecodeParms");
1741
38.9k
            }
1742
1743
41.2k
            if (flags & f_filtered) {
1744
                // We will supply our own filter and decode parameters.
1745
19.4k
                object.removeKey("/Filter");
1746
19.4k
                object.removeKey("/DecodeParms");
1747
21.7k
            } else {
1748
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1749
                // filters.
1750
21.7k
                QPDFObjectHandle filter = object.getKey("/Filter");
1751
21.7k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1752
21.7k
                if (filter.isOrHasName("/Crypt")) {
1753
383
                    if (filter.isName()) {
1754
38
                        object.removeKey("/Filter");
1755
38
                        object.removeKey("/DecodeParms");
1756
345
                    } else {
1757
345
                        int idx = 0;
1758
9.71k
                        for (auto const& item: filter.as_array()) {
1759
9.71k
                            if (item.isNameAndEquals("/Crypt")) {
1760
                                // If filter is an array, then the code in QPDF_Stream has already
1761
                                // verified that DecodeParms and Filters are arrays of the same
1762
                                // length, but if they weren't for some reason, eraseItem does type
1763
                                // and bounds checking. Fuzzing tells us that this can actually
1764
                                // happen.
1765
345
                                filter.eraseItem(idx);
1766
345
                                decode_parms.eraseItem(idx);
1767
345
                                break;
1768
345
                            }
1769
9.36k
                            ++idx;
1770
9.36k
                        }
1771
345
                    }
1772
383
                }
1773
21.7k
            }
1774
41.2k
        }
1775
1776
275k
        write("<<");
1777
1778
962k
        for (auto const& [key, value]: object.as_dictionary()) {
1779
962k
            if (!value.null()) {
1780
820k
                write(indent_large).write_name(key).write(" ");
1781
820k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1782
26
                    object.hasKey("/ByteRange")) {
1783
22
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1784
22
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1785
820k
                } else {
1786
820k
                    unparseChild(value, level + 1, child_flags);
1787
820k
                }
1788
820k
            }
1789
962k
        }
1790
1791
275k
        if (flags & f_stream) {
1792
40.9k
            write(indent_large).write("/Length ");
1793
1794
40.9k
            if (cfg.direct_stream_lengths()) {
1795
40.9k
                write(stream_length);
1796
40.9k
            } else {
1797
0
                write(cur_stream_length_id).write(" 0 R");
1798
0
            }
1799
40.9k
            if (compress && (flags & f_filtered)) {
1800
19.2k
                write(indent_large).write("/Filter /FlateDecode");
1801
19.2k
            }
1802
40.9k
        }
1803
1804
275k
        write(indent).write(">>");
1805
1.28M
    } else if (tc == ::ot_stream) {
1806
        // Write stream data to a buffer.
1807
41.2k
        if (!cfg.direct_stream_lengths()) {
1808
0
            cur_stream_length_id = obj[old_og].renumber + 1;
1809
0
        }
1810
1811
41.2k
        flags |= f_stream;
1812
41.2k
        std::string stream_data;
1813
41.2k
        auto [filter, compress_stream, is_root_metadata] = will_filter_stream(object, &stream_data);
1814
41.2k
        if (filter) {
1815
19.4k
            flags |= f_filtered;
1816
19.4k
        }
1817
41.2k
        QPDFObjectHandle stream_dict = object.getDict();
1818
1819
41.2k
        cur_stream_length = stream_data.size();
1820
41.2k
        if (is_root_metadata && encryption && !encryption->getEncryptMetadata()) {
1821
            // Don't encrypt stream data for the metadata stream
1822
0
            cur_data_key.clear();
1823
0
        }
1824
41.2k
        adjustAESStreamLength(cur_stream_length);
1825
41.2k
        unparseObject(stream_dict, 0, flags, cur_stream_length, compress_stream);
1826
41.2k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1827
41.2k
        write("\nstream\n").write_encrypted(stream_data);
1828
41.2k
        added_newline = cfg.newline_before_endstream() || (cfg.qdf() && last_char != '\n');
1829
41.2k
        write(added_newline ? "\nendstream" : "endstream");
1830
1.24M
    } else if (tc == ::ot_string) {
1831
35.7k
        std::string val;
1832
35.7k
        if (encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1833
29.0k
            !cur_data_key.empty()) {
1834
26.8k
            val = object.getStringValue();
1835
26.8k
            if (cfg.encrypt_use_aes()) {
1836
26.8k
                Pl_Buffer bufpl("encrypted string");
1837
26.8k
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, cur_data_key);
1838
26.8k
                pl.writeString(val);
1839
26.8k
                pl.finish();
1840
26.8k
                val = QPDF_String(bufpl.getString()).unparse(true);
1841
26.8k
            } else {
1842
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1843
0
                char* tmp = tmp_ph.get();
1844
0
                size_t vlen = val.length();
1845
0
                RC4 rc4(
1846
0
                    QUtil::unsigned_char_pointer(cur_data_key),
1847
0
                    QIntC::to_int(cur_data_key.length()));
1848
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1849
0
                rc4.process(data, vlen, data);
1850
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1851
0
            }
1852
26.8k
        } else if (flags & f_hex_string) {
1853
22
            val = QPDF_String(object.getStringValue()).unparse(true);
1854
8.94k
        } else {
1855
8.94k
            val = object.unparseResolved();
1856
8.94k
        }
1857
35.7k
        write(val);
1858
1.21M
    } else {
1859
1.21M
        write(object.unparseResolved());
1860
1.21M
    }
1861
1.69M
}
1862
1863
void
1864
impl::Writer::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1865
9.99k
{
1866
9.99k
    qpdf_assert_debug(first_obj > 0);
1867
9.99k
    bool is_first = true;
1868
9.99k
    auto id = std::to_string(first_obj) + ' ';
1869
146k
    for (auto& offset: offsets) {
1870
146k
        if (is_first) {
1871
9.99k
            is_first = false;
1872
136k
        } else {
1873
136k
            write_qdf("\n").write_no_qdf(" ");
1874
136k
        }
1875
146k
        write(id);
1876
146k
        util::increment(id, 1);
1877
146k
        write(offset);
1878
146k
    }
1879
9.99k
    write("\n");
1880
9.99k
}
1881
1882
void
1883
impl::Writer::writeObjectStream(QPDFObjectHandle object)
1884
5.00k
{
1885
    // Note: object might be null if this is a place-holder for an object stream that we are
1886
    // generating from scratch.
1887
1888
5.00k
    QPDFObjGen old_og = object.getObjGen();
1889
5.00k
    qpdf_assert_debug(old_og.getGen() == 0);
1890
5.00k
    int old_id = old_og.getObj();
1891
5.00k
    int new_stream_id = obj[old_og].renumber;
1892
1893
5.00k
    std::vector<qpdf_offset_t> offsets;
1894
5.00k
    qpdf_offset_t first = 0;
1895
1896
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1897
    // first pass.
1898
5.00k
    std::string stream_buffer_pass1;
1899
5.00k
    std::string stream_buffer_pass2;
1900
5.00k
    int first_obj = -1;
1901
5.00k
    const bool compressed = cfg.compress_streams() && !cfg.qdf();
1902
5.00k
    {
1903
        // Pass 1
1904
5.00k
        auto pp_ostream_pass1 = pipeline_stack.activate(stream_buffer_pass1);
1905
1906
5.00k
        int count = -1;
1907
73.3k
        for (auto const& og: object_stream_to_objects[old_id]) {
1908
73.3k
            ++count;
1909
73.3k
            int new_o = obj[og].renumber;
1910
73.3k
            if (first_obj == -1) {
1911
5.00k
                first_obj = new_o;
1912
5.00k
            }
1913
73.3k
            if (cfg.qdf()) {
1914
0
                write("%% Object stream: object ").write(new_o).write(", index ").write(count);
1915
0
                if (!cfg.no_original_object_ids()) {
1916
0
                    write("; original object ID: ").write(og.getObj());
1917
                    // For compatibility, only write the generation if non-zero.  While object
1918
                    // streams only allow objects with generation 0, if we are generating object
1919
                    // streams, the old object could have a non-zero generation.
1920
0
                    if (og.getGen() != 0) {
1921
0
                        write(" ").write(og.getGen());
1922
0
                    }
1923
0
                }
1924
0
                write("\n");
1925
0
            }
1926
1927
73.3k
            offsets.push_back(pipeline->getCount());
1928
            // To avoid double-counting objects being written in object streams for progress
1929
            // reporting, decrement in pass 1.
1930
73.3k
            indicateProgress(true, false);
1931
1932
73.3k
            QPDFObjectHandle obj_to_write = qpdf.getObject(og);
1933
73.3k
            if (obj_to_write.isStream()) {
1934
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1935
                // time, but it's not clear to me how to construct a case for this.
1936
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1937
0
                obj_to_write = QPDFObjectHandle::newNull();
1938
0
            }
1939
73.3k
            writeObject(obj_to_write, count);
1940
1941
73.3k
            new_obj[new_o].xref = QPDFXRefEntry(new_stream_id, count);
1942
73.3k
        }
1943
5.00k
    }
1944
5.00k
    {
1945
        // Adjust offsets to skip over comment before first object
1946
5.00k
        first = offsets.at(0);
1947
73.3k
        for (auto& iter: offsets) {
1948
73.3k
            iter -= first;
1949
73.3k
        }
1950
1951
        // Take one pass at writing pairs of numbers so we can get their size information
1952
5.00k
        {
1953
5.00k
            auto pp_discard = pipeline_stack.activate(true);
1954
5.00k
            writeObjectStreamOffsets(offsets, first_obj);
1955
5.00k
            first += pipeline->getCount();
1956
5.00k
        }
1957
1958
        // Set up a stream to write the stream data into a buffer.
1959
5.00k
        auto pp_ostream = pipeline_stack.activate(stream_buffer_pass2);
1960
1961
5.00k
        writeObjectStreamOffsets(offsets, first_obj);
1962
5.00k
        write(stream_buffer_pass1);
1963
5.00k
        stream_buffer_pass1.clear();
1964
5.00k
        stream_buffer_pass1.shrink_to_fit();
1965
5.00k
        if (compressed) {
1966
4.99k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1967
4.99k
        }
1968
5.00k
    }
1969
1970
    // Write the object
1971
5.00k
    openObject(new_stream_id);
1972
5.00k
    setDataKey(new_stream_id);
1973
5.00k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1974
5.00k
    size_t length = stream_buffer_pass2.size();
1975
5.00k
    adjustAESStreamLength(length);
1976
5.00k
    write(" /Length ").write(length).write_qdf("\n ");
1977
5.00k
    if (compressed) {
1978
4.99k
        write(" /Filter /FlateDecode");
1979
4.99k
    }
1980
5.00k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1981
5.00k
    if (!object.null()) {
1982
        // If the original object has an /Extends key, preserve it.
1983
1.39k
        QPDFObjectHandle dict = object.getDict();
1984
1.39k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1985
1.39k
        if (extends.isIndirect()) {
1986
416
            write_qdf("\n ").write(" /Extends ");
1987
416
            unparseChild(extends, 1, f_in_ostream);
1988
416
        }
1989
1.39k
    }
1990
5.00k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1991
5.00k
    write(cfg.newline_before_endstream() ? "\nendstream" : "endstream");
1992
5.00k
    if (encryption) {
1993
4.85k
        cur_data_key.clear();
1994
4.85k
    }
1995
5.00k
    closeObject(new_stream_id);
1996
5.00k
}
1997
1998
void
1999
impl::Writer::writeObject(QPDFObjectHandle object, int object_stream_index)
2000
279k
{
2001
279k
    QPDFObjGen old_og = object.getObjGen();
2002
2003
279k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
2004
204k
        object_stream_to_objects.contains(old_og.getObj())) {
2005
5.00k
        writeObjectStream(object);
2006
5.00k
        return;
2007
5.00k
    }
2008
2009
274k
    indicateProgress(false, false);
2010
274k
    auto new_id = obj[old_og].renumber;
2011
274k
    if (cfg.qdf()) {
2012
0
        if (page_object_to_seq.contains(old_og)) {
2013
0
            write("%% Page ").write(page_object_to_seq[old_og]).write("\n");
2014
0
        }
2015
0
        if (contents_to_page_seq.contains(old_og)) {
2016
0
            write("%% Contents for page ").write(contents_to_page_seq[old_og]).write("\n");
2017
0
        }
2018
0
    }
2019
274k
    if (object_stream_index == -1) {
2020
201k
        if (cfg.qdf() && !cfg.no_original_object_ids()) {
2021
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
2022
0
        }
2023
201k
        openObject(new_id);
2024
201k
        setDataKey(new_id);
2025
201k
        unparseObject(object, 0, 0);
2026
201k
        cur_data_key.clear();
2027
201k
        closeObject(new_id);
2028
201k
    } else {
2029
73.3k
        unparseObject(object, 0, f_in_ostream);
2030
73.3k
        write("\n");
2031
73.3k
    }
2032
2033
274k
    if (!cfg.direct_stream_lengths() && object.isStream()) {
2034
0
        if (cfg.qdf()) {
2035
0
            if (added_newline) {
2036
0
                write("%QDF: ignore_newline\n");
2037
0
            }
2038
0
        }
2039
0
        openObject(new_id + 1);
2040
0
        write(cur_stream_length);
2041
0
        closeObject(new_id + 1);
2042
0
    }
2043
274k
}
2044
2045
std::string
2046
impl::Writer::getOriginalID1()
2047
25.9k
{
2048
25.9k
    QPDFObjectHandle trailer = qpdf.getTrailer();
2049
25.9k
    if (trailer.hasKey("/ID")) {
2050
2.71k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
2051
23.2k
    } else {
2052
23.2k
        return "";
2053
23.2k
    }
2054
25.9k
}
2055
2056
void
2057
impl::Writer::generateID(bool encrypted)
2058
25.4k
{
2059
    // Generate the ID lazily so that we can handle the user's preference to use static or
2060
    // deterministic ID generation.
2061
2062
25.4k
    if (!id2.empty()) {
2063
15.9k
        return;
2064
15.9k
    }
2065
2066
9.56k
    QPDFObjectHandle trailer = qpdf.getTrailer();
2067
2068
9.56k
    std::string result;
2069
2070
9.56k
    if (cfg.static_id()) {
2071
        // For test suite use only...
2072
9.56k
        static unsigned char tmp[] = {
2073
9.56k
            0x31,
2074
9.56k
            0x41,
2075
9.56k
            0x59,
2076
9.56k
            0x26,
2077
9.56k
            0x53,
2078
9.56k
            0x58,
2079
9.56k
            0x97,
2080
9.56k
            0x93,
2081
9.56k
            0x23,
2082
9.56k
            0x84,
2083
9.56k
            0x62,
2084
9.56k
            0x64,
2085
9.56k
            0x33,
2086
9.56k
            0x83,
2087
9.56k
            0x27,
2088
9.56k
            0x95,
2089
9.56k
            0x00};
2090
9.56k
        result = reinterpret_cast<char*>(tmp);
2091
9.56k
    } else {
2092
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
2093
        // only thing that's really important is that it is very likely to be unique.  We can't
2094
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
2095
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
2096
        // sufficient portion of the file's contents such no two non-matching files would match in
2097
        // the subsets used for this computation.  Note that we explicitly omit the filename from
2098
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
2099
        // that case, would have the same ID regardless of the output file's name.
2100
2101
0
        std::string seed;
2102
0
        if (cfg.deterministic_id()) {
2103
0
            if (encrypted) {
2104
0
                throw std::runtime_error(
2105
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
2106
0
                    "written is encrypted (even though the file may not require a password)");
2107
0
            }
2108
0
            if (deterministic_id_data.empty()) {
2109
0
                throw std::logic_error(
2110
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
2111
0
            }
2112
0
            seed += deterministic_id_data;
2113
0
        } else {
2114
0
            seed += std::to_string(QUtil::get_current_time());
2115
0
            seed += filename;
2116
0
            seed += " ";
2117
0
        }
2118
0
        seed += " QPDF ";
2119
0
        if (trailer.hasKey("/Info")) {
2120
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
2121
0
                if (item.second.isString()) {
2122
0
                    seed += " ";
2123
0
                    seed += item.second.getStringValue();
2124
0
                }
2125
0
            }
2126
0
        }
2127
2128
0
        MD5 md5;
2129
0
        md5.encodeString(seed.c_str());
2130
0
        MD5::Digest digest;
2131
0
        md5.digest(digest);
2132
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
2133
0
    }
2134
2135
    // If /ID already exists, follow the spec: use the original first word and generate a new second
2136
    // word.  Otherwise, we'll use the generated ID for both.
2137
2138
9.56k
    id2 = result;
2139
    // Note: keep /ID from old file even if --static-id was given.
2140
9.56k
    id1 = getOriginalID1();
2141
9.56k
    if (id1.empty()) {
2142
8.53k
        id1 = id2;
2143
8.53k
    }
2144
9.56k
}
2145
2146
void
2147
impl::Writer::initializeSpecialStreams()
2148
0
{
2149
    // Mark all page content streams in case we are filtering or normalizing.
2150
0
    int num = 0;
2151
0
    for (auto& page: pages) {
2152
0
        page_object_to_seq[page.getObjGen()] = ++num;
2153
0
        QPDFObjectHandle contents = page.getKey("/Contents");
2154
0
        std::vector<QPDFObjGen> contents_objects;
2155
0
        if (contents.isArray()) {
2156
0
            int n = static_cast<int>(contents.size());
2157
0
            for (int i = 0; i < n; ++i) {
2158
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
2159
0
            }
2160
0
        } else if (contents.isStream()) {
2161
0
            contents_objects.push_back(contents.getObjGen());
2162
0
        }
2163
2164
0
        for (auto const& c: contents_objects) {
2165
0
            contents_to_page_seq[c] = num;
2166
0
            normalized_streams.insert(c);
2167
0
        }
2168
0
    }
2169
0
}
2170
2171
void
2172
impl::Writer::preserveObjectStreams()
2173
9.54k
{
2174
9.54k
    auto const& xref = objects.xref_table();
2175
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
2176
    // streams out of old objects that have generation numbers greater than zero. However in an
2177
    // existing PDF, all object stream objects and all objects in them must have generation 0
2178
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
2179
    // that are not allowed to be in object streams. In addition to removing objects that were
2180
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
2181
    // objects from being included.
2182
9.54k
    auto end = xref.cend();
2183
9.54k
    obj.streams_empty = true;
2184
9.54k
    if (cfg.preserve_unreferenced()) {
2185
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2186
0
            if (iter->second.getType() == 2) {
2187
                // Pdf contains object streams.
2188
0
                obj.streams_empty = false;
2189
0
                obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2190
0
            }
2191
0
        }
2192
9.54k
    } else {
2193
        // Start by scanning for first compressed object in case we don't have any object streams to
2194
        // process.
2195
87.9k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2196
79.5k
            if (iter->second.getType() == 2) {
2197
                // Pdf contains object streams.
2198
1.17k
                obj.streams_empty = false;
2199
1.17k
                auto eligible = objects.compressible_set();
2200
                // The object pointed to by iter may be a previous generation, in which case it is
2201
                // removed by compressible_set. We need to restart the loop (while the object
2202
                // table may contain multiple generations of an object).
2203
308k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2204
307k
                    if (iter->second.getType() == 2) {
2205
288k
                        auto id = static_cast<size_t>(iter->first.getObj());
2206
288k
                        if (id < eligible.size() && eligible[id]) {
2207
46.7k
                            obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2208
242k
                        } else {
2209
242k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2210
242k
                        }
2211
288k
                    }
2212
307k
                }
2213
1.17k
                return;
2214
1.17k
            }
2215
79.5k
        }
2216
9.54k
    }
2217
9.54k
}
2218
2219
void
2220
impl::Writer::generateObjectStreams()
2221
0
{
2222
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2223
    // how many object streams are needed so that we can distribute objects approximately evenly
2224
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2225
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2226
    // here later.
2227
2228
    // This code doesn't do anything with /Extends.
2229
2230
0
    auto eligible = objects.compressible_vector();
2231
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2232
2233
0
    initializeTables(2U * n_object_streams);
2234
0
    if (n_object_streams == 0) {
2235
0
        obj.streams_empty = true;
2236
0
        return;
2237
0
    }
2238
0
    size_t n_per = eligible.size() / n_object_streams;
2239
0
    if (n_per * n_object_streams < eligible.size()) {
2240
0
        ++n_per;
2241
0
    }
2242
0
    unsigned int n = 0;
2243
0
    int cur_ostream = qpdf.newIndirectNull().getObjectID();
2244
0
    for (auto const& item: eligible) {
2245
0
        if (n == n_per) {
2246
0
            n = 0;
2247
            // Construct a new null object as the "original" object stream.  The rest of the code
2248
            // knows that this means we're creating the object stream from scratch.
2249
0
            cur_ostream = qpdf.newIndirectNull().getObjectID();
2250
0
        }
2251
0
        auto& o = obj[item];
2252
0
        o.object_stream = cur_ostream;
2253
0
        o.gen = item.getGen();
2254
0
        ++n;
2255
0
    }
2256
0
}
2257
2258
Dictionary
impl::Writer::trimmed_trailer()
{
    // Return a shallow copy of the trailer without the keys that necessarily have to be replaced
    // when writing the file.
    static constexpr char const* replaced_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };

    Dictionary trailer = qpdf.getTrailer().unsafeShallowCopy();
    for (auto const* key: replaced_keys) {
        trailer.erase(key);
    }
    return trailer;
}
2283
2284
// Make document extension level information direct as required by the spec.
2285
void
2286
impl::Writer::prepareFileForWrite()
2287
9.47k
{
2288
9.47k
    qpdf.fixDanglingReferences();
2289
9.47k
    auto root = qpdf.getRoot();
2290
9.47k
    auto oh = root.getKey("/Extensions");
2291
9.47k
    if (oh.isDictionary()) {
2292
359
        const bool extensions_indirect = oh.isIndirect();
2293
359
        if (extensions_indirect) {
2294
112
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2295
112
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2296
112
        }
2297
359
        if (oh.hasKey("/ADBE")) {
2298
194
            auto adbe = oh.getKey("/ADBE");
2299
194
            if (adbe.isIndirect()) {
2300
137
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2301
137
                adbe.makeDirect();
2302
137
                oh.replaceKey("/ADBE", adbe);
2303
137
            }
2304
194
        }
2305
359
    }
2306
9.47k
}
2307
2308
void
2309
impl::Writer::initializeTables(size_t extra)
2310
9.54k
{
2311
9.54k
    auto size = objects.table_size() + 100u + extra;
2312
9.54k
    obj.resize(size);
2313
9.54k
    new_obj.resize(size);
2314
9.54k
}
2315
2316
void
2317
impl::Writer::doWriteSetup()
2318
9.54k
{
2319
9.54k
    if (did_write_setup) {
2320
0
        return;
2321
0
    }
2322
9.54k
    did_write_setup = true;
2323
2324
    // Do preliminary setup
2325
2326
9.54k
    if (cfg.linearize()) {
2327
9.54k
        cfg.qdf(false);
2328
9.54k
    }
2329
2330
9.54k
    if (cfg.pclm()) {
2331
0
        encryption = nullptr;
2332
0
    }
2333
2334
9.54k
    if (encryption) {
2335
        // Encryption has been explicitly set
2336
9.54k
        cfg.preserve_encryption(false);
2337
9.54k
    } else if (cfg.normalize_content() || cfg.pclm()) {
2338
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2339
        // though, we still obey that.
2340
0
        cfg.preserve_encryption(false);
2341
0
    }
2342
2343
9.54k
    if (cfg.preserve_encryption()) {
2344
0
        copyEncryptionParameters(qpdf);
2345
0
    }
2346
2347
9.54k
    if (!cfg.forced_pdf_version().empty()) {
2348
0
        int major = 0;
2349
0
        int minor = 0;
2350
0
        parseVersion(cfg.forced_pdf_version(), major, minor);
2351
0
        disableIncompatibleEncryption(major, minor, cfg.forced_extension_level());
2352
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2353
0
            cfg.object_streams(qpdf_o_disable);
2354
0
        }
2355
0
    }
2356
2357
9.54k
    if (cfg.qdf() || cfg.normalize_content()) {
2358
0
        initializeSpecialStreams();
2359
0
    }
2360
2361
9.54k
    switch (cfg.object_streams()) {
2362
0
    case qpdf_o_disable:
2363
0
        initializeTables();
2364
0
        obj.streams_empty = true;
2365
0
        break;
2366
2367
9.54k
    case qpdf_o_preserve:
2368
9.54k
        initializeTables();
2369
9.54k
        preserveObjectStreams();
2370
9.54k
        break;
2371
2372
0
    case qpdf_o_generate:
2373
0
        generateObjectStreams();
2374
0
        break;
2375
9.54k
    }
2376
2377
9.53k
    if (!obj.streams_empty) {
2378
1.16k
        if (cfg.linearize()) {
2379
            // Page dictionaries are not allowed to be compressed objects.
2380
1.53k
            for (auto& page: pages) {
2381
1.53k
                if (obj[page].object_stream > 0) {
2382
100
                    obj[page].object_stream = 0;
2383
100
                }
2384
1.53k
            }
2385
1.16k
        }
2386
2387
1.16k
        if (cfg.linearize() || encryption) {
2388
            // The document catalog is not allowed to be compressed in cfg.linearized_ files either.
2389
            // It also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2390
            // handle encrypted files with compressed document catalogs, so we disable them in that
2391
            // case as well.
2392
1.16k
            if (obj[root_og].object_stream > 0) {
2393
7
                obj[root_og].object_stream = 0;
2394
7
            }
2395
1.16k
        }
2396
2397
        // Generate reverse mapping from object stream to objects
2398
2.70M
        obj.forEach([this](auto id, auto const& item) -> void {
2399
2.70M
            if (item.object_stream > 0) {
2400
46.5k
                auto& vec = object_stream_to_objects[item.object_stream];
2401
46.5k
                vec.emplace_back(id, item.gen);
2402
46.5k
                if (max_ostream_index < vec.size()) {
2403
14.2k
                    ++max_ostream_index;
2404
14.2k
                }
2405
46.5k
            }
2406
2.70M
        });
2407
1.16k
        --max_ostream_index;
2408
2409
1.16k
        if (object_stream_to_objects.empty()) {
2410
412
            obj.streams_empty = true;
2411
753
        } else {
2412
753
            setMinimumPDFVersion("1.5");
2413
753
        }
2414
1.16k
    }
2415
2416
9.53k
    setMinimumPDFVersion(qpdf.getPDFVersion(), qpdf.getExtensionLevel());
2417
9.53k
    final_pdf_version = min_pdf_version;
2418
9.53k
    final_extension_level = min_extension_level;
2419
9.53k
    if (!cfg.forced_pdf_version().empty()) {
2420
0
        final_pdf_version = cfg.forced_pdf_version();
2421
0
        final_extension_level = cfg.forced_extension_level();
2422
0
    }
2423
9.53k
}
2424
2425
// Public entry point for writing the output: forwards to write() on the implementation object
// held in m.
void
2426
QPDFWriter::write()
2427
9.54k
{
2428
9.54k
    m->write();
2429
9.54k
}
2430
2431
void
2432
impl::Writer::write()
2433
9.54k
{
2434
9.54k
    doWriteSetup();
2435
2436
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2437
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2438
9.54k
    events_expected = QIntC::to_int(qpdf.getObjectCount() * (cfg.linearize() ? 2 : 1));
2439
2440
9.54k
    prepareFileForWrite();
2441
2442
9.54k
    if (cfg.linearize()) {
2443
9.44k
        writeLinearized();
2444
9.44k
    } else {
2445
104
        writeStandard();
2446
104
    }
2447
2448
9.54k
    pipeline->finish();
2449
9.54k
    if (close_file) {
2450
0
        fclose(file);
2451
0
    }
2452
9.54k
    file = nullptr;
2453
9.54k
    if (buffer_pipeline) {
2454
0
        output_buffer = buffer_pipeline->getBuffer();
2455
0
        buffer_pipeline = nullptr;
2456
0
    }
2457
9.54k
    indicateProgress(false, true);
2458
9.54k
}
2459
2460
// Return the object/generation pair formed from the renumber value stored for og in the
// implementation's obj table; the generation of the returned pair is always 0.
QPDFObjGen
2461
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2462
0
{
2463
0
    return {m->obj[og].renumber, 0};
2464
0
}
2465
2466
// Public accessor: forwards to getWrittenXRefTable() on the implementation object held in m and
// returns its result.
std::map<QPDFObjGen, QPDFXRefEntry>
2467
QPDFWriter::getWrittenXRefTable()
2468
0
{
2469
0
    return m->getWrittenXRefTable();
2470
0
}
2471
2472
std::map<QPDFObjGen, QPDFXRefEntry>
2473
impl::Writer::getWrittenXRefTable()
2474
0
{
2475
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2476
2477
0
    auto it = result.begin();
2478
0
    new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2479
0
        if (item.xref.getType() != 0) {
2480
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2481
0
        }
2482
0
    });
2483
0
    return result;
2484
0
}
2485
2486
void
2487
impl::Writer::enqueuePart(std::vector<QPDFObjectHandle>& part)
2488
43.2k
{
2489
118k
    for (auto const& oh: part) {
2490
118k
        enqueue(oh);
2491
118k
    }
2492
43.2k
}
2493
2494
void
2495
impl::Writer::writeEncryptionDictionary()
2496
16.3k
{
2497
16.3k
    encryption_dict_objid = openObject(encryption_dict_objid);
2498
16.3k
    auto& enc = *encryption;
2499
16.3k
    auto const V = enc.getV();
2500
2501
16.3k
    write("<<");
2502
16.3k
    if (V >= 4) {
2503
16.3k
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2504
16.3k
        write(cfg.encrypt_use_aes() ? (V < 5 ? "/AESV2" : "/AESV3") : "/V2");
2505
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2506
        // MacOS won't open encrypted files without it.
2507
16.3k
        write(V < 5 ? " /Length 16 >> >>" : " /Length 32 >> >>");
2508
16.3k
        if (!encryption->getEncryptMetadata()) {
2509
0
            write(" /EncryptMetadata false");
2510
0
        }
2511
16.3k
    }
2512
16.3k
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2513
16.3k
    write(" /O ").write_string(enc.getO(), true);
2514
16.3k
    if (V >= 4) {
2515
16.3k
        write(" /OE ").write_string(enc.getOE(), true);
2516
16.3k
    }
2517
16.3k
    write(" /P ").write(enc.getP());
2518
16.3k
    if (V >= 5) {
2519
16.3k
        write(" /Perms ").write_string(enc.getPerms(), true);
2520
16.3k
    }
2521
16.3k
    write(" /R ").write(enc.getR());
2522
2523
16.3k
    if (V >= 4) {
2524
16.3k
        write(" /StmF /StdCF /StrF /StdCF");
2525
16.3k
    }
2526
16.3k
    write(" /U ").write_string(enc.getU(), true);
2527
16.3k
    if (V >= 4) {
2528
16.3k
        write(" /UE ").write_string(enc.getUE(), true);
2529
16.3k
    }
2530
16.3k
    write(" /V ").write(enc.getV()).write(" >>");
2531
16.3k
    closeObject(encryption_dict_objid);
2532
16.3k
}
2533
2534
// Return the PDF version string that the output will carry. doWriteSetup() is invoked on the
// implementation first so that final_pdf_version has been computed before it is read.
std::string
2535
QPDFWriter::getFinalVersion()
2536
0
{
2537
0
    m->doWriteSetup();
2538
0
    return m->final_pdf_version;
2539
0
}
2540
2541
void
2542
impl::Writer::writeHeader()
2543
16.3k
{
2544
16.3k
    write("%PDF-").write(final_pdf_version);
2545
16.3k
    if (cfg.pclm()) {
2546
        // PCLm version
2547
0
        write("\n%PCLm 1.0\n");
2548
16.3k
    } else {
2549
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2550
        // as binary.
2551
16.3k
        write("\n%\xbf\xf7\xa2\xfe\n");
2552
16.3k
    }
2553
16.3k
    write_qdf("%QDF-1.0\n\n");
2554
2555
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2556
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2557
    // linearized files, we have to write extra header text after the linearization parameter
2558
    // dictionary.
2559
16.3k
}
2560
2561
void
2562
impl::Writer::writeHintStream(int hint_id)
2563
7.96k
{
2564
7.96k
    std::string hint_buffer;
2565
7.96k
    int S = 0;
2566
7.96k
    int O = 0;
2567
7.96k
    bool compressed = cfg.compress_streams();
2568
7.96k
    lin.generateHintStream(new_obj, obj, hint_buffer, S, O, compressed);
2569
2570
7.96k
    openObject(hint_id);
2571
7.96k
    setDataKey(hint_id);
2572
2573
7.96k
    size_t hlen = hint_buffer.size();
2574
2575
7.96k
    write("<< ");
2576
7.96k
    if (compressed) {
2577
7.96k
        write("/Filter /FlateDecode ");
2578
7.96k
    }
2579
7.96k
    write("/S ").write(S);
2580
7.96k
    if (O) {
2581
220
        write(" /O ").write(O);
2582
220
    }
2583
7.96k
    adjustAESStreamLength(hlen);
2584
7.96k
    write(" /Length ").write(hlen);
2585
7.96k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2586
2587
7.96k
    if (encryption) {
2588
7.96k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2589
7.96k
    }
2590
2591
7.96k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2592
7.96k
    closeObject(hint_id);
2593
7.96k
}
2594
2595
// Convenience overload: writes a cross-reference table by forwarding to the full overload with
// prev = 0, suppress_offsets = false, hint_id = 0, hint_offset = 0, hint_length = 0 and
// linearization_pass = 0, and returns that overload's result.
qpdf_offset_t
2596
impl::Writer::writeXRefTable(trailer_e which, int first, int last, int size)
2597
0
{
2598
    // There are too many extra arguments to replace overloaded function with defaults in the header
2599
    // file...too much risk of leaving something off.
2600
0
    return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0);
2601
0
}
2602
2603
// Write a classic cross-reference table section covering object ids first..last, followed by the
// trailer (via writeTrailer) and a newline.
//
// When suppress_offsets is set, every in-use entry is written with offset 0. Otherwise the offset
// recorded in new_obj is used; if hint_id is non-zero, offsets of objects other than hint_id that
// lie at or beyond hint_offset are shifted by hint_length.
//
// Returns the pipeline position reached right after the subsection header ("xref", first id and
// entry count) and before the newline that ends it.
qpdf_offset_t
impl::Writer::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    write("xref\n").write(first).write(" ").write(last - first + 1);
    qpdf_offset_t const space_before_zero = pipeline->getCount();
    write("\n");

    int next_id = first;
    if (next_id == 0) {
        // Object 0 always gets the fixed free-list head entry.
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    // Offset to report for object `id`, taking suppression and the hint adjustment into account.
    auto entry_offset = [&](int id) -> qpdf_offset_t {
        if (suppress_offsets) {
            return 0;
        }
        qpdf_offset_t const recorded = new_obj[id].xref.getOffset();
        bool const shifted = hint_id != 0 && id != hint_id && recorded >= hint_offset;
        return shifted ? recorded + hint_length : recorded;
    };

    for (; next_id <= last; ++next_id) {
        write(QUtil::int_to_string(entry_offset(next_id), 10)).write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2637
2638
// Convenience overload: writes a cross-reference stream by forwarding to the full overload with
// prev = 0, hint_id = 0, hint_offset = 0, hint_length = 0, skip_compression = false and
// linearization_pass = 0, and returns that overload's result. objid is passed as the full
// overload's xref_id.
qpdf_offset_t
2639
impl::Writer::writeXRefStream(
2640
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
2641
0
{
2642
    // There are too many extra arguments to replace overloaded function with defaults in the header
2643
    // file...too much risk of leaving something off.
2644
0
    return writeXRefStream(
2645
0
        objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0);
2646
0
}
2647
2648
// Write a cross-reference stream as object xref_id, covering object ids first..last.
//
// Each entry consists of a one-byte type, a field wide enough for max_offset + hint_length and
// for max_id, and a field wide enough for max_ostream_index. If hint_id is non-zero, offsets of
// type 1 entries other than hint_id that lie at or beyond hint_offset are shifted by hint_length.
// When stream compression is enabled and QDF mode is off, the data is PNG-filtered and, unless
// skip_compression is set, deflated.
//
// Returns the pipeline position at entry minus one.
qpdf_offset_t
impl::Writer::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = pipeline->getCount();
    qpdf_offset_t const space_before_zero = xref_offset - 1;

    // Field 1 contains offsets and object stream identifiers; field 2 contains object stream
    // indices. One extra byte per entry holds the entry type.
    unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));
    unsigned int f2_size = bytesNeeded(QIntC::to_longlong(max_ostream_index));
    unsigned int esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    new_obj[xref_id].xref = QPDFXRefEntry(pipeline->getCount());

    std::string xref_data;
    bool const compressed = cfg.compress_streams() && !cfg.qdf();
    {
        // While this popper is alive, output is collected in xref_data.
        auto pp_xref = pipeline_stack.activate(xref_data);

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = new_obj[id].xref;
            auto const type = entry.getType();
            if (type == 0) {
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
            } else if (type == 1) {
                qpdf_offset_t offset = entry.getOffset();
                bool const shifted = hint_id != 0 && id != hint_id && offset >= hint_offset;
                if (shifted) {
                    offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(offset), f1_size);
                writeBinary(0, f2_size);
            } else if (type == 2) {
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression set, the stream dictionary below is still written for
        // compression but the data is not actually compressed.  This helps us with computation of
        // padding for pass 1 of linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    // Stream dictionary
    openObject(xref_id);
    write("<<");
    write_qdf("\n ");
    write(" /Type /XRef");
    write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ");
        write(" /Filter /FlateDecode");
        write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ");
    write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");

    // /Index is only written when the stream does not cover ids 0 through size - 1.
    if (first != 0 || last != (size - 1)) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);

    // Stream data
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2743
2744
size_t
2745
impl::Writer::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2746
964
{
2747
    // This routine is called right after a linearization first pass xref stream has been written
2748
    // without compression.  Calculate the amount of padding that would be required in the worst
2749
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2750
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2751
    // 10 extra bytes for number length increases.
2752
2753
964
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2754
964
}
2755
2756
void
2757
impl::Writer::writeLinearized()
2758
9.44k
{
2759
    // Optimize file and enqueue objects in order
2760
2761
9.44k
    std::map<int, int> stream_cache;
2762
2763
44.7k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2764
44.7k
        if (auto& result = stream_cache[stream.getObjectID()]) {
2765
22.4k
            return result;
2766
22.4k
        } else {
2767
22.3k
            return result = will_filter_stream(stream) ? 2 : 1;
2768
22.3k
        }
2769
44.7k
    };
2770
2771
9.44k
    lin.optimize(obj, skip_stream_parameters);
2772
2773
9.44k
    std::vector<QPDFObjectHandle> part4;
2774
9.44k
    std::vector<QPDFObjectHandle> part6;
2775
9.44k
    std::vector<QPDFObjectHandle> part7;
2776
9.44k
    std::vector<QPDFObjectHandle> part8;
2777
9.44k
    std::vector<QPDFObjectHandle> part9;
2778
9.44k
    lin.parts(obj, part4, part6, part7, part8, part9);
2779
2780
    // Object number sequence:
2781
    //
2782
    //  second half
2783
    //    second half uncompressed objects
2784
    //    second half xref stream, if any
2785
    //    second half compressed objects
2786
    //  first half
2787
    //    linearization dictionary
2788
    //    first half xref stream, if any
2789
    //    part 4 uncompresesd objects
2790
    //    encryption dictionary, if any
2791
    //    hint stream
2792
    //    part 6 uncompressed objects
2793
    //    first half compressed objects
2794
    //
2795
2796
    // Second half objects
2797
9.44k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2798
9.44k
    int second_half_first_obj = 1;
2799
9.44k
    int after_second_half = 1 + second_half_uncompressed;
2800
9.44k
    next_objid = after_second_half;
2801
9.44k
    int second_half_xref = 0;
2802
9.44k
    bool need_xref_stream = !obj.streams_empty;
2803
9.44k
    if (need_xref_stream) {
2804
626
        second_half_xref = next_objid++;
2805
626
    }
2806
    // Assign numbers to all compressed objects in the second half.
2807
9.44k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2808
35.8k
    for (int i = 0; i < 3; ++i) {
2809
42.7k
        for (auto const& oh: *vecs2[i]) {
2810
42.7k
            assignCompressedObjectNumbers(oh.getObjGen());
2811
42.7k
        }
2812
26.4k
    }
2813
9.44k
    int second_half_end = next_objid - 1;
2814
9.44k
    int second_trailer_size = next_objid;
2815
2816
    // First half objects
2817
9.44k
    int first_half_start = next_objid;
2818
9.44k
    int lindict_id = next_objid++;
2819
9.44k
    int first_half_xref = 0;
2820
9.44k
    if (need_xref_stream) {
2821
626
        first_half_xref = next_objid++;
2822
626
    }
2823
9.44k
    int part4_first_obj = next_objid;
2824
9.44k
    next_objid += QIntC::to_int(part4.size());
2825
9.44k
    int after_part4 = next_objid;
2826
9.44k
    if (encryption) {
2827
8.81k
        encryption_dict_objid = next_objid++;
2828
8.81k
    }
2829
9.44k
    int hint_id = next_objid++;
2830
9.44k
    int part6_first_obj = next_objid;
2831
9.44k
    next_objid += QIntC::to_int(part6.size());
2832
9.44k
    int after_part6 = next_objid;
2833
    // Assign numbers to all compressed objects in the first half
2834
9.44k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2835
27.0k
    for (int i = 0; i < 2; ++i) {
2836
76.5k
        for (auto const& oh: *vecs1[i]) {
2837
76.5k
            assignCompressedObjectNumbers(oh.getObjGen());
2838
76.5k
        }
2839
17.6k
    }
2840
9.44k
    int first_half_end = next_objid - 1;
2841
9.44k
    int first_trailer_size = next_objid;
2842
2843
9.44k
    int part4_end_marker = part4.back().getObjectID();
2844
9.44k
    int part6_end_marker = part6.back().getObjectID();
2845
9.44k
    qpdf_offset_t space_before_zero = 0;
2846
9.44k
    qpdf_offset_t file_size = 0;
2847
9.44k
    qpdf_offset_t part6_end_offset = 0;
2848
9.44k
    qpdf_offset_t first_half_max_obj_offset = 0;
2849
9.44k
    qpdf_offset_t second_xref_offset = 0;
2850
9.44k
    qpdf_offset_t first_xref_end = 0;
2851
9.44k
    qpdf_offset_t second_xref_end = 0;
2852
2853
9.44k
    next_objid = part4_first_obj;
2854
9.44k
    enqueuePart(part4);
2855
9.44k
    if (next_objid != after_part4) {
2856
        // This can happen with very botched files as in the fuzzer test. There are likely some
2857
        // faulty assumptions in calculateLinearizationData
2858
11
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2859
11
    }
2860
9.43k
    next_objid = part6_first_obj;
2861
9.43k
    enqueuePart(part6);
2862
9.43k
    if (next_objid != after_part6) {
2863
184
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2864
184
    }
2865
9.24k
    next_objid = second_half_first_obj;
2866
9.24k
    enqueuePart(part7);
2867
9.24k
    enqueuePart(part8);
2868
9.24k
    enqueuePart(part9);
2869
9.24k
    if (next_objid != after_second_half) {
2870
128
        throw std::runtime_error("error encountered after writing part 9 of cfg.linearized_ data");
2871
128
    }
2872
2873
9.11k
    qpdf_offset_t hint_length = 0;
2874
9.11k
    std::string hint_buffer;
2875
2876
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2877
2878
9.11k
    FILE* lin_pass1_file = nullptr;
2879
9.11k
    auto pp_pass1 = pipeline_stack.popper();
2880
9.11k
    auto pp_md5 = pipeline_stack.popper();
2881
16.3k
    for (int pass: {1, 2}) {
2882
16.3k
        if (pass == 1) {
2883
8.43k
            if (!cfg.linearize_pass1().empty()) {
2884
0
                lin_pass1_file = QUtil::safe_fopen(cfg.linearize_pass1().data(), "wb");
2885
0
                pipeline_stack.activate(
2886
0
                    pp_pass1,
2887
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2888
8.43k
            } else {
2889
8.43k
                pipeline_stack.activate(pp_pass1, true);
2890
8.43k
            }
2891
8.43k
            if (cfg.deterministic_id()) {
2892
0
                pipeline_stack.activate_md5(pp_md5);
2893
0
            }
2894
8.43k
        }
2895
2896
        // Part 1: header
2897
2898
16.3k
        writeHeader();
2899
2900
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2901
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2902
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2903
        // linearization parameter dictionary must appear within the first 1024 characters of the
2904
        // file.
2905
2906
16.3k
        qpdf_offset_t pos = pipeline->getCount();
2907
16.3k
        openObject(lindict_id);
2908
16.3k
        write("<<");
2909
16.3k
        if (pass == 2) {
2910
7.96k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2911
            // Implementation note 121 states that a space is mandatory after this open bracket.
2912
7.96k
            write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" ");
2913
7.96k
            write(hint_length);
2914
7.96k
            write(" ] /O ").write(obj[pages.all().at(0)].renumber);
2915
7.96k
            write(" /E ").write(part6_end_offset + hint_length);
2916
7.96k
            write(" /N ").write(pages.size());
2917
7.96k
            write(" /T ").write(space_before_zero + hint_length);
2918
7.96k
        }
2919
16.3k
        write(" >>");
2920
16.3k
        closeObject(lindict_id);
2921
16.3k
        static int const pad = 200;
2922
16.3k
        write(QIntC::to_size(pos - pipeline->getCount() + pad), ' ').write("\n");
2923
2924
        // If the user supplied any additional header text, write it here after the linearization
2925
        // parameter dictionary.
2926
16.3k
        write(cfg.extra_header_text());
2927
2928
        // Part 3: first page cross reference table and trailer.
2929
2930
16.3k
        qpdf_offset_t first_xref_offset = pipeline->getCount();
2931
16.3k
        qpdf_offset_t hint_offset = 0;
2932
16.3k
        if (pass == 2) {
2933
7.96k
            hint_offset = new_obj[hint_id].xref.getOffset();
2934
7.96k
        }
2935
16.3k
        if (need_xref_stream) {
2936
            // Must pad here too.
2937
965
            if (pass == 1) {
2938
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2939
                // reserved for each file offset.  This would provide adequate space for the xref
2940
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2941
                // file, which is extremely likely.  In the second pass, we will know the actual
2942
                // value for this, but it's okay if it's smaller.
2943
563
                first_half_max_obj_offset = 1 << 25;
2944
563
            }
2945
965
            pos = pipeline->getCount();
2946
965
            writeXRefStream(
2947
965
                first_half_xref,
2948
965
                first_half_end,
2949
965
                first_half_max_obj_offset,
2950
965
                t_lin_first,
2951
965
                first_half_start,
2952
965
                first_half_end,
2953
965
                first_trailer_size,
2954
965
                hint_length + second_xref_offset,
2955
965
                hint_id,
2956
965
                hint_offset,
2957
965
                hint_length,
2958
965
                (pass == 1),
2959
965
                pass);
2960
965
            qpdf_offset_t endpos = pipeline->getCount();
2961
965
            if (pass == 1) {
2962
                // Pad so we have enough room for the real xref stream.
2963
562
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2964
562
                first_xref_end = pipeline->getCount();
2965
562
            } else {
2966
                // Pad so that the next object starts at the same place as in pass 1.
2967
403
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2968
2969
403
                if (pipeline->getCount() != first_xref_end) {
2970
0
                    throw std::logic_error(
2971
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2972
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2973
0
                }
2974
403
            }
2975
965
            write("\n");
2976
15.4k
        } else {
2977
15.4k
            writeXRefTable(
2978
15.4k
                t_lin_first,
2979
15.4k
                first_half_start,
2980
15.4k
                first_half_end,
2981
15.4k
                first_trailer_size,
2982
15.4k
                hint_length + second_xref_offset,
2983
15.4k
                (pass == 1),
2984
15.4k
                hint_id,
2985
15.4k
                hint_offset,
2986
15.4k
                hint_length,
2987
15.4k
                pass);
2988
15.4k
            write("startxref\n0\n%%EOF\n");
2989
15.4k
        }
2990
2991
        // Parts 4 through 9
2992
2993
206k
        for (auto const& cur_object: object_queue) {
2994
206k
            if (cur_object.getObjectID() == part6_end_marker) {
2995
16.2k
                first_half_max_obj_offset = pipeline->getCount();
2996
16.2k
            }
2997
206k
            writeObject(cur_object);
2998
206k
            if (cur_object.getObjectID() == part4_end_marker) {
2999
16.3k
                if (encryption) {
3000
16.3k
                    writeEncryptionDictionary();
3001
16.3k
                }
3002
16.3k
                if (pass == 1) {
3003
8.36k
                    new_obj[hint_id].xref = QPDFXRefEntry(pipeline->getCount());
3004
8.36k
                } else {
3005
                    // Part 5: hint stream
3006
7.96k
                    write(hint_buffer);
3007
7.96k
                }
3008
16.3k
            }
3009
206k
            if (cur_object.getObjectID() == part6_end_marker) {
3010
15.9k
                part6_end_offset = pipeline->getCount();
3011
15.9k
            }
3012
206k
        }
3013
3014
        // Part 10: overflow hint stream -- not used
3015
3016
        // Part 11: main cross reference table and trailer
3017
3018
16.3k
        second_xref_offset = pipeline->getCount();
3019
16.3k
        if (need_xref_stream) {
3020
803
            pos = pipeline->getCount();
3021
803
            space_before_zero = writeXRefStream(
3022
803
                second_half_xref,
3023
803
                second_half_end,
3024
803
                second_xref_offset,
3025
803
                t_lin_second,
3026
803
                0,
3027
803
                second_half_end,
3028
803
                second_trailer_size,
3029
803
                0,
3030
803
                0,
3031
803
                0,
3032
803
                0,
3033
803
                (pass == 1),
3034
803
                pass);
3035
803
            qpdf_offset_t endpos = pipeline->getCount();
3036
3037
803
            if (pass == 1) {
3038
                // Pad so we have enough room for the real xref stream.  See comments for previous
3039
                // xref stream on how we calculate the padding.
3040
402
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
3041
402
                second_xref_end = pipeline->getCount();
3042
402
            } else {
3043
                // Make the file size the same.
3044
401
                auto padding =
3045
401
                    QIntC::to_size(second_xref_end + hint_length - 1 - pipeline->getCount());
3046
401
                write(padding, ' ').write("\n");
3047
3048
                // If this assertion fails, maybe we didn't have enough padding above.
3049
401
                if (pipeline->getCount() != second_xref_end + hint_length) {
3050
0
                    throw std::logic_error(
3051
0
                        "count mismatch after xref stream; possible insufficient padding?");
3052
0
                }
3053
401
            }
3054
15.5k
        } else {
3055
15.5k
            space_before_zero = writeXRefTable(
3056
15.5k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
3057
15.5k
        }
3058
16.3k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
3059
3060
16.3k
        if (pass == 1) {
3061
7.96k
            if (cfg.deterministic_id()) {
3062
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
3063
0
                computeDeterministicIDData();
3064
0
                pp_md5.pop();
3065
0
            }
3066
3067
            // Close first pass pipeline
3068
7.96k
            file_size = pipeline->getCount();
3069
7.96k
            pp_pass1.pop();
3070
3071
            // Save hint offset since it will be set to zero by calling openObject.
3072
7.96k
            qpdf_offset_t hint_offset1 = new_obj[hint_id].xref.getOffset();
3073
3074
            // Write hint stream to a buffer
3075
7.96k
            {
3076
7.96k
                auto pp_hint = pipeline_stack.activate(hint_buffer);
3077
7.96k
                writeHintStream(hint_id);
3078
7.96k
            }
3079
7.96k
            hint_length = QIntC::to_offset(hint_buffer.size());
3080
3081
            // Restore hint offset
3082
7.96k
            new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
3083
7.96k
            if (lin_pass1_file) {
3084
                // Write some debugging information
3085
0
                fprintf(
3086
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
3087
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
3088
0
                fprintf(
3089
0
                    lin_pass1_file,
3090
0
                    "%% second_xref_offset=%s\n",
3091
0
                    std::to_string(second_xref_offset).c_str());
3092
0
                fprintf(
3093
0
                    lin_pass1_file,
3094
0
                    "%% second_xref_end=%s\n",
3095
0
                    std::to_string(second_xref_end).c_str());
3096
0
                fclose(lin_pass1_file);
3097
0
                lin_pass1_file = nullptr;
3098
0
            }
3099
7.96k
        }
3100
16.3k
    }
3101
9.11k
}
3102
3103
void
3104
impl::Writer::enqueueObjectsStandard()
3105
0
{
3106
0
    if (cfg.preserve_unreferenced()) {
3107
0
        for (auto const& oh: qpdf.getAllObjects()) {
3108
0
            enqueue(oh);
3109
0
        }
3110
0
    }
3111
3112
    // Put root first on queue.
3113
0
    auto trailer = trimmed_trailer();
3114
0
    enqueue(trailer["/Root"]);
3115
3116
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
3117
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
3118
0
    for (auto& item: trailer) {
3119
0
        if (!item.second.null()) {
3120
0
            enqueue(item.second);
3121
0
        }
3122
0
    }
3123
0
}
3124
3125
void
3126
impl::Writer::enqueueObjectsPCLm()
3127
0
{
3128
    // Image transform stream content for page strip images. Each of this new stream has to come
3129
    // after every page image strip written in the pclm file.
3130
0
    std::string image_transform_content = "q /image Do Q\n";
3131
3132
    // enqueue all pages first
3133
0
    for (auto& page: pages) {
3134
0
        enqueue(page);
3135
0
        enqueue(page["/Contents"]);
3136
3137
        // enqueue all the strips for each page
3138
0
        for (auto& image: Dictionary(page["/Resources"]["/XObject"])) {
3139
0
            if (!image.second.null()) {
3140
0
                enqueue(image.second);
3141
0
                enqueue(qpdf.newStream(image_transform_content));
3142
0
            }
3143
0
        }
3144
0
    }
3145
3146
0
    enqueue(trimmed_trailer()["/Root"]);
3147
0
}
3148
3149
void
3150
impl::Writer::indicateProgress(bool decrement, bool finished)
3151
355k
{
3152
355k
    if (decrement) {
3153
73.3k
        --events_seen;
3154
73.3k
        return;
3155
73.3k
    }
3156
3157
282k
    ++events_seen;
3158
3159
282k
    if (!progress_reporter.get()) {
3160
282k
        return;
3161
282k
    }
3162
3163
0
    if (finished || events_seen >= next_progress_report) {
3164
0
        int percentage =
3165
0
            (finished ? 100
3166
0
                 : next_progress_report == 0
3167
0
                 ? 0
3168
0
                 : std::min(99, 1 + ((100 * events_seen) / events_expected)));
3169
0
        progress_reporter->reportProgress(percentage);
3170
0
    }
3171
0
    int increment = std::max(1, (events_expected / 100));
3172
0
    while (events_seen >= next_progress_report) {
3173
0
        next_progress_report += increment;
3174
0
    }
3175
0
}
3176
3177
void
3178
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3179
0
{
3180
0
    m->progress_reporter = pr;
3181
0
}
3182
3183
void
3184
impl::Writer::writeStandard()
3185
0
{
3186
0
    auto pp_md5 = pipeline_stack.popper();
3187
0
    if (cfg.deterministic_id()) {
3188
0
        pipeline_stack.activate_md5(pp_md5);
3189
0
    }
3190
3191
    // Start writing
3192
3193
0
    writeHeader();
3194
0
    write(cfg.extra_header_text());
3195
3196
0
    if (cfg.pclm()) {
3197
0
        enqueueObjectsPCLm();
3198
0
    } else {
3199
0
        enqueueObjectsStandard();
3200
0
    }
3201
3202
    // Now start walking queue, outputting each object.
3203
0
    while (object_queue_front < object_queue.size()) {
3204
0
        QPDFObjectHandle cur_object = object_queue.at(object_queue_front);
3205
0
        ++object_queue_front;
3206
0
        writeObject(cur_object);
3207
0
    }
3208
3209
    // Write out the encryption dictionary, if any
3210
0
    if (encryption) {
3211
0
        writeEncryptionDictionary();
3212
0
    }
3213
3214
    // Now write out xref.  next_objid is now the number of objects.
3215
0
    qpdf_offset_t xref_offset = pipeline->getCount();
3216
0
    if (object_stream_to_objects.empty()) {
3217
        // Write regular cross-reference table
3218
0
        writeXRefTable(t_normal, 0, next_objid - 1, next_objid);
3219
0
    } else {
3220
        // Write cross-reference stream.
3221
0
        int xref_id = next_objid++;
3222
0
        writeXRefStream(xref_id, xref_id, xref_offset, t_normal, 0, next_objid - 1, next_objid);
3223
0
    }
3224
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3225
3226
0
    if (cfg.deterministic_id()) {
3227
0
        QTC::TC(
3228
0
            "qpdf",
3229
0
            "QPDFWriter standard deterministic ID",
3230
0
            object_stream_to_objects.empty() ? 0 : 1);
3231
0
    }
3232
0
}