Coverage Report

Created: 2025-12-05 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source
1
#include <qpdf/qpdf-config.h> // include early for large file support
2
3
#include <qpdf/QPDFWriter_private.hh>
4
5
#include <qpdf/MD5.hh>
6
#include <qpdf/Pl_AES_PDF.hh>
7
#include <qpdf/Pl_Flate.hh>
8
#include <qpdf/Pl_MD5.hh>
9
#include <qpdf/Pl_PNGFilter.hh>
10
#include <qpdf/Pl_RC4.hh>
11
#include <qpdf/Pl_StdioFile.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFObjectHandle_private.hh>
14
#include <qpdf/QPDFObject_private.hh>
15
#include <qpdf/QPDF_private.hh>
16
#include <qpdf/QTC.hh>
17
#include <qpdf/QUtil.hh>
18
#include <qpdf/RC4.hh>
19
#include <qpdf/Util.hh>
20
21
#include <algorithm>
22
#include <concepts>
23
#include <cstdlib>
24
#include <stdexcept>
25
#include <tuple>
26
27
using namespace std::literals;
28
using namespace qpdf;
29
30
using Encryption = impl::Doc::Encryption;
31
using Config = Writer::Config;
32
33
// Intentionally written out (not defaulted, not inline) -- see QPDF_DLL_CLASS in
// README-maintainer.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
}
37
38
// Stores the callback that reportProgress() invokes. The parameter is taken by value, so it is
// moved into the member rather than copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
42
43
// Intentionally written out (not defaulted, not inline) -- see QPDF_DLL_CLASS in
// README-maintainer.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
}
48
49
void
50
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
51
0
{
52
0
    handler(progress);
53
0
}
54
55
namespace
56
{
57
    class Pl_stack
58
    {
59
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
60
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
61
        // scope, the pipeline stack is popped. This causes finish to be called on the current
62
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
63
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
64
        // pops.
65
        class Popper
66
        {
67
            friend class Pl_stack;
68
69
          public:
70
            Popper() = default;
71
            Popper(Popper const&) = delete;
72
            Popper(Popper&& other) noexcept
73
0
            {
74
0
                // For MSVC, default pops the stack
75
0
                if (this != &other) {
76
0
                    stack = other.stack;
77
0
                    stack_id = other.stack_id;
78
0
                    other.stack = nullptr;
79
0
                    other.stack_id = 0;
80
0
                };
81
0
            }
82
            Popper& operator=(Popper const&) = delete;
83
            Popper&
84
            operator=(Popper&& other) noexcept
85
0
            {
86
0
                // For MSVC, default pops the stack
87
0
                if (this != &other) {
88
0
                    stack = other.stack;
89
0
                    stack_id = other.stack_id;
90
0
                    other.stack = nullptr;
91
0
                    other.stack_id = 0;
92
0
                };
93
0
                return *this;
94
0
            }
95
96
            ~Popper();
97
98
            // Manually pop pipeline from the pipeline stack.
99
            void pop();
100
101
          private:
102
            Popper(Pl_stack& stack) :
103
45.4k
                stack(&stack)
104
45.4k
            {
105
45.4k
            }
106
107
            Pl_stack* stack{nullptr};
108
            unsigned long stack_id{0};
109
        };
110
111
      public:
112
        Pl_stack(pl::Count*& top) :
113
8.99k
            top(top)
114
8.99k
        {
115
8.99k
        }
116
117
        Popper
118
        popper()
119
8.87k
        {
120
8.87k
            return {*this};
121
8.87k
        }
122
123
        void
124
        initialize(Pipeline* p)
125
8.99k
        {
126
8.99k
            auto c = std::make_unique<pl::Count>(++last_id, p);
127
8.99k
            top = c.get();
128
8.99k
            stack.emplace_back(std::move(c));
129
8.99k
        }
130
131
        Popper
132
        activate(std::string& str)
133
35.0k
        {
134
35.0k
            Popper pp{*this};
135
35.0k
            activate(pp, str);
136
35.0k
            return pp;
137
35.0k
        }
138
139
        void
140
        activate(Popper& pp, std::string& str)
141
35.0k
        {
142
35.0k
            activate(pp, false, &str, nullptr);
143
35.0k
        }
144
145
        void
146
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
147
0
        {
148
0
            count_buffer.clear();
149
0
            activate(pp, false, &count_buffer, std::move(next));
150
0
        }
151
152
        Popper
153
        activate(
154
            bool discard = false,
155
            std::string* str = nullptr,
156
            std::unique_ptr<Pipeline> next = nullptr)
157
1.57k
        {
158
1.57k
            Popper pp{*this};
159
1.57k
            activate(pp, discard, str, std::move(next));
160
1.57k
            return pp;
161
1.57k
        }
162
163
        void
164
        activate(
165
            Popper& pp,
166
            bool discard = false,
167
            std::string* str = nullptr,
168
            std::unique_ptr<Pipeline> next = nullptr)
169
36.5k
        {
170
36.5k
            std::unique_ptr<pl::Count> c;
171
36.5k
            if (next) {
172
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
173
36.5k
            } else if (discard) {
174
1.57k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
175
35.0k
            } else if (!str) {
176
0
                c = std::make_unique<pl::Count>(++last_id, top);
177
35.0k
            } else {
178
35.0k
                c = std::make_unique<pl::Count>(++last_id, *str);
179
35.0k
            }
180
36.5k
            pp.stack_id = last_id;
181
36.5k
            top = c.get();
182
36.5k
            stack.emplace_back(std::move(c));
183
36.5k
        }
184
        void
185
        activate_md5(Popper& pp)
186
8.87k
        {
187
8.87k
            qpdf_assert_debug(!md5_pipeline);
188
8.87k
            qpdf_assert_debug(md5_id == 0);
189
8.87k
            qpdf_assert_debug(top->getCount() == 0);
190
8.87k
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
191
8.87k
            md5_pipeline->persistAcrossFinish(true);
192
            // Special case code in pop clears m->md5_pipeline upon deletion.
193
8.87k
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
194
8.87k
            pp.stack_id = last_id;
195
8.87k
            md5_id = last_id;
196
8.87k
            top = c.get();
197
8.87k
            stack.emplace_back(std::move(c));
198
8.87k
        }
199
200
        // Return the hex digest and disable the MD5 pipeline.
201
        std::string
202
        hex_digest()
203
8.71k
        {
204
8.71k
            qpdf_assert_debug(md5_pipeline);
205
8.71k
            auto digest = md5_pipeline->getHexDigest();
206
8.71k
            md5_pipeline->enable(false);
207
8.71k
            return digest;
208
8.71k
        }
209
210
        void
211
        clear_buffer()
212
0
        {
213
0
            count_buffer.clear();
214
0
        }
215
216
      private:
217
        void
218
        pop(unsigned long stack_id)
219
45.4k
        {
220
45.4k
            if (!stack_id) {
221
0
                return;
222
0
            }
223
45.4k
            qpdf_assert_debug(stack.size() >= 2);
224
45.4k
            top->finish();
225
45.4k
            qpdf_assert_debug(stack.back().get() == top);
226
            // It used to be possible for this assertion to fail if writeLinearized exits by
227
            // exception when deterministic ID. There are no longer any cases in which two
228
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
229
            // assertion will fail if they get popped out of order from automatic destruction.
230
45.4k
            qpdf_assert_debug(top->id() == stack_id);
231
45.4k
            if (stack_id == md5_id) {
232
8.87k
                md5_pipeline = nullptr;
233
8.87k
                md5_id = 0;
234
8.87k
            }
235
45.4k
            stack.pop_back();
236
45.4k
            top = stack.back().get();
237
45.4k
        }
238
239
        std::vector<std::unique_ptr<pl::Count>> stack;
240
        pl::Count*& top;
241
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
242
        unsigned long last_id{0};
243
        unsigned long md5_id{0};
244
        std::string count_buffer;
245
    };
246
} // namespace
247
248
// Pops the recorded entry from the owning stack, if this Popper still refers to one.
Pl_stack::Popper::~Popper()
{
    if (stack == nullptr) {
        return;
    }
    stack->pop(stack_id);
}
254
255
void
256
Pl_stack::Popper::pop()
257
0
{
258
0
    if (stack) {
259
0
        stack->pop(stack_id);
260
0
    }
261
0
    stack_id = 0;
262
0
    stack = nullptr;
263
0
}
264
265
namespace qpdf::impl
266
{
267
    // Writer class is restricted to QPDFWriter so that only it can call certain methods.
268
    class Writer: protected Doc::Common
269
    {
270
      public:
271
        // flags used by unparseObject
272
        static int const f_stream = 1 << 0;
273
        static int const f_filtered = 1 << 1;
274
        static int const f_in_ostream = 1 << 2;
275
        static int const f_hex_string = 1 << 3;
276
        static int const f_no_encryption = 1 << 4;
277
278
        enum trailer_e { t_normal, t_lin_first, t_lin_second };
279
280
        Writer() = delete;
281
        Writer(Writer const&) = delete;
282
        Writer(Writer&&) = delete;
283
        Writer& operator=(Writer const&) = delete;
284
        Writer& operator=(Writer&&) = delete;
285
        ~Writer()
286
8.99k
        {
287
8.99k
            if (file && close_file) {
288
0
                fclose(file);
289
0
            }
290
8.99k
            delete output_buffer;
291
8.99k
        }
292
        Writer(QPDF& qpdf, QPDFWriter& w) :
293
9.20k
            Common(qpdf.doc()),
294
9.20k
            lin(qpdf.doc().linearization()),
295
9.20k
            cfg(true),
296
9.20k
            root_og(qpdf.getRoot().indirect() ? qpdf.getRoot().id_gen() : QPDFObjGen(-1, 0)),
297
9.20k
            pipeline_stack(pipeline)
298
9.20k
        {
299
9.20k
        }
300
301
        void write();
302
        std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
303
        void setMinimumPDFVersion(std::string const& version, int extension_level = 0);
304
        void copyEncryptionParameters(QPDF&);
305
        void doWriteSetup();
306
        void prepareFileForWrite();
307
308
        void disableIncompatibleEncryption(int major, int minor, int extension_level);
309
        void interpretR3EncryptionParameters(
310
            bool allow_accessibility,
311
            bool allow_extract,
312
            bool allow_assemble,
313
            bool allow_annotate_and_form,
314
            bool allow_form_filling,
315
            bool allow_modify_other,
316
            qpdf_r3_print_e print,
317
            qpdf_r3_modify_e modify);
318
        void setEncryptionParameters(char const* user_password, char const* owner_password);
319
        void setEncryptionMinimumVersion();
320
        void parseVersion(std::string const& version, int& major, int& minor) const;
321
        int compareVersions(int major1, int minor1, int major2, int minor2) const;
322
        void generateID(bool encrypted);
323
        std::string getOriginalID1();
324
        void initializeTables(size_t extra = 0);
325
        void preserveObjectStreams();
326
        void generateObjectStreams();
327
        void initializeSpecialStreams();
328
        void enqueue(QPDFObjectHandle const& object);
329
        void enqueueObjectsStandard();
330
        void enqueueObjectsPCLm();
331
        void enqueuePart(std::vector<QPDFObjectHandle>& part);
332
        void assignCompressedObjectNumbers(QPDFObjGen og);
333
        Dictionary trimmed_trailer();
334
335
        // Returns tuple<filter, compress_stream, is_root_metadata>
336
        std::tuple<const bool, const bool, const bool>
337
        will_filter_stream(QPDFObjectHandle stream, std::string* stream_data);
338
339
        // Test whether stream would be filtered if it were written.
340
        bool will_filter_stream(QPDFObjectHandle stream);
341
        unsigned int bytesNeeded(long long n);
342
        void writeBinary(unsigned long long val, unsigned int bytes);
343
        Writer& write(std::string_view str);
344
        Writer& write(size_t count, char c);
345
        Writer& write(std::integral auto val);
346
        Writer& write_name(std::string const& str);
347
        Writer& write_string(std::string const& str, bool force_binary = false);
348
        Writer& write_encrypted(std::string_view str);
349
350
        template <typename... Args>
351
        Writer& write_qdf(Args&&... args);
352
        template <typename... Args>
353
        Writer& write_no_qdf(Args&&... args);
354
        void writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj);
355
        void writeObjectStream(QPDFObjectHandle object);
356
        void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
357
        void writeTrailer(
358
            trailer_e which,
359
            int size,
360
            bool xref_stream,
361
            qpdf_offset_t prev,
362
            int linearization_pass);
363
        void unparseObject(
364
            QPDFObjectHandle object,
365
            size_t level,
366
            int flags,
367
            // for stream dictionaries
368
            size_t stream_length = 0,
369
            bool compress = false);
370
        void unparseChild(QPDFObjectHandle const& child, size_t level, int flags);
371
        int openObject(int objid = 0);
372
        void closeObject(int objid);
373
        void writeStandard();
374
        void writeLinearized();
375
        void writeEncryptionDictionary();
376
        void writeHeader();
377
        void writeHintStream(int hint_id);
378
        qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size);
379
        qpdf_offset_t writeXRefTable(
380
            trailer_e which,
381
            int first,
382
            int last,
383
            int size,
384
            // for linearization
385
            qpdf_offset_t prev,
386
            bool suppress_offsets,
387
            int hint_id,
388
            qpdf_offset_t hint_offset,
389
            qpdf_offset_t hint_length,
390
            int linearization_pass);
391
        qpdf_offset_t writeXRefStream(
392
            int objid,
393
            int max_id,
394
            qpdf_offset_t max_offset,
395
            trailer_e which,
396
            int first,
397
            int last,
398
            int size);
399
        qpdf_offset_t writeXRefStream(
400
            int objid,
401
            int max_id,
402
            qpdf_offset_t max_offset,
403
            trailer_e which,
404
            int first,
405
            int last,
406
            int size,
407
            // for linearization
408
            qpdf_offset_t prev,
409
            int hint_id,
410
            qpdf_offset_t hint_offset,
411
            qpdf_offset_t hint_length,
412
            bool skip_compression,
413
            int linearization_pass);
414
415
        void setDataKey(int objid);
416
        void indicateProgress(bool decrement, bool finished);
417
        size_t calculateXrefStreamPadding(qpdf_offset_t xref_bytes);
418
419
        void adjustAESStreamLength(size_t& length);
420
        void computeDeterministicIDData();
421
422
      protected:
423
        Doc::Linearization& lin;
424
425
        qpdf::Writer::Config cfg;
426
427
        QPDFObjGen root_og{-1, 0};
428
        char const* filename{"unspecified"};
429
        FILE* file{nullptr};
430
        bool close_file{false};
431
        std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
432
        Buffer* output_buffer{nullptr};
433
434
        std::unique_ptr<QPDF::Doc::Encryption> encryption;
435
        std::string encryption_key;
436
437
        std::string id1; // for /ID key of
438
        std::string id2; // trailer dictionary
439
        std::string final_pdf_version;
440
        int final_extension_level{0};
441
        std::string min_pdf_version;
442
        int min_extension_level{0};
443
        int encryption_dict_objid{0};
444
        std::string cur_data_key;
445
        std::unique_ptr<Pipeline> file_pl;
446
        qpdf::pl::Count* pipeline{nullptr};
447
        std::vector<QPDFObjectHandle> object_queue;
448
        size_t object_queue_front{0};
449
        QPDFWriter::ObjTable obj;
450
        QPDFWriter::NewObjTable new_obj;
451
        int next_objid{1};
452
        int cur_stream_length_id{0};
453
        size_t cur_stream_length{0};
454
        bool added_newline{false};
455
        size_t max_ostream_index{0};
456
        std::set<QPDFObjGen> normalized_streams;
457
        std::map<QPDFObjGen, int> page_object_to_seq;
458
        std::map<QPDFObjGen, int> contents_to_page_seq;
459
        std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
460
        Pl_stack pipeline_stack;
461
        std::string deterministic_id_data;
462
        bool did_write_setup{false};
463
464
        // For progress reporting
465
        std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
466
        int events_expected{0};
467
        int events_seen{0};
468
        int next_progress_report{0};
469
    }; // class qpdf::impl::Writer
470
471
} // namespace qpdf::impl
472
473
// Private implementation object of QPDFWriter. It derives (privately) from impl::Writer and opens
// itself up only to QPDFWriter and qpdf::Writer through friendship.
class QPDFWriter::Members: impl::Writer
{
    friend class QPDFWriter;
    friend class qpdf::Writer;

  public:
    // Note the argument order is swapped when forwarding to impl::Writer(qpdf, w).
    Members(QPDFWriter& w, QPDF& qpdf) :
        impl::Writer(qpdf, w)
    {
    }
};
484
485
// Builds a QPDFWriter for qpdf and then replaces its configuration with the supplied one.
qpdf::Writer::Writer(QPDF& qpdf, Config cfg) :
    QPDFWriter(qpdf)
{
    auto& members = *m;
    members.cfg = cfg;
}
490
// Creates the writer's implementation object for pdf. No output destination is selected here.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(std::make_shared<Members>(*this, pdf))
{
}
494
495
// Creates the writer for pdf and immediately selects its output via setOutputFilename(filename).
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(std::make_shared<Members>(*this, pdf))
{
    this->setOutputFilename(filename);
}
500
501
// Creates the writer for pdf and immediately selects an already-open FILE* as its output via
// setOutputFile(description, file, close_file).
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(std::make_shared<Members>(*this, pdf))
{
    this->setOutputFile(description, file, close_file);
}
506
507
void
508
QPDFWriter::setOutputFilename(char const* filename)
509
0
{
510
0
    char const* description = filename;
511
0
    FILE* f = nullptr;
512
0
    bool close_file = false;
513
0
    if (filename == nullptr) {
514
0
        description = "standard output";
515
0
        f = stdout;
516
0
        QUtil::binary_stdout();
517
0
    } else {
518
0
        f = QUtil::safe_fopen(filename, "wb+");
519
0
        close_file = true;
520
0
    }
521
0
    setOutputFile(description, f, close_file);
522
0
}
523
524
void
525
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
526
0
{
527
0
    m->filename = description;
528
0
    m->file = file;
529
0
    m->close_file = close_file;
530
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
531
0
    m->pipeline_stack.initialize(m->file_pl.get());
532
0
}
533
534
void
535
QPDFWriter::setOutputMemory()
536
0
{
537
0
    m->filename = "memory buffer";
538
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
539
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
540
0
}
541
542
// Returns the stored output buffer pointer (possibly null) and clears the member, so the writer
// no longer deletes it; the caller takes over the pointer.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
549
550
std::shared_ptr<Buffer>
551
QPDFWriter::getBufferSharedPointer()
552
0
{
553
0
    return std::shared_ptr<Buffer>(getBuffer());
554
0
}
555
556
void
557
QPDFWriter::setOutputPipeline(Pipeline* p)
558
8.99k
{
559
8.99k
    m->filename = "custom pipeline";
560
8.99k
    m->pipeline_stack.initialize(p);
561
8.99k
}
562
563
void
564
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
565
0
{
566
0
    m->cfg.object_streams(mode);
567
0
}
568
569
void
570
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
571
0
{
572
0
    m->cfg.stream_data(mode);
573
0
}
574
575
// Translates a stream-data mode into the equivalent decode_level / compress_streams settings:
//   qpdf_s_uncompress: decode level raised to at least generalized, compression off
//   qpdf_s_preserve:   decode level none, compression off
//   qpdf_s_compress:   decode level raised to at least generalized, compression on
// Any other value leaves the configuration untouched. Returns *this for chaining.
Config&
Config::stream_data(qpdf_stream_data_e mode)
{
    bool const recognized =
        mode == qpdf_s_uncompress || mode == qpdf_s_preserve || mode == qpdf_s_compress;
    if (!recognized) {
        return *this;
    }
    if (mode == qpdf_s_preserve) {
        decode_level(qpdf_dl_none);
    } else {
        decode_level(std::max(qpdf_dl_generalized, decode_level_));
    }
    compress_streams(mode == qpdf_s_compress);
    return *this;
}
595
596
void
597
QPDFWriter::setCompressStreams(bool val)
598
0
{
599
0
    m->cfg.compress_streams(val);
600
0
}
601
602
// Sets the compress-streams option and remembers that it was set explicitly. When pclm is
// active the request is reported through usage() and otherwise ignored. Returns *this.
Config&
Config::compress_streams(bool val)
{
    if (!pclm_) {
        compress_streams_set_ = true;
        compress_streams_ = val;
    } else {
        usage("compress_streams cannot be set when pclm is set");
    }
    return *this;
}
613
614
void
615
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
616
8.99k
{
617
8.99k
    m->cfg.decode_level(val);
618
8.99k
}
619
620
// Sets the stream decode level and remembers that it was set explicitly. When pclm is active
// the request is reported through usage() and otherwise ignored. Returns *this.
Config&
Config::decode_level(qpdf_stream_decode_level_e val)
{
    if (!pclm_) {
        decode_level_set_ = true;
        decode_level_ = val;
    } else {
        usage("stream_decode_level cannot be set when pclm is set");
    }
    return *this;
}
631
632
void
633
QPDFWriter::setRecompressFlate(bool val)
634
0
{
635
0
    m->cfg.recompress_flate(val);
636
0
}
637
638
void
639
QPDFWriter::setContentNormalization(bool val)
640
0
{
641
0
    m->cfg.normalize_content(val);
642
0
}
643
644
void
645
QPDFWriter::setQDFMode(bool val)
646
8.99k
{
647
8.99k
    m->cfg.qdf(val);
648
8.99k
}
649
650
// Sets qdf mode. Conflicts with linearize/pclm and with preserve_encryption are reported through
// usage(), but (if usage() returns) the value is still applied.
// NOTE(review): unlike compress_streams/decode_level/linearize/pclm there is no early return
// after usage(), and the preserve_encryption check fires even for val == false -- confirm both
// are intended.
//
// Turning qdf on additionally:
//   - enables content normalization, unless that option was set explicitly
//   - disables stream compression, unless that option was set explicitly
//   - selects the generalized decode level, unless a level was set explicitly
//   - clears preserve_encryption
//   - switches to indirect stream lengths
Config&
Config::qdf(bool val)
{
    if (linearize_ || pclm_) {
        usage("qdf cannot be set when linearize or pclm are set");
    }
    if (preserve_encryption_) {
        usage("preserve_encryption cannot be set when qdf is set");
    }
    qdf_ = val;
    if (!val) {
        return *this;
    }

    if (!normalize_content_set_) {
        normalize_content(true);
    }
    if (!compress_streams_set_) {
        compress_streams(false);
    }
    if (!decode_level_set_) {
        decode_level(qpdf_dl_generalized);
    }
    preserve_encryption_ = false;
    // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
    // recomputed stream length data. Certain streams such as object streams, xref streams, and
    // hint streams always get direct stream lengths.
    direct_stream_lengths_ = false;
    return *this;
}
678
679
void
680
QPDFWriter::setPreserveUnreferencedObjects(bool val)
681
0
{
682
0
    m->cfg.preserve_unreferenced(val);
683
0
}
684
685
void
686
QPDFWriter::setNewlineBeforeEndstream(bool val)
687
0
{
688
0
    m->cfg.newline_before_endstream(val);
689
0
}
690
691
void
692
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
693
0
{
694
0
    m->setMinimumPDFVersion(version, extension_level);
695
0
}
696
697
void
698
impl::Writer::setMinimumPDFVersion(std::string const& version, int extension_level)
699
9.27k
{
700
9.27k
    bool set_version = false;
701
9.27k
    bool set_extension_level = false;
702
9.27k
    if (min_pdf_version.empty()) {
703
8.93k
        set_version = true;
704
8.93k
        set_extension_level = true;
705
8.93k
    } else {
706
348
        int old_major = 0;
707
348
        int old_minor = 0;
708
348
        int min_major = 0;
709
348
        int min_minor = 0;
710
348
        parseVersion(version, old_major, old_minor);
711
348
        parseVersion(min_pdf_version, min_major, min_minor);
712
348
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
713
348
        if (compare > 0) {
714
122
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
715
122
            set_version = true;
716
122
            set_extension_level = true;
717
226
        } else if (compare == 0) {
718
3
            if (extension_level > min_extension_level) {
719
1
                set_extension_level = true;
720
1
            }
721
3
        }
722
348
    }
723
724
9.27k
    if (set_version) {
725
9.05k
        min_pdf_version = version;
726
9.05k
    }
727
9.27k
    if (set_extension_level) {
728
9.05k
        min_extension_level = extension_level;
729
9.05k
    }
730
9.27k
}
731
732
void
733
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
734
0
{
735
0
    std::string version;
736
0
    int extension_level;
737
0
    v.getVersion(version, extension_level);
738
0
    setMinimumPDFVersion(version, extension_level);
739
0
}
740
741
void
742
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
743
0
{
744
0
    m->cfg.forced_pdf_version(version, extension_level);
745
0
}
746
747
void
748
QPDFWriter::setExtraHeaderText(std::string const& text)
749
0
{
750
0
    m->cfg.extra_header_text(text);
751
0
}
752
753
// Stores the extra header text. Non-empty text that does not already end in '\n' gets one
// appended; empty text and text already ending in '\n' are stored as given (that path is the
// one recorded by the QTC coverage call). Returns *this.
Config&
Config::extra_header_text(std::string const& val)
{
    extra_header_text_ = val;
    bool const append_newline =
        !extra_header_text_.empty() && extra_header_text_.back() != '\n';
    if (append_newline) {
        extra_header_text_ += "\n";
    } else {
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
    }
    return *this;
}
764
765
void
766
QPDFWriter::setStaticID(bool val)
767
0
{
768
0
    m->cfg.static_id(val);
769
0
}
770
771
void
772
QPDFWriter::setDeterministicID(bool val)
773
8.99k
{
774
8.99k
    m->cfg.deterministic_id(val);
775
8.99k
}
776
777
void
778
QPDFWriter::setStaticAesIV(bool val)
779
0
{
780
0
    if (val) {
781
0
        Pl_AES_PDF::useStaticIV();
782
0
    }
783
0
}
784
785
void
786
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
787
0
{
788
0
    m->cfg.no_original_object_ids(val);
789
0
}
790
791
void
792
QPDFWriter::setPreserveEncryption(bool val)
793
0
{
794
0
    m->cfg.preserve_encryption(val);
795
0
}
796
797
void
798
QPDFWriter::setLinearization(bool val)
799
0
{
800
0
    m->cfg.linearize(val);
801
0
}
802
803
// Sets the linearize option. When qdf or pclm is active the request is reported through usage()
// and otherwise ignored. Returns *this.
Config&
Config::linearize(bool val)
{
    bool const conflict = pclm_ || qdf_;
    if (!conflict) {
        linearize_ = val;
    } else {
        usage("linearize cannot be set when qdf or pclm are set");
    }
    return *this;
}
813
814
void
815
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
816
0
{
817
0
    m->cfg.linearize_pass1(filename);
818
0
}
819
820
void
821
QPDFWriter::setPCLm(bool val)
822
0
{
823
0
    m->cfg.pclm(val);
824
0
}
825
826
// Sets the pclm option. The request is reported through usage() and otherwise ignored when a
// decode level or compress_streams has been set explicitly, or when linearize or qdf is active.
// (qdf is part of the condition so that the check matches its own message and mirrors the
// checks in Config::qdf and Config::linearize.)
//
// Turning pclm on also forces decode level none, compression off and linearization off.
// Returns *this.
Config&
Config::pclm(bool val)
{
    if (decode_level_set_ || compress_streams_set_ || linearize_ || qdf_) {
        usage(
            "pclm cannot be set when stream_decode_level, compress_streams, linearize or qdf are "
            "set");
        return *this;
    }
    pclm_ = val;
    if (val) {
        decode_level_ = qpdf_dl_none;
        compress_streams_ = false;
        linearize_ = false;
    }

    return *this;
}
844
845
void
846
QPDFWriter::setR2EncryptionParametersInsecure(
847
    char const* user_password,
848
    char const* owner_password,
849
    bool allow_print,
850
    bool allow_modify,
851
    bool allow_extract,
852
    bool allow_annotate)
853
0
{
854
0
    m->encryption = std::make_unique<Encryption>(1, 2, 5, true);
855
0
    if (!allow_print) {
856
0
        m->encryption->setP(3, false);
857
0
    }
858
0
    if (!allow_modify) {
859
0
        m->encryption->setP(4, false);
860
0
    }
861
0
    if (!allow_extract) {
862
0
        m->encryption->setP(5, false);
863
0
    }
864
0
    if (!allow_annotate) {
865
0
        m->encryption->setP(6, false);
866
0
    }
867
0
    m->setEncryptionParameters(user_password, owner_password);
868
0
}
869
870
void
871
QPDFWriter::setR3EncryptionParametersInsecure(
872
    char const* user_password,
873
    char const* owner_password,
874
    bool allow_accessibility,
875
    bool allow_extract,
876
    bool allow_assemble,
877
    bool allow_annotate_and_form,
878
    bool allow_form_filling,
879
    bool allow_modify_other,
880
    qpdf_r3_print_e print)
881
0
{
882
0
    m->encryption = std::make_unique<Encryption>(2, 3, 16, true);
883
0
    m->interpretR3EncryptionParameters(
884
0
        allow_accessibility,
885
0
        allow_extract,
886
0
        allow_assemble,
887
0
        allow_annotate_and_form,
888
0
        allow_form_filling,
889
0
        allow_modify_other,
890
0
        print,
891
0
        qpdf_r3m_all);
892
0
    m->setEncryptionParameters(user_password, owner_password);
893
0
}
894
895
void
896
QPDFWriter::setR4EncryptionParametersInsecure(
897
    char const* user_password,
898
    char const* owner_password,
899
    bool allow_accessibility,
900
    bool allow_extract,
901
    bool allow_assemble,
902
    bool allow_annotate_and_form,
903
    bool allow_form_filling,
904
    bool allow_modify_other,
905
    qpdf_r3_print_e print,
906
    bool encrypt_metadata,
907
    bool use_aes)
908
0
{
909
0
    m->encryption = std::make_unique<Encryption>(4, 4, 16, encrypt_metadata);
910
0
    m->cfg.encrypt_use_aes(use_aes);
911
0
    m->interpretR3EncryptionParameters(
912
0
        allow_accessibility,
913
0
        allow_extract,
914
0
        allow_assemble,
915
0
        allow_annotate_and_form,
916
0
        allow_form_filling,
917
0
        allow_modify_other,
918
0
        print,
919
0
        qpdf_r3m_all);
920
0
    m->setEncryptionParameters(user_password, owner_password);
921
0
}
922
923
void
924
QPDFWriter::setR5EncryptionParameters(
925
    char const* user_password,
926
    char const* owner_password,
927
    bool allow_accessibility,
928
    bool allow_extract,
929
    bool allow_assemble,
930
    bool allow_annotate_and_form,
931
    bool allow_form_filling,
932
    bool allow_modify_other,
933
    qpdf_r3_print_e print,
934
    bool encrypt_metadata)
935
0
{
936
0
    m->encryption = std::make_unique<Encryption>(5, 5, 32, encrypt_metadata);
937
0
    m->cfg.encrypt_use_aes(true);
938
0
    m->interpretR3EncryptionParameters(
939
0
        allow_accessibility,
940
0
        allow_extract,
941
0
        allow_assemble,
942
0
        allow_annotate_and_form,
943
0
        allow_form_filling,
944
0
        allow_modify_other,
945
0
        print,
946
0
        qpdf_r3m_all);
947
0
    m->setEncryptionParameters(user_password, owner_password);
948
0
}
949
950
void
951
QPDFWriter::setR6EncryptionParameters(
952
    char const* user_password,
953
    char const* owner_password,
954
    bool allow_accessibility,
955
    bool allow_extract,
956
    bool allow_assemble,
957
    bool allow_annotate_and_form,
958
    bool allow_form_filling,
959
    bool allow_modify_other,
960
    qpdf_r3_print_e print,
961
    bool encrypt_metadata)
962
0
{
963
0
    m->encryption = std::make_unique<Encryption>(5, 6, 32, encrypt_metadata);
964
0
    m->interpretR3EncryptionParameters(
965
0
        allow_accessibility,
966
0
        allow_extract,
967
0
        allow_assemble,
968
0
        allow_annotate_and_form,
969
0
        allow_form_filling,
970
0
        allow_modify_other,
971
0
        print,
972
0
        qpdf_r3m_all);
973
0
    m->cfg.encrypt_use_aes(true);
974
0
    m->setEncryptionParameters(user_password, owner_password);
975
0
}
976
977
void
978
impl::Writer::interpretR3EncryptionParameters(
979
    bool allow_accessibility,
980
    bool allow_extract,
981
    bool allow_assemble,
982
    bool allow_annotate_and_form,
983
    bool allow_form_filling,
984
    bool allow_modify_other,
985
    qpdf_r3_print_e print,
986
    qpdf_r3_modify_e modify)
987
0
{
988
    // Acrobat 5 security options:
989
990
    // Checkboxes:
991
    //   Enable Content Access for the Visually Impaired
992
    //   Allow Content Copying and Extraction
993
994
    // Allowed changes menu:
995
    //   None
996
    //   Only Document Assembly
997
    //   Only Form Field Fill-in or Signing
998
    //   Comment Authoring, Form Field Fill-in or Signing
999
    //   General Editing, Comment and Form Field Authoring
1000
1001
    // Allowed printing menu:
1002
    //   None
1003
    //   Low Resolution
1004
    //   Full printing
1005
1006
    // Meanings of bits in P when R >= 3
1007
    //
1008
    //  3: low-resolution printing
1009
    //  4: document modification except as controlled by 6, 9, and 11
1010
    //  5: extraction
1011
    //  6: add/modify annotations (comment), fill in forms
1012
    //     if 4+6 are set, also allows modification of form fields
1013
    //  9: fill in forms even if 6 is clear
1014
    // 10: accessibility; ignored by readers, should always be set
1015
    // 11: document assembly even if 4 is clear
1016
    // 12: high-resolution printing
1017
0
    if (!allow_accessibility && encryption->getR() <= 3) {
1018
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
1019
        // is no way to disable accessibility with R > 3.
1020
0
        encryption->setP(10, false);
1021
0
    }
1022
0
    if (!allow_extract) {
1023
0
        encryption->setP(5, false);
1024
0
    }
1025
1026
0
    switch (print) {
1027
0
    case qpdf_r3p_none:
1028
0
        encryption->setP(3, false); // any printing
1029
0
        [[fallthrough]];
1030
0
    case qpdf_r3p_low:
1031
0
        encryption->setP(12, false); // high resolution printing
1032
0
        [[fallthrough]];
1033
0
    case qpdf_r3p_full:
1034
0
        break;
1035
        // no default so gcc warns for missing cases
1036
0
    }
1037
1038
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
1039
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
1040
    // stuck with it. See also allow checks below to control the bits individually.
1041
1042
    // NOT EXERCISED IN TEST SUITE
1043
0
    switch (modify) {
1044
0
    case qpdf_r3m_none:
1045
0
        encryption->setP(11, false); // document assembly
1046
0
        [[fallthrough]];
1047
0
    case qpdf_r3m_assembly:
1048
0
        encryption->setP(9, false); // filling in form fields
1049
0
        [[fallthrough]];
1050
0
    case qpdf_r3m_form:
1051
0
        encryption->setP(6, false); // modify annotations, fill in form fields
1052
0
        [[fallthrough]];
1053
0
    case qpdf_r3m_annotate:
1054
0
        encryption->setP(4, false); // other modifications
1055
0
        [[fallthrough]];
1056
0
    case qpdf_r3m_all:
1057
0
        break;
1058
        // no default so gcc warns for missing cases
1059
0
    }
1060
    // END NOT EXERCISED IN TEST SUITE
1061
1062
0
    if (!allow_assemble) {
1063
0
        encryption->setP(11, false);
1064
0
    }
1065
0
    if (!allow_annotate_and_form) {
1066
0
        encryption->setP(6, false);
1067
0
    }
1068
0
    if (!allow_form_filling) {
1069
0
        encryption->setP(9, false);
1070
0
    }
1071
0
    if (!allow_modify_other) {
1072
0
        encryption->setP(4, false);
1073
0
    }
1074
0
}
1075
1076
void
1077
impl::Writer::setEncryptionParameters(char const* user_password, char const* owner_password)
1078
0
{
1079
0
    generateID(true);
1080
0
    encryption->setId1(id1);
1081
0
    encryption_key = encryption->compute_parameters(user_password, owner_password);
1082
0
    setEncryptionMinimumVersion();
1083
0
}
1084
1085
// Public entry point for copying encryption parameters from another QPDF object. It does no work
// of its own: the call is forwarded unchanged to the implementation object held in m.
void
1086
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
1087
0
{
1088
0
    m->copyEncryptionParameters(qpdf);
1089
0
}
1090
1091
void
1092
impl::Writer::copyEncryptionParameters(QPDF& qpdf)
1093
0
{
1094
0
    cfg.preserve_encryption(false);
1095
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
1096
0
    if (trailer.hasKey("/Encrypt")) {
1097
0
        generateID(true);
1098
0
        id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
1099
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
1100
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
1101
0
        int key_len = 5;
1102
0
        if (V > 1) {
1103
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
1104
0
        }
1105
0
        const bool encrypt_metadata =
1106
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
1107
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
1108
0
            : true;
1109
0
        if (V >= 4) {
1110
            // When copying encryption parameters, use AES even if the original file did not.
1111
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
1112
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
1113
            // all potentially having different values.
1114
0
            cfg.encrypt_use_aes(true);
1115
0
        }
1116
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
1117
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", cfg.encrypt_use_aes() ? 0 : 1);
1118
1119
0
        encryption = std::make_unique<Encryption>(
1120
0
            V,
1121
0
            encrypt.getKey("/R").getIntValueAsInt(),
1122
0
            key_len,
1123
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
1124
0
            encrypt.getKey("/O").getStringValue(),
1125
0
            encrypt.getKey("/U").getStringValue(),
1126
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
1127
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
1128
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
1129
0
            id1, // id1 == the other file's id1
1130
0
            encrypt_metadata);
1131
0
        encryption_key = V >= 5 ? qpdf.getEncryptionKey()
1132
0
                                : encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
1133
0
        setEncryptionMinimumVersion();
1134
0
    }
1135
0
}
1136
1137
void
1138
impl::Writer::disableIncompatibleEncryption(int major, int minor, int extension_level)
1139
0
{
1140
0
    if (!encryption) {
1141
0
        return;
1142
0
    }
1143
0
    if (compareVersions(major, minor, 1, 3) < 0) {
1144
0
        encryption = nullptr;
1145
0
        return;
1146
0
    }
1147
0
    int V = encryption->getV();
1148
0
    int R = encryption->getR();
1149
0
    if (compareVersions(major, minor, 1, 4) < 0) {
1150
0
        if (V > 1 || R > 2) {
1151
0
            encryption = nullptr;
1152
0
        }
1153
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
1154
0
        if (V > 2 || R > 3) {
1155
0
            encryption = nullptr;
1156
0
        }
1157
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
1158
0
        if (cfg.encrypt_use_aes()) {
1159
0
            encryption = nullptr;
1160
0
        }
1161
0
    } else if (
1162
0
        (compareVersions(major, minor, 1, 7) < 0) ||
1163
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
1164
0
        if (V >= 5 || R >= 5) {
1165
0
            encryption = nullptr;
1166
0
        }
1167
0
    }
1168
1169
0
    if (!encryption) {
1170
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
1171
0
    }
1172
0
}
1173
1174
void
1175
impl::Writer::parseVersion(std::string const& version, int& major, int& minor) const
1176
692
{
1177
692
    major = QUtil::string_to_int(version.c_str());
1178
692
    minor = 0;
1179
692
    size_t p = version.find('.');
1180
692
    if ((p != std::string::npos) && (version.length() > p)) {
1181
691
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
1182
691
    }
1183
692
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
1184
692
    if (tmp != version) {
1185
        // The version number in the input is probably invalid. This happens with some files that
1186
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
1187
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
1188
19
    }
1189
692
}
1190
1191
int
1192
impl::Writer::compareVersions(int major1, int minor1, int major2, int minor2) const
1193
344
{
1194
344
    if (major1 < major2) {
1195
9
        return -1;
1196
9
    }
1197
335
    if (major1 > major2) {
1198
24
        return 1;
1199
24
    }
1200
311
    if (minor1 < minor2) {
1201
210
        return -1;
1202
210
    }
1203
101
    return minor1 > minor2 ? 1 : 0;
1204
311
}
1205
1206
void
1207
impl::Writer::setEncryptionMinimumVersion()
1208
0
{
1209
0
    auto const R = encryption->getR();
1210
0
    if (R >= 6) {
1211
0
        setMinimumPDFVersion("1.7", 8);
1212
0
    } else if (R == 5) {
1213
0
        setMinimumPDFVersion("1.7", 3);
1214
0
    } else if (R == 4) {
1215
0
        setMinimumPDFVersion(cfg.encrypt_use_aes() ? "1.6" : "1.5");
1216
0
    } else if (R == 3) {
1217
0
        setMinimumPDFVersion("1.4");
1218
0
    } else {
1219
0
        setMinimumPDFVersion("1.3");
1220
0
    }
1221
0
}
1222
1223
void
1224
impl::Writer::setDataKey(int objid)
1225
71.8k
{
1226
71.8k
    if (encryption) {
1227
0
        cur_data_key = QPDF::compute_data_key(
1228
0
            encryption_key,
1229
0
            objid,
1230
0
            0,
1231
0
            cfg.encrypt_use_aes(),
1232
0
            encryption->getV(),
1233
0
            encryption->getR());
1234
0
    }
1235
71.8k
}
1236
1237
unsigned int
1238
impl::Writer::bytesNeeded(long long n)
1239
861
{
1240
861
    unsigned int bytes = 0;
1241
1.87k
    while (n) {
1242
1.01k
        ++bytes;
1243
1.01k
        n >>= 8;
1244
1.01k
    }
1245
861
    return bytes;
1246
861
}
1247
1248
void
1249
impl::Writer::writeBinary(unsigned long long val, unsigned int bytes)
1250
69.9k
{
1251
69.9k
    if (bytes > sizeof(unsigned long long)) {
1252
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1253
0
    }
1254
69.9k
    unsigned char data[sizeof(unsigned long long)];
1255
165k
    for (unsigned int i = 0; i < bytes; ++i) {
1256
95.4k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1257
95.4k
        val >>= 8;
1258
95.4k
    }
1259
69.9k
    pipeline->write(data, bytes);
1260
69.9k
}
1261
1262
// Write str to the current pipeline as is. Returns *this so that write calls can be chained.
impl::Writer&
1263
impl::Writer::write(std::string_view str)
1264
2.91M
{
1265
2.91M
    pipeline->write(str);
1266
2.91M
    return *this;
1267
2.91M
}
1268
1269
// Write an integral value to the current pipeline in the textual form produced by std::to_string.
// Returns *this so that write calls can be chained.
impl::Writer&
1270
impl::Writer::write(std::integral auto val)
1271
365k
{
1272
365k
    pipeline->write(std::to_string(val));
1273
365k
    return *this;
1274
365k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEiEERS1_T_
Line
Count
Source
1271
303k
{
1272
303k
    pipeline->write(std::to_string(val));
1273
303k
    return *this;
1274
303k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralExEERS1_T_
Line
Count
Source
1271
37.5k
{
1272
37.5k
    pipeline->write(std::to_string(val));
1273
37.5k
    return *this;
1274
37.5k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEmEERS1_T_
Line
Count
Source
1271
24.6k
{
1272
24.6k
    pipeline->write(std::to_string(val));
1273
24.6k
    return *this;
1274
24.6k
}
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEjEERS1_T_
Line
Count
Source
1271
574
{
1272
574
    pipeline->write(std::to_string(val));
1273
574
    return *this;
1274
574
}
1275
1276
// Forward (count, c) to the pipeline's two-argument write; presumably this emits the character c
// count times (writeTrailer uses it to produce a run of '0' characters) -- confirm against the
// pipeline class. Returns *this so that write calls can be chained.
impl::Writer&
1277
impl::Writer::write(size_t count, char c)
1278
0
{
1279
0
    pipeline->write(count, c);
1280
0
    return *this;
1281
0
}
1282
1283
// Write str to the current pipeline after passing it through Name::normalize. Returns *this so
// that write calls can be chained.
impl::Writer&
1284
impl::Writer::write_name(std::string const& str)
1285
282k
{
1286
282k
    pipeline->write(Name::normalize(str));
1287
282k
    return *this;
1288
282k
}
1289
1290
// Write str to the current pipeline in the form produced by QPDF_String::unparse; force_binary is
// handed through to unparse unchanged. Returns *this so that write calls can be chained.
impl::Writer&
1291
impl::Writer::write_string(std::string const& str, bool force_binary)
1292
16.9k
{
1293
16.9k
    pipeline->write(QPDF_String(str).unparse(force_binary));
1294
16.9k
    return *this;
1295
16.9k
}
1296
1297
template <typename... Args>
1298
impl::Writer&
1299
impl::Writer::write_qdf(Args&&... args)
1300
188k
{
1301
188k
    if (cfg.qdf()) {
1302
188k
        pipeline->write(std::forward<Args>(args)...);
1303
188k
    }
1304
188k
    return *this;
1305
188k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1300
149k
{
1301
149k
    if (cfg.qdf()) {
1302
149k
        pipeline->write(std::forward<Args>(args)...);
1303
149k
    }
1304
149k
    return *this;
1305
149k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1300
20.7k
{
1301
20.7k
    if (cfg.qdf()) {
1302
20.7k
        pipeline->write(std::forward<Args>(args)...);
1303
20.7k
    }
1304
20.7k
    return *this;
1305
20.7k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1300
8.46k
{
1301
8.46k
    if (cfg.qdf()) {
1302
8.46k
        pipeline->write(std::forward<Args>(args)...);
1303
8.46k
    }
1304
8.46k
    return *this;
1305
8.46k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1300
8.87k
{
1301
8.87k
    if (cfg.qdf()) {
1302
8.87k
        pipeline->write(std::forward<Args>(args)...);
1303
8.87k
    }
1304
8.87k
    return *this;
1305
8.87k
}
1306
1307
template <typename... Args>
1308
impl::Writer&
1309
impl::Writer::write_no_qdf(Args&&... args)
1310
47.8k
{
1311
47.8k
    if (!cfg.qdf()) {
1312
0
        pipeline->write(std::forward<Args>(args)...);
1313
0
    }
1314
47.8k
    return *this;
1315
47.8k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1310
39.3k
{
1311
39.3k
    if (!cfg.qdf()) {
1312
0
        pipeline->write(std::forward<Args>(args)...);
1313
0
    }
1314
39.3k
    return *this;
1315
39.3k
}
qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1310
8.46k
{
1311
8.46k
    if (!cfg.qdf()) {
1312
0
        pipeline->write(std::forward<Args>(args)...);
1313
0
    }
1314
8.46k
    return *this;
1315
8.46k
}
1316
1317
void
1318
impl::Writer::adjustAESStreamLength(size_t& length)
1319
22.7k
{
1320
22.7k
    if (encryption && !cur_data_key.empty() && cfg.encrypt_use_aes()) {
1321
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1322
        // also be prepended by 16 bits of random data.
1323
0
        length += 32 - (length & 0xf);
1324
0
    }
1325
22.7k
}
1326
1327
// Write str, encrypting it with the current data key when there is one.
//
// Without encryption, or while no data key is set, str is written unchanged. Otherwise it is piped
// through Pl_AES_PDF or Pl_RC4, depending on the AES setting, and the result is written. Returns
// *this so that write calls can be chained.
impl::Writer&
impl::Writer::write_encrypted(std::string_view str)
{
    const bool encrypting = encryption && !cur_data_key.empty();
    if (!encrypting) {
        write(str);
        return *this;
    }

    if (cfg.encrypt_use_aes()) {
        write(pl::pipe<Pl_AES_PDF>(str, true, cur_data_key));
    } else {
        write(pl::pipe<Pl_RC4>(str, cur_data_key));
    }
    return *this;
}
1340
1341
void
1342
impl::Writer::computeDeterministicIDData()
1343
8.71k
{
1344
8.71k
    if (!id2.empty()) {
1345
        // Can't happen in the code
1346
0
        throw std::logic_error(
1347
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1348
0
    }
1349
8.71k
    qpdf_assert_debug(deterministic_id_data.empty());
1350
8.71k
    deterministic_id_data = pipeline_stack.hex_digest();
1351
8.71k
}
1352
1353
int
1354
impl::Writer::openObject(int objid)
1355
93.3k
{
1356
93.3k
    if (objid == 0) {
1357
0
        objid = next_objid++;
1358
0
    }
1359
93.3k
    new_obj[objid].xref = QPDFXRefEntry(pipeline->getCount());
1360
93.3k
    write(objid).write(" 0 obj\n");
1361
93.3k
    return objid;
1362
93.3k
}
1363
1364
void
1365
impl::Writer::closeObject(int objid)
1366
93.1k
{
1367
    // Write a newline before endobj as it makes the file easier to repair.
1368
93.1k
    write("\nendobj\n").write_qdf("\n");
1369
93.1k
    auto& no = new_obj[objid];
1370
93.1k
    no.length = pipeline->getCount() - no.xref.getOffset();
1371
93.1k
}
1372
1373
void
1374
impl::Writer::assignCompressedObjectNumbers(QPDFObjGen og)
1375
1.59k
{
1376
1.59k
    int objid = og.getObj();
1377
1.59k
    if (og.getGen() != 0 || !object_stream_to_objects.contains(objid)) {
1378
        // This is not an object stream.
1379
0
        return;
1380
0
    }
1381
1382
    // Reserve numbers for the objects that belong to this object stream.
1383
13.7k
    for (auto const& iter: object_stream_to_objects[objid]) {
1384
13.7k
        obj[iter].renumber = next_objid++;
1385
13.7k
    }
1386
1.59k
}
1387
1388
void
1389
impl::Writer::enqueue(QPDFObjectHandle const& object)
1390
2.65M
{
1391
2.65M
    if (object.indirect()) {
1392
207k
        util::assertion(
1393
            // This owner check can only be done for indirect objects. It is possible for a direct
1394
            // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle
1395
            // from one file was insert into another file without copying. Doing that is safe even
1396
            // if the original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from
1397
            // its owner.
1398
207k
            object.qpdf() == &qpdf,
1399
207k
            "QPDFObjectHandle from different QPDF found while writing.  "
1400
207k
            "Use QPDF::copyForeignObject to add objects from another file." //
1401
207k
        );
1402
1403
207k
        if (cfg.qdf() && object.isStreamOfType("/XRef")) {
1404
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1405
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1406
            // file. This case can occur when creating a QDF from a file with object streams when
1407
            // preserving unreferenced objects since the old cross reference streams are not
1408
            // actually referenced by object number.
1409
681
            return;
1410
681
        }
1411
1412
206k
        QPDFObjGen og = object.getObjGen();
1413
206k
        auto& o = obj[og];
1414
1415
206k
        if (o.renumber == 0) {
1416
74.2k
            if (o.object_stream > 0) {
1417
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1418
                // stream.  Object streams always have generation 0.
1419
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1420
2.08k
                o.renumber = -1;
1421
2.08k
                enqueue(qpdf.getObject(o.object_stream, 0));
1422
72.1k
            } else {
1423
72.1k
                object_queue.emplace_back(object);
1424
72.1k
                o.renumber = next_objid++;
1425
1426
72.1k
                if (og.getGen() == 0 && object_stream_to_objects.contains(og.getObj())) {
1427
                    // For linearized files, uncompressed objects go at end, and we take care of
1428
                    // assigning numbers to them elsewhere.
1429
1.59k
                    if (!cfg.linearize()) {
1430
1.59k
                        assignCompressedObjectNumbers(og);
1431
1.59k
                    }
1432
70.5k
                } else if (!cfg.direct_stream_lengths() && object.isStream()) {
1433
                    // reserve next object ID for length
1434
21.3k
                    ++next_objid;
1435
21.3k
                }
1436
72.1k
            }
1437
74.2k
        }
1438
206k
        return;
1439
207k
    }
1440
1441
2.44M
    if (cfg.linearize()) {
1442
0
        return;
1443
0
    }
1444
1445
2.44M
    if (Array array = object) {
1446
1.73M
        for (auto& item: array) {
1447
1.73M
            enqueue(item);
1448
1.73M
        }
1449
1.10M
        return;
1450
1.10M
    }
1451
1452
1.33M
    for (auto const& item: Dictionary(object)) {
1453
261k
        if (!item.second.null()) {
1454
245k
            enqueue(item.second);
1455
245k
        }
1456
261k
    }
1457
1.33M
}
1458
1459
void
1460
impl::Writer::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1461
646k
{
1462
646k
    if (!cfg.linearize()) {
1463
646k
        enqueue(child);
1464
646k
    }
1465
646k
    if (child.indirect()) {
1466
113k
        write(obj[child].renumber).write(" 0 R");
1467
533k
    } else {
1468
533k
        unparseObject(child, level, flags);
1469
533k
    }
1470
646k
}
1471
1472
void
1473
impl::Writer::writeTrailer(
1474
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1475
8.71k
{
1476
8.71k
    auto trailer = trimmed_trailer();
1477
8.71k
    if (xref_stream) {
1478
287
        cur_data_key.clear();
1479
8.42k
    } else {
1480
8.42k
        write("trailer <<");
1481
8.42k
    }
1482
8.71k
    write_qdf("\n");
1483
8.71k
    if (which == t_lin_second) {
1484
0
        write(" /Size ").write(size);
1485
8.71k
    } else {
1486
16.5k
        for (auto const& [key, value]: trailer) {
1487
16.5k
            if (value.null()) {
1488
3.11k
                continue;
1489
3.11k
            }
1490
13.4k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1491
13.4k
            if (key == "/Size") {
1492
1.40k
                write(size);
1493
1.40k
                if (which == t_lin_first) {
1494
0
                    write(" /Prev ");
1495
0
                    qpdf_offset_t pos = pipeline->getCount();
1496
0
                    write(prev).write(QIntC::to_size(pos - pipeline->getCount() + 21), ' ');
1497
0
                }
1498
12.0k
            } else {
1499
12.0k
                unparseChild(value, 1, 0);
1500
12.0k
            }
1501
13.4k
            write_qdf("\n");
1502
13.4k
        }
1503
8.71k
    }
1504
1505
    // Write ID
1506
8.71k
    write_qdf(" ").write(" /ID [");
1507
8.71k
    if (linearization_pass == 1) {
1508
0
        std::string original_id1 = getOriginalID1();
1509
0
        if (original_id1.empty()) {
1510
0
            write("<00000000000000000000000000000000>");
1511
0
        } else {
1512
            // Write a string of zeroes equal in length to the representation of the original ID.
1513
            // While writing the original ID would have the same number of bytes, it would cause a
1514
            // change to the deterministic ID generated by older versions of the software that
1515
            // hard-coded the length of the ID to 16 bytes.
1516
0
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1517
0
            write("<").write(len, '0').write(">");
1518
0
        }
1519
0
        write("<00000000000000000000000000000000>");
1520
8.71k
    } else {
1521
8.71k
        if (linearization_pass == 0 && cfg.deterministic_id()) {
1522
8.71k
            computeDeterministicIDData();
1523
8.71k
        }
1524
8.71k
        generateID(encryption.get());
1525
8.71k
        write_string(id1, true).write_string(id2, true);
1526
8.71k
    }
1527
8.71k
    write("]");
1528
1529
8.71k
    if (which != t_lin_second) {
1530
        // Write reference to encryption dictionary
1531
8.46k
        if (encryption) {
1532
0
            write(" /Encrypt ").write(encryption_dict_objid).write(" 0 R");
1533
0
        }
1534
8.46k
    }
1535
1536
8.71k
    write_qdf("\n>>").write_no_qdf(" >>");
1537
8.71k
}
1538
1539
bool
1540
impl::Writer::will_filter_stream(QPDFObjectHandle stream)
1541
0
{
1542
0
    std::string s;
1543
0
    [[maybe_unused]] auto [filter, ignore1, ignore2] = will_filter_stream(stream, &s);
1544
0
    return filter;
1545
0
}
1546
1547
std::tuple<const bool, const bool, const bool>
1548
impl::Writer::will_filter_stream(QPDFObjectHandle stream, std::string* stream_data)
1549
21.2k
{
1550
21.2k
    const bool is_root_metadata = stream.isRootMetadata();
1551
21.2k
    bool filter = false;
1552
21.2k
    auto decode_level = cfg.decode_level();
1553
21.2k
    int encode_flags = 0;
1554
21.2k
    Dictionary stream_dict = stream.getDict();
1555
1556
21.2k
    if (stream.getFilterOnWrite()) {
1557
21.2k
        filter = stream.isDataModified() || cfg.compress_streams() || decode_level != qpdf_dl_none;
1558
21.2k
        if (cfg.compress_streams()) {
1559
            // Don't filter if the stream is already compressed with FlateDecode. This way we don't
1560
            // make it worse if the original file used a better Flate algorithm, and we don't spend
1561
            // time and CPU cycles uncompressing and recompressing stuff. This can be overridden
1562
            // with setRecompressFlate(true).
1563
0
            Name Filter = stream_dict["/Filter"];
1564
0
            if (Filter && !cfg.recompress_flate() && !stream.isDataModified() &&
1565
0
                (Filter == "/FlateDecode" || Filter == "/Fl")) {
1566
0
                filter = false;
1567
0
            }
1568
0
        }
1569
21.2k
        if (is_root_metadata && (!encryption || !encryption->getEncryptMetadata())) {
1570
69
            filter = true;
1571
69
            decode_level = qpdf_dl_all;
1572
21.1k
        } else if (cfg.normalize_content() && normalized_streams.contains(stream)) {
1573
2.91k
            encode_flags = qpdf_ef_normalize;
1574
2.91k
            filter = true;
1575
18.2k
        } else if (filter && cfg.compress_streams()) {
1576
0
            encode_flags = qpdf_ef_compress;
1577
0
        }
1578
21.2k
    }
1579
1580
    // Disable compression for empty streams to improve compatibility
1581
21.2k
    if (Integer(stream_dict["/Length"]) == 0) {
1582
82
        filter = true;
1583
82
        encode_flags = 0;
1584
82
    }
1585
1586
31.5k
    for (bool first_attempt: {true, false}) {
1587
31.5k
        auto pp_stream_data =
1588
31.5k
            stream_data ? pipeline_stack.activate(*stream_data) : pipeline_stack.activate(true);
1589
1590
31.5k
        try {
1591
31.5k
            if (stream.pipeStreamData(
1592
31.5k
                    pipeline,
1593
31.5k
                    filter ? encode_flags : 0,
1594
31.5k
                    filter ? decode_level : qpdf_dl_none,
1595
31.5k
                    false,
1596
31.5k
                    first_attempt)) {
1597
10.8k
                return {true, encode_flags & qpdf_ef_compress, is_root_metadata};
1598
10.8k
            }
1599
20.7k
            if (!filter) {
1600
10.3k
                break;
1601
10.3k
            }
1602
20.7k
        } catch (std::runtime_error& e) {
1603
29
            if (!(filter && first_attempt)) {
1604
5
                throw std::runtime_error(
1605
5
                    "error while getting stream data for " + stream.unparse() + ": " + e.what());
1606
5
            }
1607
24
            stream.warn("error while getting stream data: "s + e.what());
1608
24
            stream.warn("qpdf will attempt to write the damaged stream unchanged");
1609
24
        }
1610
        // Try again
1611
10.3k
        filter = false;
1612
10.3k
        stream.setFilterOnWrite(false);
1613
10.3k
        if (stream_data) {
1614
10.3k
            stream_data->clear();
1615
10.3k
        }
1616
10.3k
    }
1617
10.3k
    return {false, false, is_root_metadata};
1618
21.2k
}
1619
1620
void
1621
impl::Writer::unparseObject(
1622
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1623
638k
{
1624
638k
    QPDFObjGen old_og = object.getObjGen();
1625
638k
    int child_flags = flags & ~f_stream;
1626
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1627
    // include the preceding newline.
1628
638k
    std::string indent_large = " ";
1629
638k
    if (cfg.qdf()) {
1630
638k
        indent_large.append(2 * (level + 1), ' ');
1631
638k
        indent_large[0] = '\n';
1632
638k
    }
1633
638k
    std::string_view indent{indent_large.data(), cfg.qdf() ? indent_large.size() - 2 : 1};
1634
1635
638k
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1636
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1637
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1638
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1639
56.9k
        write("[");
1640
365k
        for (auto const& item: object.as_array()) {
1641
365k
            write(indent_large);
1642
365k
            unparseChild(item, level + 1, child_flags);
1643
365k
        }
1644
56.9k
        write(indent).write("]");
1645
581k
    } else if (tc == ::ot_dictionary) {
1646
        // Handle special cases for specific dictionaries.
1647
1648
91.7k
        if (old_og == root_og) {
1649
            // Extensions dictionaries.
1650
1651
            // We have one of several cases:
1652
            //
1653
            // * We need ADBE
1654
            //    - We already have Extensions
1655
            //       - If it has the right ADBE, preserve it
1656
            //       - Otherwise, replace ADBE
1657
            //    - We don't have Extensions: create one from scratch
1658
            // * We don't want ADBE
1659
            //    - We already have Extensions
1660
            //       - If it only has ADBE, remove it
1661
            //       - If it has other things, keep those and remove ADBE
1662
            //    - We have no extensions: no action required
1663
            //
1664
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1665
            // dictionary, so we can modify in place.
1666
1667
8.58k
            auto extensions = object.getKey("/Extensions");
1668
8.58k
            const bool has_extensions = extensions.isDictionary();
1669
8.58k
            const bool need_extensions_adbe = final_extension_level > 0;
1670
1671
8.58k
            if (has_extensions || need_extensions_adbe) {
1672
                // Make a shallow copy of this object so we can modify it safely without affecting
1673
                // the original. This code has logic to skip certain keys in agreement with
1674
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1675
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1676
                // here because all we are doing is removing or replacing top-level keys.
1677
140
                object = object.unsafeShallowCopy();
1678
140
                if (!has_extensions) {
1679
0
                    extensions = QPDFObjectHandle();
1680
0
                }
1681
1682
140
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1683
140
                const bool have_extensions_other =
1684
140
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1685
1686
140
                if (need_extensions_adbe) {
1687
35
                    if (!(have_extensions_other || have_extensions_adbe)) {
1688
                        // We need Extensions and don't have it.  Create it here.
1689
0
                        QTC::TC("qpdf", "QPDFWriter create Extensions", cfg.qdf() ? 0 : 1);
1690
0
                        extensions = object.replaceKeyAndGetNew(
1691
0
                            "/Extensions", QPDFObjectHandle::newDictionary());
1692
0
                    }
1693
105
                } else if (!have_extensions_other) {
1694
                    // We have Extensions dictionary and don't want one.
1695
10
                    if (have_extensions_adbe) {
1696
8
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1697
8
                        object.removeKey("/Extensions");
1698
8
                        extensions = QPDFObjectHandle(); // uninitialized
1699
8
                    }
1700
10
                }
1701
1702
140
                if (extensions) {
1703
132
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1704
132
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1705
132
                    if (adbe.isDictionary() &&
1706
37
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + final_pdf_version) &&
1707
21
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1708
20
                        (adbe.getKey("/ExtensionLevel").getIntValue() == final_extension_level)) {
1709
127
                    } else {
1710
127
                        if (need_extensions_adbe) {
1711
30
                            extensions.replaceKey(
1712
30
                                "/ADBE",
1713
30
                                QPDFObjectHandle::parse(
1714
30
                                    "<< /BaseVersion /" + final_pdf_version + " /ExtensionLevel " +
1715
30
                                    std::to_string(final_extension_level) + " >>"));
1716
97
                        } else {
1717
97
                            extensions.removeKey("/ADBE");
1718
97
                        }
1719
127
                    }
1720
132
                }
1721
140
            }
1722
8.58k
        }
1723
1724
        // Stream dictionaries.
1725
1726
91.7k
        if (flags & f_stream) {
1727
            // Suppress /Length since we will write it manually
1728
1729
            // Make a shallow copy of this object so we can modify it safely without affecting the
1730
            // original. This code has logic to skip certain keys in agreement with
1731
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1732
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1733
            // all we are doing is removing or replacing top-level keys.
1734
21.2k
            object = object.unsafeShallowCopy();
1735
1736
21.2k
            object.removeKey("/Length");
1737
1738
            // If /DecodeParms is an empty list, remove it.
1739
21.2k
            if (object.getKey("/DecodeParms").empty()) {
1740
20.5k
                object.removeKey("/DecodeParms");
1741
20.5k
            }
1742
1743
21.2k
            if (flags & f_filtered) {
1744
                // We will supply our own filter and decode parameters.
1745
10.8k
                object.removeKey("/Filter");
1746
10.8k
                object.removeKey("/DecodeParms");
1747
10.8k
            } else {
1748
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1749
                // filters.
1750
10.3k
                QPDFObjectHandle filter = object.getKey("/Filter");
1751
10.3k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1752
10.3k
                if (filter.isOrHasName("/Crypt")) {
1753
197
                    if (filter.isName()) {
1754
25
                        object.removeKey("/Filter");
1755
25
                        object.removeKey("/DecodeParms");
1756
172
                    } else {
1757
172
                        int idx = 0;
1758
1.24k
                        for (auto const& item: filter.as_array()) {
1759
1.24k
                            if (item.isNameAndEquals("/Crypt")) {
1760
                                // If filter is an array, then the code in QPDF_Stream has already
1761
                                // verified that DecodeParms and Filters are arrays of the same
1762
                                // length, but if they weren't for some reason, eraseItem does type
1763
                                // and bounds checking. Fuzzing tells us that this can actually
1764
                                // happen.
1765
172
                                filter.eraseItem(idx);
1766
172
                                decode_parms.eraseItem(idx);
1767
172
                                break;
1768
172
                            }
1769
1.06k
                            ++idx;
1770
1.06k
                        }
1771
172
                    }
1772
197
                }
1773
10.3k
            }
1774
21.2k
        }
1775
1776
91.7k
        write("<<");
1777
1778
310k
        for (auto const& [key, value]: object.as_dictionary()) {
1779
310k
            if (!value.null()) {
1780
268k
                write(indent_large).write_name(key).write(" ");
1781
268k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1782
104
                    object.hasKey("/ByteRange")) {
1783
102
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1784
102
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1785
268k
                } else {
1786
268k
                    unparseChild(value, level + 1, child_flags);
1787
268k
                }
1788
268k
            }
1789
310k
        }
1790
1791
91.7k
        if (flags & f_stream) {
1792
21.1k
            write(indent_large).write("/Length ");
1793
1794
21.1k
            if (cfg.direct_stream_lengths()) {
1795
0
                write(stream_length);
1796
21.1k
            } else {
1797
21.1k
                write(cur_stream_length_id).write(" 0 R");
1798
21.1k
            }
1799
21.1k
            if (compress && (flags & f_filtered)) {
1800
0
                write(indent_large).write("/Filter /FlateDecode");
1801
0
            }
1802
21.1k
        }
1803
1804
91.7k
        write(indent).write(">>");
1805
490k
    } else if (tc == ::ot_stream) {
1806
        // Write stream data to a buffer.
1807
21.2k
        if (!cfg.direct_stream_lengths()) {
1808
21.2k
            cur_stream_length_id = obj[old_og].renumber + 1;
1809
21.2k
        }
1810
1811
21.2k
        flags |= f_stream;
1812
21.2k
        std::string stream_data;
1813
21.2k
        auto [filter, compress_stream, is_root_metadata] = will_filter_stream(object, &stream_data);
1814
21.2k
        if (filter) {
1815
10.8k
            flags |= f_filtered;
1816
10.8k
        }
1817
21.2k
        QPDFObjectHandle stream_dict = object.getDict();
1818
1819
21.2k
        cur_stream_length = stream_data.size();
1820
21.2k
        if (is_root_metadata && encryption && !encryption->getEncryptMetadata()) {
1821
            // Don't encrypt stream data for the metadata stream
1822
0
            cur_data_key.clear();
1823
0
        }
1824
21.2k
        adjustAESStreamLength(cur_stream_length);
1825
21.2k
        unparseObject(stream_dict, 0, flags, cur_stream_length, compress_stream);
1826
21.2k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1827
21.2k
        write("\nstream\n").write_encrypted(stream_data);
1828
21.2k
        added_newline = cfg.newline_before_endstream() || (cfg.qdf() && last_char != '\n');
1829
21.2k
        write(added_newline ? "\nendstream" : "endstream");
1830
468k
    } else if (tc == ::ot_string) {
1831
25.9k
        std::string val;
1832
25.9k
        if (encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1833
0
            !cur_data_key.empty()) {
1834
0
            val = object.getStringValue();
1835
0
            if (cfg.encrypt_use_aes()) {
1836
0
                Pl_Buffer bufpl("encrypted string");
1837
0
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, cur_data_key);
1838
0
                pl.writeString(val);
1839
0
                pl.finish();
1840
0
                val = QPDF_String(bufpl.getString()).unparse(true);
1841
0
            } else {
1842
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1843
0
                char* tmp = tmp_ph.get();
1844
0
                size_t vlen = val.length();
1845
0
                RC4 rc4(
1846
0
                    QUtil::unsigned_char_pointer(cur_data_key),
1847
0
                    QIntC::to_int(cur_data_key.length()));
1848
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1849
0
                rc4.process(data, vlen, data);
1850
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1851
0
            }
1852
25.9k
        } else if (flags & f_hex_string) {
1853
102
            val = QPDF_String(object.getStringValue()).unparse(true);
1854
25.8k
        } else {
1855
25.8k
            val = object.unparseResolved();
1856
25.8k
        }
1857
25.9k
        write(val);
1858
442k
    } else {
1859
442k
        write(object.unparseResolved());
1860
442k
    }
1861
638k
}
1862
1863
void
1864
impl::Writer::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1865
3.15k
{
1866
3.15k
    qpdf_assert_debug(first_obj > 0);
1867
3.15k
    bool is_first = true;
1868
3.15k
    auto id = std::to_string(first_obj) + ' ';
1869
27.5k
    for (auto& offset: offsets) {
1870
27.5k
        if (is_first) {
1871
3.15k
            is_first = false;
1872
24.3k
        } else {
1873
24.3k
            write_qdf("\n").write_no_qdf(" ");
1874
24.3k
        }
1875
27.5k
        write(id);
1876
27.5k
        util::increment(id, 1);
1877
27.5k
        write(offset);
1878
27.5k
    }
1879
3.15k
    write("\n");
1880
3.15k
}
1881
1882
void
1883
impl::Writer::writeObjectStream(QPDFObjectHandle object)
1884
1.57k
{
1885
    // Note: object might be null if this is a place-holder for an object stream that we are
1886
    // generating from scratch.
1887
1888
1.57k
    QPDFObjGen old_og = object.getObjGen();
1889
1.57k
    qpdf_assert_debug(old_og.getGen() == 0);
1890
1.57k
    int old_id = old_og.getObj();
1891
1.57k
    int new_stream_id = obj[old_og].renumber;
1892
1893
1.57k
    std::vector<qpdf_offset_t> offsets;
1894
1.57k
    qpdf_offset_t first = 0;
1895
1896
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1897
    // first pass.
1898
1.57k
    std::string stream_buffer_pass1;
1899
1.57k
    std::string stream_buffer_pass2;
1900
1.57k
    int first_obj = -1;
1901
1.57k
    const bool compressed = cfg.compress_streams() && !cfg.qdf();
1902
1.57k
    {
1903
        // Pass 1
1904
1.57k
        auto pp_ostream_pass1 = pipeline_stack.activate(stream_buffer_pass1);
1905
1906
1.57k
        int count = -1;
1907
13.7k
        for (auto const& og: object_stream_to_objects[old_id]) {
1908
13.7k
            ++count;
1909
13.7k
            int new_o = obj[og].renumber;
1910
13.7k
            if (first_obj == -1) {
1911
1.57k
                first_obj = new_o;
1912
1.57k
            }
1913
13.7k
            if (cfg.qdf()) {
1914
13.7k
                write("%% Object stream: object ").write(new_o).write(", index ").write(count);
1915
13.7k
                if (!cfg.no_original_object_ids()) {
1916
13.7k
                    write("; original object ID: ").write(og.getObj());
1917
                    // For compatibility, only write the generation if non-zero.  While object
1918
                    // streams only allow objects with generation 0, if we are generating object
1919
                    // streams, the old object could have a non-zero generation.
1920
13.7k
                    if (og.getGen() != 0) {
1921
0
                        write(" ").write(og.getGen());
1922
0
                    }
1923
13.7k
                }
1924
13.7k
                write("\n");
1925
13.7k
            }
1926
1927
13.7k
            offsets.push_back(pipeline->getCount());
1928
            // To avoid double-counting objects being written in object streams for progress
1929
            // reporting, decrement in pass 1.
1930
13.7k
            indicateProgress(true, false);
1931
1932
13.7k
            QPDFObjectHandle obj_to_write = qpdf.getObject(og);
1933
13.7k
            if (obj_to_write.isStream()) {
1934
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1935
                // time, but it's not clear to me how to construct a case for this.
1936
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1937
0
                obj_to_write = QPDFObjectHandle::newNull();
1938
0
            }
1939
13.7k
            writeObject(obj_to_write, count);
1940
1941
13.7k
            new_obj[new_o].xref = QPDFXRefEntry(new_stream_id, count);
1942
13.7k
        }
1943
1.57k
    }
1944
1.57k
    {
1945
        // Adjust offsets to skip over comment before first object
1946
1.57k
        first = offsets.at(0);
1947
13.7k
        for (auto& iter: offsets) {
1948
13.7k
            iter -= first;
1949
13.7k
        }
1950
1951
        // Take one pass at writing pairs of numbers so we can get their size information
1952
1.57k
        {
1953
1.57k
            auto pp_discard = pipeline_stack.activate(true);
1954
1.57k
            writeObjectStreamOffsets(offsets, first_obj);
1955
1.57k
            first += pipeline->getCount();
1956
1.57k
        }
1957
1958
        // Set up a stream to write the stream data into a buffer.
1959
1.57k
        auto pp_ostream = pipeline_stack.activate(stream_buffer_pass2);
1960
1961
1.57k
        writeObjectStreamOffsets(offsets, first_obj);
1962
1.57k
        write(stream_buffer_pass1);
1963
1.57k
        stream_buffer_pass1.clear();
1964
1.57k
        stream_buffer_pass1.shrink_to_fit();
1965
1.57k
        if (compressed) {
1966
0
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1967
0
        }
1968
1.57k
    }
1969
1970
    // Write the object
1971
1.57k
    openObject(new_stream_id);
1972
1.57k
    setDataKey(new_stream_id);
1973
1.57k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1974
1.57k
    size_t length = stream_buffer_pass2.size();
1975
1.57k
    adjustAESStreamLength(length);
1976
1.57k
    write(" /Length ").write(length).write_qdf("\n ");
1977
1.57k
    if (compressed) {
1978
0
        write(" /Filter /FlateDecode");
1979
0
    }
1980
1.57k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1981
1.57k
    if (!object.null()) {
1982
        // If the original object has an /Extends key, preserve it.
1983
384
        QPDFObjectHandle dict = object.getDict();
1984
384
        QPDFObjectHandle extends = dict.getKey("/Extends");
1985
384
        if (extends.isIndirect()) {
1986
76
            write_qdf("\n ").write(" /Extends ");
1987
76
            unparseChild(extends, 1, f_in_ostream);
1988
76
        }
1989
384
    }
1990
1.57k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1991
1.57k
    write(cfg.newline_before_endstream() ? "\nendstream" : "endstream");
1992
1.57k
    if (encryption) {
1993
0
        cur_data_key.clear();
1994
0
    }
1995
1.57k
    closeObject(new_stream_id);
1996
1.57k
}
1997
1998
void
1999
impl::Writer::writeObject(QPDFObjectHandle object, int object_stream_index)
2000
85.6k
{
2001
85.6k
    QPDFObjGen old_og = object.getObjGen();
2002
2003
85.6k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
2004
70.9k
        object_stream_to_objects.contains(old_og.getObj())) {
2005
1.57k
        writeObjectStream(object);
2006
1.57k
        return;
2007
1.57k
    }
2008
2009
84.0k
    indicateProgress(false, false);
2010
84.0k
    auto new_id = obj[old_og].renumber;
2011
84.0k
    if (cfg.qdf()) {
2012
84.0k
        if (page_object_to_seq.contains(old_og)) {
2013
10.4k
            write("%% Page ").write(page_object_to_seq[old_og]).write("\n");
2014
10.4k
        }
2015
84.0k
        if (contents_to_page_seq.contains(old_og)) {
2016
5.75k
            write("%% Contents for page ").write(contents_to_page_seq[old_og]).write("\n");
2017
5.75k
        }
2018
84.0k
    }
2019
84.0k
    if (object_stream_index == -1) {
2020
70.2k
        if (cfg.qdf() && !cfg.no_original_object_ids()) {
2021
70.2k
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
2022
70.2k
        }
2023
70.2k
        openObject(new_id);
2024
70.2k
        setDataKey(new_id);
2025
70.2k
        unparseObject(object, 0, 0);
2026
70.2k
        cur_data_key.clear();
2027
70.2k
        closeObject(new_id);
2028
70.2k
    } else {
2029
13.7k
        unparseObject(object, 0, f_in_ostream);
2030
13.7k
        write("\n");
2031
13.7k
    }
2032
2033
84.0k
    if (!cfg.direct_stream_lengths() && object.isStream()) {
2034
21.1k
        if (cfg.qdf()) {
2035
21.1k
            if (added_newline) {
2036
12.0k
                write("%QDF: ignore_newline\n");
2037
12.0k
            }
2038
21.1k
        }
2039
21.1k
        openObject(new_id + 1);
2040
21.1k
        write(cur_stream_length);
2041
21.1k
        closeObject(new_id + 1);
2042
21.1k
    }
2043
84.0k
}
2044
2045
std::string
2046
impl::Writer::getOriginalID1()
2047
8.71k
{
2048
8.71k
    QPDFObjectHandle trailer = qpdf.getTrailer();
2049
8.71k
    if (trailer.hasKey("/ID")) {
2050
933
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
2051
7.78k
    } else {
2052
7.78k
        return "";
2053
7.78k
    }
2054
8.71k
}
2055
2056
void
2057
impl::Writer::generateID(bool encrypted)
2058
8.71k
{
2059
    // Generate the ID lazily so that we can handle the user's preference to use static or
2060
    // deterministic ID generation.
2061
2062
8.71k
    if (!id2.empty()) {
2063
0
        return;
2064
0
    }
2065
2066
8.71k
    QPDFObjectHandle trailer = qpdf.getTrailer();
2067
2068
8.71k
    std::string result;
2069
2070
8.71k
    if (cfg.static_id()) {
2071
        // For test suite use only...
2072
0
        static unsigned char tmp[] = {
2073
0
            0x31,
2074
0
            0x41,
2075
0
            0x59,
2076
0
            0x26,
2077
0
            0x53,
2078
0
            0x58,
2079
0
            0x97,
2080
0
            0x93,
2081
0
            0x23,
2082
0
            0x84,
2083
0
            0x62,
2084
0
            0x64,
2085
0
            0x33,
2086
0
            0x83,
2087
0
            0x27,
2088
0
            0x95,
2089
0
            0x00};
2090
0
        result = reinterpret_cast<char*>(tmp);
2091
8.71k
    } else {
2092
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
2093
        // only thing that's really important is that it is very likely to be unique.  We can't
2094
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
2095
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
2096
        // sufficient portion of the file's contents such no two non-matching files would match in
2097
        // the subsets used for this computation.  Note that we explicitly omit the filename from
2098
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
2099
        // that case, would have the same ID regardless of the output file's name.
2100
2101
8.71k
        std::string seed;
2102
8.71k
        if (cfg.deterministic_id()) {
2103
8.71k
            if (encrypted) {
2104
0
                throw std::runtime_error(
2105
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
2106
0
                    "written is encrypted (even though the file may not require a password)");
2107
0
            }
2108
8.71k
            if (deterministic_id_data.empty()) {
2109
0
                throw std::logic_error(
2110
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
2111
0
            }
2112
8.71k
            seed += deterministic_id_data;
2113
8.71k
        } else {
2114
0
            seed += std::to_string(QUtil::get_current_time());
2115
0
            seed += filename;
2116
0
            seed += " ";
2117
0
        }
2118
8.71k
        seed += " QPDF ";
2119
8.71k
        if (trailer.hasKey("/Info")) {
2120
1.38k
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
2121
1.38k
                if (item.second.isString()) {
2122
600
                    seed += " ";
2123
600
                    seed += item.second.getStringValue();
2124
600
                }
2125
1.38k
            }
2126
311
        }
2127
2128
8.71k
        MD5 md5;
2129
8.71k
        md5.encodeString(seed.c_str());
2130
8.71k
        MD5::Digest digest;
2131
8.71k
        md5.digest(digest);
2132
8.71k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
2133
8.71k
    }
2134
2135
    // If /ID already exists, follow the spec: use the original first word and generate a new second
2136
    // word.  Otherwise, we'll use the generated ID for both.
2137
2138
8.71k
    id2 = result;
2139
    // Note: keep /ID from old file even if --static-id was given.
2140
8.71k
    id1 = getOriginalID1();
2141
8.71k
    if (id1.empty()) {
2142
7.83k
        id1 = id2;
2143
7.83k
    }
2144
8.71k
}
2145
2146
void
2147
impl::Writer::initializeSpecialStreams()
2148
8.99k
{
2149
    // Mark all page content streams in case we are filtering or normalizing.
2150
8.99k
    int num = 0;
2151
10.7k
    for (auto& page: pages) {
2152
10.7k
        page_object_to_seq[page.getObjGen()] = ++num;
2153
10.7k
        QPDFObjectHandle contents = page.getKey("/Contents");
2154
10.7k
        std::vector<QPDFObjGen> contents_objects;
2155
10.7k
        if (contents.isArray()) {
2156
393
            int n = static_cast<int>(contents.size());
2157
33.8k
            for (int i = 0; i < n; ++i) {
2158
33.4k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
2159
33.4k
            }
2160
10.3k
        } else if (contents.isStream()) {
2161
1.98k
            contents_objects.push_back(contents.getObjGen());
2162
1.98k
        }
2163
2164
35.4k
        for (auto const& c: contents_objects) {
2165
35.4k
            contents_to_page_seq[c] = num;
2166
35.4k
            normalized_streams.insert(c);
2167
35.4k
        }
2168
10.7k
    }
2169
8.99k
}
2170
2171
void
2172
impl::Writer::preserveObjectStreams()
2173
8.97k
{
2174
8.97k
    auto const& xref = objects.xref_table();
2175
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
2176
    // streams out of old objects that have generation numbers greater than zero. However in an
2177
    // existing PDF, all object stream objects and all objects in them must have generation 0
2178
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
2179
    // that are not allowed to be in object streams. In addition to removing objects that were
2180
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
2181
    // objects from being included.
2182
8.97k
    auto end = xref.cend();
2183
8.97k
    obj.streams_empty = true;
2184
8.97k
    if (cfg.preserve_unreferenced()) {
2185
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2186
0
            if (iter->second.getType() == 2) {
2187
                // Pdf contains object streams.
2188
0
                obj.streams_empty = false;
2189
0
                obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2190
0
            }
2191
0
        }
2192
8.97k
    } else {
2193
        // Start by scanning for first compressed object in case we don't have any object streams to
2194
        // process.
2195
75.0k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2196
66.9k
            if (iter->second.getType() == 2) {
2197
                // Pdf contains object streams.
2198
888
                obj.streams_empty = false;
2199
888
                auto eligible = objects.compressible_set();
2200
                // The object pointed to by iter may be a previous generation, in which case it is
2201
                // removed by compressible_set. We need to restart the loop (while the object
2202
                // table may contain multiple generations of an object).
2203
145k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2204
144k
                    if (iter->second.getType() == 2) {
2205
129k
                        auto id = static_cast<size_t>(iter->first.getObj());
2206
129k
                        if (id < eligible.size() && eligible[id]) {
2207
14.5k
                            obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2208
114k
                        } else {
2209
114k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2210
114k
                        }
2211
129k
                    }
2212
144k
                }
2213
888
                return;
2214
888
            }
2215
66.9k
        }
2216
8.97k
    }
2217
8.97k
}
2218
2219
void
2220
impl::Writer::generateObjectStreams()
2221
0
{
2222
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2223
    // how many object streams are needed so that we can distribute objects approximately evenly
2224
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2225
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2226
    // here later.
2227
2228
    // This code doesn't do anything with /Extends.
2229
2230
0
    auto eligible = objects.compressible_vector();
2231
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2232
2233
0
    initializeTables(2U * n_object_streams);
2234
0
    if (n_object_streams == 0) {
2235
0
        obj.streams_empty = true;
2236
0
        return;
2237
0
    }
2238
0
    size_t n_per = eligible.size() / n_object_streams;
2239
0
    if (n_per * n_object_streams < eligible.size()) {
2240
0
        ++n_per;
2241
0
    }
2242
0
    unsigned int n = 0;
2243
0
    int cur_ostream = qpdf.newIndirectNull().getObjectID();
2244
0
    for (auto const& item: eligible) {
2245
0
        if (n == n_per) {
2246
0
            n = 0;
2247
            // Construct a new null object as the "original" object stream.  The rest of the code
2248
            // knows that this means we're creating the object stream from scratch.
2249
0
            cur_ostream = qpdf.newIndirectNull().getObjectID();
2250
0
        }
2251
0
        auto& o = obj[item];
2252
0
        o.object_stream = cur_ostream;
2253
0
        o.gen = item.getGen();
2254
0
        ++n;
2255
0
    }
2256
0
}
2257
2258
// Return a shallow copy of the trailer with the keys removed that necessarily have to be
// replaced when the file is written.
Dictionary
impl::Writer::trimmed_trailer()
{
    Dictionary trailer = qpdf.getTrailer().unsafeShallowCopy();

    for (char const* key: {
             // Encryption keys
             "/ID",
             "/Encrypt",
             // Modification information
             "/Prev",
             // Keys that potentially come from a cross-reference stream
             "/Index",
             "/W",
             "/Length",
             "/Filter",
             "/DecodeParms",
             "/Type",
             "/XRefStm"}) {
        trailer.erase(key);
    }

    return trailer;
}
2283
2284
// Make document extension level information direct as required by the spec.
2285
void
2286
impl::Writer::prepareFileForWrite()
2287
8.92k
{
2288
8.92k
    qpdf.fixDanglingReferences();
2289
8.92k
    auto root = qpdf.getRoot();
2290
8.92k
    auto oh = root.getKey("/Extensions");
2291
8.92k
    if (oh.isDictionary()) {
2292
406
        const bool extensions_indirect = oh.isIndirect();
2293
406
        if (extensions_indirect) {
2294
96
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2295
96
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2296
96
        }
2297
406
        if (oh.hasKey("/ADBE")) {
2298
310
            auto adbe = oh.getKey("/ADBE");
2299
310
            if (adbe.isIndirect()) {
2300
265
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2301
265
                adbe.makeDirect();
2302
265
                oh.replaceKey("/ADBE", adbe);
2303
265
            }
2304
310
        }
2305
406
    }
2306
8.92k
}
2307
2308
void
2309
impl::Writer::initializeTables(size_t extra)
2310
8.97k
{
2311
8.97k
    auto size = objects.table_size() + 100u + extra;
2312
8.97k
    obj.resize(size);
2313
8.97k
    new_obj.resize(size);
2314
8.97k
}
2315
2316
void
2317
impl::Writer::doWriteSetup()
2318
8.99k
{
2319
8.99k
    if (did_write_setup) {
2320
0
        return;
2321
0
    }
2322
8.99k
    did_write_setup = true;
2323
2324
    // Do preliminary setup
2325
2326
8.99k
    if (cfg.linearize()) {
2327
0
        cfg.qdf(false);
2328
0
    }
2329
2330
8.99k
    if (cfg.pclm()) {
2331
0
        encryption = nullptr;
2332
0
    }
2333
2334
8.99k
    if (encryption) {
2335
        // Encryption has been explicitly set
2336
0
        cfg.preserve_encryption(false);
2337
8.99k
    } else if (cfg.normalize_content() || cfg.pclm()) {
2338
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2339
        // though, we still obey that.
2340
8.99k
        cfg.preserve_encryption(false);
2341
8.99k
    }
2342
2343
8.99k
    if (cfg.preserve_encryption()) {
2344
0
        copyEncryptionParameters(qpdf);
2345
0
    }
2346
2347
8.99k
    if (!cfg.forced_pdf_version().empty()) {
2348
0
        int major = 0;
2349
0
        int minor = 0;
2350
0
        parseVersion(cfg.forced_pdf_version(), major, minor);
2351
0
        disableIncompatibleEncryption(major, minor, cfg.forced_extension_level());
2352
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2353
0
            cfg.object_streams(qpdf_o_disable);
2354
0
        }
2355
0
    }
2356
2357
8.99k
    if (cfg.qdf() || cfg.normalize_content()) {
2358
8.99k
        initializeSpecialStreams();
2359
8.99k
    }
2360
2361
8.99k
    switch (cfg.object_streams()) {
2362
0
    case qpdf_o_disable:
2363
0
        initializeTables();
2364
0
        obj.streams_empty = true;
2365
0
        break;
2366
2367
8.97k
    case qpdf_o_preserve:
2368
8.97k
        initializeTables();
2369
8.97k
        preserveObjectStreams();
2370
8.97k
        break;
2371
2372
0
    case qpdf_o_generate:
2373
0
        generateObjectStreams();
2374
0
        break;
2375
8.99k
    }
2376
2377
8.95k
    if (!obj.streams_empty) {
2378
869
        if (cfg.linearize()) {
2379
            // Page dictionaries are not allowed to be compressed objects.
2380
0
            for (auto& page: pages) {
2381
0
                if (obj[page].object_stream > 0) {
2382
0
                    obj[page].object_stream = 0;
2383
0
                }
2384
0
            }
2385
0
        }
2386
2387
869
        if (cfg.linearize() || encryption) {
2388
            // The document catalog is not allowed to be compressed in cfg.linearized_ files either.
2389
            // It also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2390
            // handle encrypted files with compressed document catalogs, so we disable them in that
2391
            // case as well.
2392
0
            if (obj[root_og].object_stream > 0) {
2393
0
                obj[root_og].object_stream = 0;
2394
0
            }
2395
0
        }
2396
2397
        // Generate reverse mapping from object stream to objects
2398
977k
        obj.forEach([this](auto id, auto const& item) -> void {
2399
977k
            if (item.object_stream > 0) {
2400
14.5k
                auto& vec = object_stream_to_objects[item.object_stream];
2401
14.5k
                vec.emplace_back(id, item.gen);
2402
14.5k
                if (max_ostream_index < vec.size()) {
2403
5.75k
                    ++max_ostream_index;
2404
5.75k
                }
2405
14.5k
            }
2406
977k
        });
2407
869
        --max_ostream_index;
2408
2409
869
        if (object_stream_to_objects.empty()) {
2410
520
            obj.streams_empty = true;
2411
520
        } else {
2412
349
            setMinimumPDFVersion("1.5");
2413
349
        }
2414
869
    }
2415
2416
8.95k
    setMinimumPDFVersion(qpdf.getPDFVersion(), qpdf.getExtensionLevel());
2417
8.95k
    final_pdf_version = min_pdf_version;
2418
8.95k
    final_extension_level = min_extension_level;
2419
8.95k
    if (!cfg.forced_pdf_version().empty()) {
2420
0
        final_pdf_version = cfg.forced_pdf_version();
2421
0
        final_extension_level = cfg.forced_extension_level();
2422
0
    }
2423
8.95k
}
2424
2425
void
2426
QPDFWriter::write()
2427
8.99k
{
2428
8.99k
    m->write();
2429
8.99k
}
2430
2431
void
2432
impl::Writer::write()
2433
8.99k
{
2434
8.99k
    doWriteSetup();
2435
2436
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2437
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2438
8.99k
    events_expected = QIntC::to_int(qpdf.getObjectCount() * (cfg.linearize() ? 2 : 1));
2439
2440
8.99k
    prepareFileForWrite();
2441
2442
8.99k
    if (cfg.linearize()) {
2443
0
        writeLinearized();
2444
8.99k
    } else {
2445
8.99k
        writeStandard();
2446
8.99k
    }
2447
2448
8.99k
    pipeline->finish();
2449
8.99k
    if (close_file) {
2450
0
        fclose(file);
2451
0
    }
2452
8.99k
    file = nullptr;
2453
8.99k
    if (buffer_pipeline) {
2454
0
        output_buffer = buffer_pipeline->getBuffer();
2455
0
        buffer_pipeline = nullptr;
2456
0
    }
2457
8.99k
    indicateProgress(false, true);
2458
8.99k
}
2459
2460
// Look up the number recorded for `og` in the writer's renumbering table and return it paired
// with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return {new_id, 0};
}
2465
2466
std::map<QPDFObjGen, QPDFXRefEntry>
2467
QPDFWriter::getWrittenXRefTable()
2468
0
{
2469
0
    return m->getWrittenXRefTable();
2470
0
}
2471
2472
std::map<QPDFObjGen, QPDFXRefEntry>
2473
impl::Writer::getWrittenXRefTable()
2474
0
{
2475
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2476
2477
0
    auto it = result.begin();
2478
0
    new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2479
0
        if (item.xref.getType() != 0) {
2480
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2481
0
        }
2482
0
    });
2483
0
    return result;
2484
0
}
2485
2486
void
2487
impl::Writer::enqueuePart(std::vector<QPDFObjectHandle>& part)
2488
0
{
2489
0
    for (auto const& oh: part) {
2490
0
        enqueue(oh);
2491
0
    }
2492
0
}
2493
2494
void
2495
impl::Writer::writeEncryptionDictionary()
2496
0
{
2497
0
    encryption_dict_objid = openObject(encryption_dict_objid);
2498
0
    auto& enc = *encryption;
2499
0
    auto const V = enc.getV();
2500
2501
0
    write("<<");
2502
0
    if (V >= 4) {
2503
0
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2504
0
        write(cfg.encrypt_use_aes() ? (V < 5 ? "/AESV2" : "/AESV3") : "/V2");
2505
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2506
        // MacOS won't open encrypted files without it.
2507
0
        write(V < 5 ? " /Length 16 >> >>" : " /Length 32 >> >>");
2508
0
        if (!encryption->getEncryptMetadata()) {
2509
0
            write(" /EncryptMetadata false");
2510
0
        }
2511
0
    }
2512
0
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2513
0
    write(" /O ").write_string(enc.getO(), true);
2514
0
    if (V >= 4) {
2515
0
        write(" /OE ").write_string(enc.getOE(), true);
2516
0
    }
2517
0
    write(" /P ").write(enc.getP());
2518
0
    if (V >= 5) {
2519
0
        write(" /Perms ").write_string(enc.getPerms(), true);
2520
0
    }
2521
0
    write(" /R ").write(enc.getR());
2522
2523
0
    if (V >= 4) {
2524
0
        write(" /StmF /StdCF /StrF /StdCF");
2525
0
    }
2526
0
    write(" /U ").write_string(enc.getU(), true);
2527
0
    if (V >= 4) {
2528
0
        write(" /UE ").write_string(enc.getUE(), true);
2529
0
    }
2530
0
    write(" /V ").write(enc.getV()).write(" >>");
2531
0
    closeObject(encryption_dict_objid);
2532
0
}
2533
2534
std::string
2535
QPDFWriter::getFinalVersion()
2536
0
{
2537
0
    m->doWriteSetup();
2538
0
    return m->final_pdf_version;
2539
0
}
2540
2541
void
2542
impl::Writer::writeHeader()
2543
8.87k
{
2544
8.87k
    write("%PDF-").write(final_pdf_version);
2545
8.87k
    if (cfg.pclm()) {
2546
        // PCLm version
2547
0
        write("\n%PCLm 1.0\n");
2548
8.87k
    } else {
2549
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2550
        // as binary.
2551
8.87k
        write("\n%\xbf\xf7\xa2\xfe\n");
2552
8.87k
    }
2553
8.87k
    write_qdf("%QDF-1.0\n\n");
2554
2555
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2556
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2557
    // linearized files, we have to write extra header text after the linearization parameter
2558
    // dictionary.
2559
8.87k
}
2560
2561
void
2562
impl::Writer::writeHintStream(int hint_id)
2563
0
{
2564
0
    std::string hint_buffer;
2565
0
    int S = 0;
2566
0
    int O = 0;
2567
0
    bool compressed = cfg.compress_streams();
2568
0
    lin.generateHintStream(new_obj, obj, hint_buffer, S, O, compressed);
2569
2570
0
    openObject(hint_id);
2571
0
    setDataKey(hint_id);
2572
2573
0
    size_t hlen = hint_buffer.size();
2574
2575
0
    write("<< ");
2576
0
    if (compressed) {
2577
0
        write("/Filter /FlateDecode ");
2578
0
    }
2579
0
    write("/S ").write(S);
2580
0
    if (O) {
2581
0
        write(" /O ").write(O);
2582
0
    }
2583
0
    adjustAESStreamLength(hlen);
2584
0
    write(" /Length ").write(hlen);
2585
0
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2586
2587
0
    if (encryption) {
2588
0
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2589
0
    }
2590
2591
0
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2592
0
    closeObject(hint_id);
2593
0
}
2594
2595
// Convenience overload: write a cross-reference table with no previous-xref offset, real offsets,
// no hint stream adjustment and linearization pass 0.
qpdf_offset_t
impl::Writer::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    bool const suppress_offsets = false;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    int const linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2602
2603
// Write a classic cross-reference table for objects `first`..`last` followed by the trailer.
//
// When `suppress_offsets` is set, every offset is written as zero. When `hint_id` is non-zero,
// offsets of objects other than the hint stream that lie at or beyond `hint_offset` are shifted by
// `hint_length`. Returns the pipeline position recorded just after the subsection header,
// immediately before the newline that ends it.
qpdf_offset_t
impl::Writer::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    write("xref\n").write(first).write(" ").write(last - first + 1);
    qpdf_offset_t const space_before_zero = pipeline->getCount();
    write("\n");

    int id = first;
    if (id == 0) {
        // Object 0 gets the fixed free-list head entry.
        write("0000000000 65535 f \n");
        id = 1;
    }

    for (; id <= last; ++id) {
        qpdf_offset_t offset = 0;
        if (!suppress_offsets) {
            offset = new_obj[id].xref.getOffset();
            bool const after_hint = (hint_id != 0) && (id != hint_id) && (offset >= hint_offset);
            if (after_hint) {
                offset += hint_length;
            }
        }
        write(QUtil::int_to_string(offset, 10)).write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2637
2638
// Convenience overload: write a cross-reference stream with no previous-xref offset, no hint
// stream adjustment, compression not skipped, and linearization pass 0.
qpdf_offset_t
impl::Writer::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    bool const skip_compression = false;
    int const linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2647
2648
// Write a cross-reference stream as object `xref_id`, covering objects `first`..`last`, with the
// trailer keys merged into the stream dictionary.
//
// Field widths are derived from `max_offset` + `hint_length`, `max_id` and max_ostream_index.
// When `hint_id` is non-zero, offsets of objects other than the hint stream that lie at or beyond
// `hint_offset` are shifted by `hint_length`. With `skip_compression`, the dictionary is written
// as for a compressed stream but the data is only PNG-filtered, not deflated. Returns the pipeline
// position at entry minus one.
qpdf_offset_t
impl::Writer::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = pipeline->getCount();
    qpdf_offset_t const space_before_zero = xref_offset - 1;

    // field 1 contains offsets and object stream identifiers
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // field 2 contains object stream indices
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(max_ostream_index));

    // One type byte plus the two fields.
    unsigned int const esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = cfg.compress_streams() && !cfg.qdf();
    {
        // While pp_xref is alive, output is collected in xref_data.
        auto pp_xref = pipeline_stack.activate(xref_data);

        auto emit_row =
            [&](unsigned long long type, unsigned long long field1, unsigned long long field2) {
                writeBinary(type, 1);
                writeBinary(field1, f1_size);
                writeBinary(field2, f2_size);
            };

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = new_obj[id].xref;
            switch (entry.getType()) {
            case 0:
                emit_row(0, 0, 0);
                break;

            case 1:
                {
                    qpdf_offset_t offset = entry.getOffset();
                    if ((hint_id != 0) && (id != hint_id) && (offset >= hint_offset)) {
                        offset += hint_length;
                    }
                    emit_row(1, QIntC::to_ulonglong(offset), 0);
                }
                break;

            case 2:
                emit_row(
                    2,
                    QIntC::to_ulonglong(entry.getObjStreamNumber()),
                    QIntC::to_ulonglong(entry.getObjStreamIndex()));
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression we still write the stream dictionary for compression but don't
        // actually compress.  This helps us with computation of padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");
    if (first != 0 || last != (size - 1)) {
        // /Index is only written when the stream does not cover exactly 0..size-1.
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2743
2744
size_t
2745
impl::Writer::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2746
0
{
2747
    // This routine is called right after a linearization first pass xref stream has been written
2748
    // without compression.  Calculate the amount of padding that would be required in the worst
2749
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2750
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2751
    // 10 extra bytes for number length increases.
2752
2753
0
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2754
0
}
2755
2756
void
2757
impl::Writer::writeLinearized()
2758
0
{
2759
    // Optimize file and enqueue objects in order
2760
2761
0
    std::map<int, int> stream_cache;
2762
2763
0
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2764
0
        if (auto& result = stream_cache[stream.getObjectID()]) {
2765
0
            return result;
2766
0
        } else {
2767
0
            return result = will_filter_stream(stream) ? 2 : 1;
2768
0
        }
2769
0
    };
2770
2771
0
    lin.optimize(obj, skip_stream_parameters);
2772
2773
0
    std::vector<QPDFObjectHandle> part4;
2774
0
    std::vector<QPDFObjectHandle> part6;
2775
0
    std::vector<QPDFObjectHandle> part7;
2776
0
    std::vector<QPDFObjectHandle> part8;
2777
0
    std::vector<QPDFObjectHandle> part9;
2778
0
    lin.parts(obj, part4, part6, part7, part8, part9);
2779
2780
    // Object number sequence:
2781
    //
2782
    //  second half
2783
    //    second half uncompressed objects
2784
    //    second half xref stream, if any
2785
    //    second half compressed objects
2786
    //  first half
2787
    //    linearization dictionary
2788
    //    first half xref stream, if any
2789
    //    part 4 uncompresesd objects
2790
    //    encryption dictionary, if any
2791
    //    hint stream
2792
    //    part 6 uncompressed objects
2793
    //    first half compressed objects
2794
    //
2795
2796
    // Second half objects
2797
0
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2798
0
    int second_half_first_obj = 1;
2799
0
    int after_second_half = 1 + second_half_uncompressed;
2800
0
    next_objid = after_second_half;
2801
0
    int second_half_xref = 0;
2802
0
    bool need_xref_stream = !obj.streams_empty;
2803
0
    if (need_xref_stream) {
2804
0
        second_half_xref = next_objid++;
2805
0
    }
2806
    // Assign numbers to all compressed objects in the second half.
2807
0
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2808
0
    for (int i = 0; i < 3; ++i) {
2809
0
        for (auto const& oh: *vecs2[i]) {
2810
0
            assignCompressedObjectNumbers(oh.getObjGen());
2811
0
        }
2812
0
    }
2813
0
    int second_half_end = next_objid - 1;
2814
0
    int second_trailer_size = next_objid;
2815
2816
    // First half objects
2817
0
    int first_half_start = next_objid;
2818
0
    int lindict_id = next_objid++;
2819
0
    int first_half_xref = 0;
2820
0
    if (need_xref_stream) {
2821
0
        first_half_xref = next_objid++;
2822
0
    }
2823
0
    int part4_first_obj = next_objid;
2824
0
    next_objid += QIntC::to_int(part4.size());
2825
0
    int after_part4 = next_objid;
2826
0
    if (encryption) {
2827
0
        encryption_dict_objid = next_objid++;
2828
0
    }
2829
0
    int hint_id = next_objid++;
2830
0
    int part6_first_obj = next_objid;
2831
0
    next_objid += QIntC::to_int(part6.size());
2832
0
    int after_part6 = next_objid;
2833
    // Assign numbers to all compressed objects in the first half
2834
0
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2835
0
    for (int i = 0; i < 2; ++i) {
2836
0
        for (auto const& oh: *vecs1[i]) {
2837
0
            assignCompressedObjectNumbers(oh.getObjGen());
2838
0
        }
2839
0
    }
2840
0
    int first_half_end = next_objid - 1;
2841
0
    int first_trailer_size = next_objid;
2842
2843
0
    int part4_end_marker = part4.back().getObjectID();
2844
0
    int part6_end_marker = part6.back().getObjectID();
2845
0
    qpdf_offset_t space_before_zero = 0;
2846
0
    qpdf_offset_t file_size = 0;
2847
0
    qpdf_offset_t part6_end_offset = 0;
2848
0
    qpdf_offset_t first_half_max_obj_offset = 0;
2849
0
    qpdf_offset_t second_xref_offset = 0;
2850
0
    qpdf_offset_t first_xref_end = 0;
2851
0
    qpdf_offset_t second_xref_end = 0;
2852
2853
0
    next_objid = part4_first_obj;
2854
0
    enqueuePart(part4);
2855
0
    if (next_objid != after_part4) {
2856
        // This can happen with very botched files as in the fuzzer test. There are likely some
2857
        // faulty assumptions in calculateLinearizationData
2858
0
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2859
0
    }
2860
0
    next_objid = part6_first_obj;
2861
0
    enqueuePart(part6);
2862
0
    if (next_objid != after_part6) {
2863
0
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2864
0
    }
2865
0
    next_objid = second_half_first_obj;
2866
0
    enqueuePart(part7);
2867
0
    enqueuePart(part8);
2868
0
    enqueuePart(part9);
2869
0
    if (next_objid != after_second_half) {
2870
0
        throw std::runtime_error("error encountered after writing part 9 of cfg.linearized_ data");
2871
0
    }
2872
2873
0
    qpdf_offset_t hint_length = 0;
2874
0
    std::string hint_buffer;
2875
2876
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2877
2878
0
    FILE* lin_pass1_file = nullptr;
2879
0
    auto pp_pass1 = pipeline_stack.popper();
2880
0
    auto pp_md5 = pipeline_stack.popper();
2881
0
    for (int pass: {1, 2}) {
2882
0
        if (pass == 1) {
2883
0
            if (!cfg.linearize_pass1().empty()) {
2884
0
                lin_pass1_file = QUtil::safe_fopen(cfg.linearize_pass1().data(), "wb");
2885
0
                pipeline_stack.activate(
2886
0
                    pp_pass1,
2887
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2888
0
            } else {
2889
0
                pipeline_stack.activate(pp_pass1, true);
2890
0
            }
2891
0
            if (cfg.deterministic_id()) {
2892
0
                pipeline_stack.activate_md5(pp_md5);
2893
0
            }
2894
0
        }
2895
2896
        // Part 1: header
2897
2898
0
        writeHeader();
2899
2900
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2901
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2902
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2903
        // linearization parameter dictionary must appear within the first 1024 characters of the
2904
        // file.
2905
2906
0
        qpdf_offset_t pos = pipeline->getCount();
2907
0
        openObject(lindict_id);
2908
0
        write("<<");
2909
0
        if (pass == 2) {
2910
0
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2911
            // Implementation note 121 states that a space is mandatory after this open bracket.
2912
0
            write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" ");
2913
0
            write(hint_length);
2914
0
            write(" ] /O ").write(obj[pages.all().at(0)].renumber);
2915
0
            write(" /E ").write(part6_end_offset + hint_length);
2916
0
            write(" /N ").write(pages.size());
2917
0
            write(" /T ").write(space_before_zero + hint_length);
2918
0
        }
2919
0
        write(" >>");
2920
0
        closeObject(lindict_id);
2921
0
        static int const pad = 200;
2922
0
        write(QIntC::to_size(pos - pipeline->getCount() + pad), ' ').write("\n");
2923
2924
        // If the user supplied any additional header text, write it here after the linearization
2925
        // parameter dictionary.
2926
0
        write(cfg.extra_header_text());
2927
2928
        // Part 3: first page cross reference table and trailer.
2929
2930
0
        qpdf_offset_t first_xref_offset = pipeline->getCount();
2931
0
        qpdf_offset_t hint_offset = 0;
2932
0
        if (pass == 2) {
2933
0
            hint_offset = new_obj[hint_id].xref.getOffset();
2934
0
        }
2935
0
        if (need_xref_stream) {
2936
            // Must pad here too.
2937
0
            if (pass == 1) {
2938
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2939
                // reserved for each file offset.  This would provide adequate space for the xref
2940
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2941
                // file, which is extremely likely.  In the second pass, we will know the actual
2942
                // value for this, but it's okay if it's smaller.
2943
0
                first_half_max_obj_offset = 1 << 25;
2944
0
            }
2945
0
            pos = pipeline->getCount();
2946
0
            writeXRefStream(
2947
0
                first_half_xref,
2948
0
                first_half_end,
2949
0
                first_half_max_obj_offset,
2950
0
                t_lin_first,
2951
0
                first_half_start,
2952
0
                first_half_end,
2953
0
                first_trailer_size,
2954
0
                hint_length + second_xref_offset,
2955
0
                hint_id,
2956
0
                hint_offset,
2957
0
                hint_length,
2958
0
                (pass == 1),
2959
0
                pass);
2960
0
            qpdf_offset_t endpos = pipeline->getCount();
2961
0
            if (pass == 1) {
2962
                // Pad so we have enough room for the real xref stream.
2963
0
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2964
0
                first_xref_end = pipeline->getCount();
2965
0
            } else {
2966
                // Pad so that the next object starts at the same place as in pass 1.
2967
0
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2968
2969
0
                if (pipeline->getCount() != first_xref_end) {
2970
0
                    throw std::logic_error(
2971
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2972
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2973
0
                }
2974
0
            }
2975
0
            write("\n");
2976
0
        } else {
2977
0
            writeXRefTable(
2978
0
                t_lin_first,
2979
0
                first_half_start,
2980
0
                first_half_end,
2981
0
                first_trailer_size,
2982
0
                hint_length + second_xref_offset,
2983
0
                (pass == 1),
2984
0
                hint_id,
2985
0
                hint_offset,
2986
0
                hint_length,
2987
0
                pass);
2988
0
            write("startxref\n0\n%%EOF\n");
2989
0
        }
2990
2991
        // Parts 4 through 9
2992
2993
0
        for (auto const& cur_object: object_queue) {
2994
0
            if (cur_object.getObjectID() == part6_end_marker) {
2995
0
                first_half_max_obj_offset = pipeline->getCount();
2996
0
            }
2997
0
            writeObject(cur_object);
2998
0
            if (cur_object.getObjectID() == part4_end_marker) {
2999
0
                if (encryption) {
3000
0
                    writeEncryptionDictionary();
3001
0
                }
3002
0
                if (pass == 1) {
3003
0
                    new_obj[hint_id].xref = QPDFXRefEntry(pipeline->getCount());
3004
0
                } else {
3005
                    // Part 5: hint stream
3006
0
                    write(hint_buffer);
3007
0
                }
3008
0
            }
3009
0
            if (cur_object.getObjectID() == part6_end_marker) {
3010
0
                part6_end_offset = pipeline->getCount();
3011
0
            }
3012
0
        }
3013
3014
        // Part 10: overflow hint stream -- not used
3015
3016
        // Part 11: main cross reference table and trailer
3017
3018
0
        second_xref_offset = pipeline->getCount();
3019
0
        if (need_xref_stream) {
3020
0
            pos = pipeline->getCount();
3021
0
            space_before_zero = writeXRefStream(
3022
0
                second_half_xref,
3023
0
                second_half_end,
3024
0
                second_xref_offset,
3025
0
                t_lin_second,
3026
0
                0,
3027
0
                second_half_end,
3028
0
                second_trailer_size,
3029
0
                0,
3030
0
                0,
3031
0
                0,
3032
0
                0,
3033
0
                (pass == 1),
3034
0
                pass);
3035
0
            qpdf_offset_t endpos = pipeline->getCount();
3036
3037
0
            if (pass == 1) {
3038
                // Pad so we have enough room for the real xref stream.  See comments for previous
3039
                // xref stream on how we calculate the padding.
3040
0
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
3041
0
                second_xref_end = pipeline->getCount();
3042
0
            } else {
3043
                // Make the file size the same.
3044
0
                auto padding =
3045
0
                    QIntC::to_size(second_xref_end + hint_length - 1 - pipeline->getCount());
3046
0
                write(padding, ' ').write("\n");
3047
3048
                // If this assertion fails, maybe we didn't have enough padding above.
3049
0
                if (pipeline->getCount() != second_xref_end + hint_length) {
3050
0
                    throw std::logic_error(
3051
0
                        "count mismatch after xref stream; possible insufficient padding?");
3052
0
                }
3053
0
            }
3054
0
        } else {
3055
0
            space_before_zero = writeXRefTable(
3056
0
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
3057
0
        }
3058
0
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
3059
3060
0
        if (pass == 1) {
3061
0
            if (cfg.deterministic_id()) {
3062
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
3063
0
                computeDeterministicIDData();
3064
0
                pp_md5.pop();
3065
0
            }
3066
3067
            // Close first pass pipeline
3068
0
            file_size = pipeline->getCount();
3069
0
            pp_pass1.pop();
3070
3071
            // Save hint offset since it will be set to zero by calling openObject.
3072
0
            qpdf_offset_t hint_offset1 = new_obj[hint_id].xref.getOffset();
3073
3074
            // Write hint stream to a buffer
3075
0
            {
3076
0
                auto pp_hint = pipeline_stack.activate(hint_buffer);
3077
0
                writeHintStream(hint_id);
3078
0
            }
3079
0
            hint_length = QIntC::to_offset(hint_buffer.size());
3080
3081
            // Restore hint offset
3082
0
            new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
3083
0
            if (lin_pass1_file) {
3084
                // Write some debugging information
3085
0
                fprintf(
3086
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
3087
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
3088
0
                fprintf(
3089
0
                    lin_pass1_file,
3090
0
                    "%% second_xref_offset=%s\n",
3091
0
                    std::to_string(second_xref_offset).c_str());
3092
0
                fprintf(
3093
0
                    lin_pass1_file,
3094
0
                    "%% second_xref_end=%s\n",
3095
0
                    std::to_string(second_xref_end).c_str());
3096
0
                fclose(lin_pass1_file);
3097
0
                lin_pass1_file = nullptr;
3098
0
            }
3099
0
        }
3100
0
    }
3101
0
}
3102
3103
void
3104
impl::Writer::enqueueObjectsStandard()
3105
8.87k
{
3106
8.87k
    if (cfg.preserve_unreferenced()) {
3107
0
        for (auto const& oh: qpdf.getAllObjects()) {
3108
0
            enqueue(oh);
3109
0
        }
3110
0
    }
3111
3112
    // Put root first on queue.
3113
8.87k
    auto trailer = trimmed_trailer();
3114
8.87k
    enqueue(trailer["/Root"]);
3115
3116
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
3117
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
3118
16.9k
    for (auto& item: trailer) {
3119
16.9k
        if (!item.second.null()) {
3120
13.7k
            enqueue(item.second);
3121
13.7k
        }
3122
16.9k
    }
3123
8.87k
}
3124
3125
void
3126
impl::Writer::enqueueObjectsPCLm()
3127
0
{
3128
    // Image transform stream content for page strip images. Each of this new stream has to come
3129
    // after every page image strip written in the pclm file.
3130
0
    std::string image_transform_content = "q /image Do Q\n";
3131
3132
    // enqueue all pages first
3133
0
    for (auto& page: pages) {
3134
0
        enqueue(page);
3135
0
        enqueue(page["/Contents"]);
3136
3137
        // enqueue all the strips for each page
3138
0
        for (auto& image: Dictionary(page["/Resources"]["/XObject"])) {
3139
0
            if (!image.second.null()) {
3140
0
                enqueue(image.second);
3141
0
                enqueue(qpdf.newStream(image_transform_content));
3142
0
            }
3143
0
        }
3144
0
    }
3145
3146
0
    enqueue(trimmed_trailer()["/Root"]);
3147
0
}
3148
3149
void
3150
impl::Writer::indicateProgress(bool decrement, bool finished)
3151
106k
{
3152
106k
    if (decrement) {
3153
13.7k
        --events_seen;
3154
13.7k
        return;
3155
13.7k
    }
3156
3157
92.5k
    ++events_seen;
3158
3159
92.5k
    if (!progress_reporter.get()) {
3160
92.5k
        return;
3161
92.5k
    }
3162
3163
0
    if (finished || events_seen >= next_progress_report) {
3164
0
        int percentage =
3165
0
            (finished ? 100
3166
0
                 : next_progress_report == 0
3167
0
                 ? 0
3168
0
                 : std::min(99, 1 + ((100 * events_seen) / events_expected)));
3169
0
        progress_reporter->reportProgress(percentage);
3170
0
    }
3171
0
    int increment = std::max(1, (events_expected / 100));
3172
0
    while (events_seen >= next_progress_report) {
3173
0
        next_progress_report += increment;
3174
0
    }
3175
0
}
3176
3177
void
3178
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3179
0
{
3180
0
    m->progress_reporter = pr;
3181
0
}
3182
3183
void
3184
impl::Writer::writeStandard()
3185
8.87k
{
3186
8.87k
    auto pp_md5 = pipeline_stack.popper();
3187
8.87k
    if (cfg.deterministic_id()) {
3188
8.87k
        pipeline_stack.activate_md5(pp_md5);
3189
8.87k
    }
3190
3191
    // Start writing
3192
3193
8.87k
    writeHeader();
3194
8.87k
    write(cfg.extra_header_text());
3195
3196
8.87k
    if (cfg.pclm()) {
3197
0
        enqueueObjectsPCLm();
3198
8.87k
    } else {
3199
8.87k
        enqueueObjectsStandard();
3200
8.87k
    }
3201
3202
    // Now start walking queue, outputting each object.
3203
80.7k
    while (object_queue_front < object_queue.size()) {
3204
71.8k
        QPDFObjectHandle cur_object = object_queue.at(object_queue_front);
3205
71.8k
        ++object_queue_front;
3206
71.8k
        writeObject(cur_object);
3207
71.8k
    }
3208
3209
    // Write out the encryption dictionary, if any
3210
8.87k
    if (encryption) {
3211
0
        writeEncryptionDictionary();
3212
0
    }
3213
3214
    // Now write out xref.  next_objid is now the number of objects.
3215
8.87k
    qpdf_offset_t xref_offset = pipeline->getCount();
3216
8.87k
    if (object_stream_to_objects.empty()) {
3217
        // Write regular cross-reference table
3218
8.42k
        writeXRefTable(t_normal, 0, next_objid - 1, next_objid);
3219
8.42k
    } else {
3220
        // Write cross-reference stream.
3221
445
        int xref_id = next_objid++;
3222
445
        writeXRefStream(xref_id, xref_id, xref_offset, t_normal, 0, next_objid - 1, next_objid);
3223
445
    }
3224
8.87k
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3225
3226
8.87k
    if (cfg.deterministic_id()) {
3227
8.46k
        QTC::TC(
3228
8.46k
            "qpdf",
3229
8.46k
            "QPDFWriter standard deterministic ID",
3230
8.46k
            object_stream_to_objects.empty() ? 0 : 1);
3231
8.46k
    }
3232
8.87k
}