Coverage Report

Created: 2025-08-26 07:10

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Out-of-line destructor for the progress reporter base class.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // The empty body is deliberate: this must be explicit and not inline -- see QPDF_DLL_CLASS in
    // README-maintainer.
}
35
36
// Store the callback that reportProgress() forwards to. The callback is received by value, so it
// is moved into the member instead of being copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
40
41
// Out-of-line destructor for the function-based progress reporter.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
    // The empty body is deliberate: this must be explicit and not inline -- see QPDF_DLL_CLASS in
    // README-maintainer.
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
167k
                stack(&stack)
102
167k
            {
103
167k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
9.75k
            top(top)
112
9.75k
        {
113
9.75k
        }
114
115
        Popper
116
        popper()
117
14.1k
        {
118
14.1k
            return {*this};
119
14.1k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
9.75k
        {
124
9.75k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
9.75k
            top = c.get();
126
9.75k
            stack.emplace_back(std::move(c));
127
9.75k
        }
128
129
        Popper
130
        activate(std::string& str)
131
106k
        {
132
106k
            Popper pp{*this};
133
106k
            activate(pp, str);
134
106k
            return pp;
135
106k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
106k
        {
140
106k
            activate(pp, false, &str, nullptr);
141
106k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
47.1k
        {
156
47.1k
            Popper pp{*this};
157
47.1k
            activate(pp, discard, str, std::move(next));
158
47.1k
            return pp;
159
47.1k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
160k
        {
168
160k
            std::unique_ptr<pl::Count> c;
169
160k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
160k
            } else if (discard) {
172
54.2k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
106k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
106k
            } else {
176
106k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
106k
            }
178
160k
            pp.stack_id = last_id;
179
160k
            top = c.get();
180
160k
            stack.emplace_back(std::move(c));
181
160k
        }
182
        void
183
        activate_md5(Popper& pp)
184
7.07k
        {
185
7.07k
            qpdf_assert_debug(!md5_pipeline);
186
7.07k
            qpdf_assert_debug(md5_id == 0);
187
7.07k
            qpdf_assert_debug(top->getCount() == 0);
188
7.07k
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
7.07k
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
7.07k
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
7.07k
            pp.stack_id = last_id;
193
7.07k
            md5_id = last_id;
194
7.07k
            top = c.get();
195
7.07k
            stack.emplace_back(std::move(c));
196
7.07k
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
6.55k
        {
202
6.55k
            qpdf_assert_debug(md5_pipeline);
203
6.55k
            auto digest = md5_pipeline->getHexDigest();
204
6.55k
            md5_pipeline->enable(false);
205
6.55k
            return digest;
206
6.55k
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
167k
        {
218
167k
            if (!stack_id) {
219
0
                return;
220
0
            }
221
167k
            qpdf_assert_debug(stack.size() >= 2);
222
167k
            top->finish();
223
167k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
167k
            qpdf_assert_debug(top->id() == stack_id);
229
167k
            if (stack_id == md5_id) {
230
7.07k
                md5_pipeline = nullptr;
231
7.07k
                md5_id = 0;
232
7.07k
            }
233
167k
            stack.pop_back();
234
167k
            top = stack.back().get();
235
167k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
// Pop the associated pipeline stack entry, if this Popper still refers to a stack.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
252
253
void
254
Pl_stack::Popper::pop()
255
13.1k
{
256
13.1k
    if (stack) {
257
13.1k
        stack->pop(stack_id);
258
13.1k
    }
259
13.1k
    stack_id = 0;
260
13.1k
    stack = nullptr;
261
13.1k
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
// Bind the writer state to pdf. root_og is the object/generation of the document root when that is
// indirect and (-1, 0) otherwise. The pipeline stack is bound to the `pipeline` member so that the
// two stay in sync.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)),
    pipeline_stack(pipeline)
{
}
350
351
// Release the output file (only when close_file is set) and any output buffer that is still held.
QPDFWriter::Members::~Members()
{
    if (file != nullptr && close_file) {
        fclose(file);
    }
    // output_buffer is null once getBuffer() has handed it to the caller.
    delete output_buffer;
}
358
359
// Create a writer for pdf with no output destination set.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
363
364
// Create a writer for pdf and direct output to filename via setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    this->setOutputFilename(filename);
}
369
370
// Create a writer for pdf and direct output to an already open FILE via setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    this->setOutputFile(description, file, close_file);
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
// Hand the stored output buffer to the caller. The writer forgets the pointer, so its destructor
// will not delete it and a second call returns null.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
9.75k
{
430
9.75k
    m->filename = "custom pipeline";
431
9.75k
    m->pipeline_stack.initialize(p);
432
9.75k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
9.75k
{
437
9.75k
    m->object_stream_mode = mode;
438
9.75k
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
9.75k
{
473
9.75k
    m->stream_decode_level = val;
474
9.75k
    m->stream_decode_level_set = true;
475
9.75k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
0
{
493
0
    m->qdf_mode = val;
494
0
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
18.4k
{
511
18.4k
    bool set_version = false;
512
18.4k
    bool set_extension_level = false;
513
18.4k
    if (m->min_pdf_version.empty()) {
514
9.57k
        set_version = true;
515
9.57k
        set_extension_level = true;
516
9.57k
    } else {
517
8.87k
        int old_major = 0;
518
8.87k
        int old_minor = 0;
519
8.87k
        int min_major = 0;
520
8.87k
        int min_minor = 0;
521
8.87k
        parseVersion(version, old_major, old_minor);
522
8.87k
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
8.87k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
8.87k
        if (compare > 0) {
525
557
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
557
            set_version = true;
527
557
            set_extension_level = true;
528
8.31k
        } else if (compare == 0) {
529
60
            if (extension_level > m->min_extension_level) {
530
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
1
                set_extension_level = true;
532
1
            }
533
60
        }
534
8.87k
    }
535
536
18.4k
    if (set_version) {
537
10.1k
        m->min_pdf_version = version;
538
10.1k
    }
539
18.4k
    if (set_extension_level) {
540
10.1k
        m->min_extension_level = extension_level;
541
10.1k
    }
542
18.4k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
0
{
575
0
    m->static_id = val;
576
0
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
9.75k
{
581
9.75k
    m->deterministic_id = val;
582
9.75k
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
9.75k
{
607
9.75k
    m->linearized = val;
608
9.75k
    if (val) {
609
9.75k
        m->pclm = false;
610
9.75k
    }
611
9.75k
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
0
{
665
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
0
    interpretR3EncryptionParameters(
667
0
        allow_accessibility,
668
0
        allow_extract,
669
0
        allow_assemble,
670
0
        allow_annotate_and_form,
671
0
        allow_form_filling,
672
0
        allow_modify_other,
673
0
        print,
674
0
        qpdf_r3m_all);
675
0
    setEncryptionParameters(user_password, owner_password);
676
0
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
0
{
746
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
0
    interpretR3EncryptionParameters(
748
0
        allow_accessibility,
749
0
        allow_extract,
750
0
        allow_assemble,
751
0
        allow_annotate_and_form,
752
0
        allow_form_filling,
753
0
        allow_modify_other,
754
0
        print,
755
0
        qpdf_r3m_all);
756
0
    m->encrypt_use_aes = true;
757
0
    setEncryptionParameters(user_password, owner_password);
758
0
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
0
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
0
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
0
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
0
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
0
    case qpdf_r3p_full:
817
0
        break;
818
        // no default so gcc warns for missing cases
819
0
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
0
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
0
    case qpdf_r3m_all:
840
0
        break;
841
        // no default so gcc warns for missing cases
842
0
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
0
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
0
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
0
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
0
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
0
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
0
{
862
0
    generateID(true);
863
0
    m->encryption->setId1(m->id1);
864
0
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
0
    setEncryptionMinimumVersion();
866
0
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
9.75k
{
871
9.75k
    m->preserve_encryption = false;
872
9.75k
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
9.75k
    if (trailer.hasKey("/Encrypt")) {
874
72
        generateID(true);
875
72
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
72
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
72
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
72
        int key_len = 5;
879
72
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
72
        const bool encrypt_metadata =
883
72
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
72
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
72
            : true;
886
72
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
72
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
72
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
72
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
72
            V,
898
72
            encrypt.getKey("/R").getIntValueAsInt(),
899
72
            key_len,
900
72
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
72
            encrypt.getKey("/O").getStringValue(),
902
72
            encrypt.getKey("/U").getStringValue(),
903
72
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
72
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
72
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
72
            m->id1, // m->id1 == the other file's id1
907
72
            encrypt_metadata);
908
72
        m->encryption_key = V >= 5
909
72
            ? qpdf.getEncryptionKey()
910
72
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
72
        setEncryptionMinimumVersion();
912
72
    }
913
9.75k
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
17.7k
{
955
17.7k
    major = QUtil::string_to_int(version.c_str());
956
17.7k
    minor = 0;
957
17.7k
    size_t p = version.find('.');
958
17.7k
    if ((p != std::string::npos) && (version.length() > p)) {
959
17.7k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
17.7k
    }
961
17.7k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
17.7k
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
493
    }
967
17.7k
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
8.86k
{
972
8.86k
    if (major1 < major2) {
973
79
        return -1;
974
8.78k
    } else if (major1 > major2) {
975
58
        return 1;
976
8.72k
    } else if (minor1 < minor2) {
977
8.16k
        return -1;
978
8.16k
    } else if (minor1 > minor2) {
979
499
        return 1;
980
499
    } else {
981
60
        return 0;
982
60
    }
983
8.86k
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
0
{
988
0
    auto const R = m->encryption->getR();
989
0
    if (R >= 6) {
990
0
        setMinimumPDFVersion("1.7", 8);
991
0
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
0
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
0
    } else if (R == 3) {
996
0
        setMinimumPDFVersion("1.4");
997
0
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
0
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
133k
{
1005
133k
    if (m->encryption) {
1006
0
        m->cur_data_key = QPDF::compute_data_key(
1007
0
            m->encryption_key,
1008
0
            objid,
1009
0
            0,
1010
0
            m->encrypt_use_aes,
1011
0
            m->encryption->getV(),
1012
0
            m->encryption->getR());
1013
0
    }
1014
133k
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
76.6k
{
1019
76.6k
    unsigned int bytes = 0;
1020
176k
    while (n) {
1021
99.6k
        ++bytes;
1022
99.6k
        n >>= 8;
1023
99.6k
    }
1024
76.6k
    return bytes;
1025
76.6k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
1.07M
{
1030
1.07M
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
1.07M
    unsigned char data[sizeof(unsigned long long)];
1034
2.62M
    for (unsigned int i = 0; i < bytes; ++i) {
1035
1.55M
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
1.55M
        val >>= 8;
1037
1.55M
    }
1038
1.07M
    m->pipeline->write(data, bytes);
1039
1.07M
}
1040
1041
// Write str to the current output pipeline (m->pipeline). Returns *this so that write calls can
// be chained.
QPDFWriter&
1042
QPDFWriter::write(std::string_view str)
1043
9.34M
{
1044
9.34M
    m->pipeline->write(str);
1045
9.34M
    return *this;
1046
9.34M
}
1047
1048
// Write an integer to the current output pipeline as text, using the representation produced
// by std::to_string. Accepts any integral type. Returns *this so that write calls can be
// chained.
QPDFWriter&
1049
QPDFWriter::write(std::integral auto val)
1050
1.23M
{
1051
1.23M
    m->pipeline->write(std::to_string(val));
1052
1.23M
    return *this;
1053
1.23M
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
639k
{
1051
639k
    m->pipeline->write(std::to_string(val));
1052
639k
    return *this;
1053
639k
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
410k
{
1051
410k
    m->pipeline->write(std::to_string(val));
1052
410k
    return *this;
1053
410k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
110k
{
1051
110k
    m->pipeline->write(std::to_string(val));
1052
110k
    return *this;
1053
110k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
76.6k
{
1051
76.6k
    m->pipeline->write(std::to_string(val));
1052
76.6k
    return *this;
1053
76.6k
}
1054
1055
// Forward (count, c) to the pipeline's two-argument write. Callers in this file use it to emit
// a run of count copies of c (e.g. padding with spaces or zeroes). Returns *this so that write
// calls can be chained.
QPDFWriter&
1056
QPDFWriter::write(size_t count, char c)
1057
41.7k
{
1058
41.7k
    m->pipeline->write(count, c);
1059
41.7k
    return *this;
1060
41.7k
}
1061
1062
// Write a name to the current output pipeline after passing it through Name::normalize.
// Returns *this so that write calls can be chained.
QPDFWriter&
1063
QPDFWriter::write_name(std::string const& str)
1064
692k
{
1065
692k
    m->pipeline->write(Name::normalize(str));
1066
692k
    return *this;
1067
692k
}
1068
1069
// Write str to the current output pipeline in the form produced by
// QPDF_String(str).unparse(force_binary). force_binary is passed through to unparse unchanged.
// Returns *this so that write calls can be chained.
QPDFWriter&
1070
QPDFWriter::write_string(std::string const& str, bool force_binary)
1071
26.1k
{
1072
26.1k
    m->pipeline->write(QPDF_String(str).unparse(force_binary));
1073
26.1k
    return *this;
1074
26.1k
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
836k
{
1080
836k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
836k
    return *this;
1084
836k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
586k
{
1080
586k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
586k
    return *this;
1084
586k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
210k
{
1080
210k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
210k
    return *this;
1084
210k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
26.5k
{
1080
26.5k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
26.5k
    return *this;
1084
26.5k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
13.6k
{
1080
13.6k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
13.6k
    return *this;
1084
13.6k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
387k
{
1090
387k
    if (!m->qdf_mode) {
1091
387k
        m->pipeline->write(std::forward<Args>(args)...);
1092
387k
    }
1093
387k
    return *this;
1094
387k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
361k
{
1090
361k
    if (!m->qdf_mode) {
1091
361k
        m->pipeline->write(std::forward<Args>(args)...);
1092
361k
    }
1093
361k
    return *this;
1094
361k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
26.5k
{
1090
26.5k
    if (!m->qdf_mode) {
1091
26.5k
        m->pipeline->write(std::forward<Args>(args)...);
1092
26.5k
    }
1093
26.5k
    return *this;
1094
26.5k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
64.3k
{
1099
64.3k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
0
        length += 32 - (length & 0xf);
1103
0
    }
1104
64.3k
}
1105
1106
// Write str, encrypting it with the current data key when one is in effect.
//
// If there are no encryption parameters or the current data key is empty, str is written
// unchanged. Otherwise it is passed through Pl_AES_PDF (when AES is in use) or Pl_RC4 before
// being written. Returns *this so that write calls can be chained.
QPDFWriter&
QPDFWriter::write_encrypted(std::string_view str)
{
    bool const encrypting = m->encryption && !m->cur_data_key.empty();
    if (!encrypting) {
        write(str);
        return *this;
    }
    if (m->encrypt_use_aes) {
        write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
    } else {
        write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
    }
    return *this;
}
1119
1120
// Capture the data used for deterministic ID generation: store the hex digest reported by the
// pipeline stack in m->deterministic_id_data.
//
// Throws std::logic_error if m->id2 is already set, i.e. an ID has already been generated.
// Debug builds additionally assert that the deterministic ID data has not been stored before.
void
1121
QPDFWriter::computeDeterministicIDData()
1122
6.55k
{
1123
6.55k
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
6.55k
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
6.55k
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
6.55k
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
172k
{
1135
172k
    if (objid == 0) {
1136
0
        objid = m->next_objid++;
1137
0
    }
1138
172k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
172k
    write(objid).write(" 0 obj\n");
1140
172k
    return objid;
1141
172k
}
1142
1143
// Finish writing indirect object objid: write the "endobj" trailer (plus an extra newline in
// QDF mode) and record the object's length as the distance from its recorded xref offset to
// the current output position.
void
1144
QPDFWriter::closeObject(int objid)
1145
172k
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
172k
    write("\nendobj\n").write_qdf("\n");
1148
172k
    auto& new_obj = m->new_obj[objid];
1149
172k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
172k
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
82.3k
{
1155
82.3k
    int objid = og.getObj();
1156
82.3k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
74.6k
        return;
1159
74.6k
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
95.4k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
95.4k
        m->obj[iter].renumber = m->next_objid++;
1164
95.4k
    }
1165
7.65k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
81.9k
{
1170
81.9k
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
81.9k
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
81.9k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
0
            return;
1190
0
        }
1191
1192
81.9k
        QPDFObjGen og = object.getObjGen();
1193
81.9k
        auto& obj = m->obj[og];
1194
1195
81.9k
        if (obj.renumber == 0) {
1196
81.2k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
0
                obj.renumber = -1;
1201
0
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
81.2k
            } else {
1203
81.2k
                m->object_queue.push_back(object);
1204
81.2k
                obj.renumber = m->next_objid++;
1205
1206
81.2k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
7.59k
                    if (!m->linearized) {
1210
0
                        assignCompressedObjectNumbers(og);
1211
0
                    }
1212
73.6k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
0
                    ++m->next_objid;
1215
0
                }
1216
81.2k
            }
1217
81.2k
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
0
        }
1221
81.9k
        return;
1222
81.9k
    } else if (!m->linearized) {
1223
0
        if (object.isArray()) {
1224
0
            for (auto& item: object.as_array()) {
1225
0
                enqueueObject(item);
1226
0
            }
1227
0
        } else if (auto d = object.as_dictionary()) {
1228
0
            for (auto const& item: d) {
1229
0
                if (!item.second.null()) {
1230
0
                    enqueueObject(item.second);
1231
0
                }
1232
0
            }
1233
0
        }
1234
0
    } else {
1235
        // ignore
1236
0
    }
1237
81.9k
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1241
2.85M
{
1242
2.85M
    if (!m->linearized) {
1243
0
        enqueueObject(child);
1244
0
    }
1245
2.85M
    if (child.isIndirect()) {
1246
410k
        write(m->obj[child].renumber).write(" 0 R");
1247
2.44M
    } else {
1248
2.44M
        unparseObject(child, level, flags);
1249
2.44M
    }
1250
2.85M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
26.7k
{
1256
26.7k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
26.7k
    if (xref_stream) {
1258
25.5k
        m->cur_data_key.clear();
1259
25.5k
    } else {
1260
1.14k
        write("trailer <<");
1261
1.14k
    }
1262
26.7k
    write_qdf("\n");
1263
26.7k
    if (which == t_lin_second) {
1264
13.0k
        write(" /Size ").write(size);
1265
13.6k
    } else {
1266
33.0k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
33.0k
            if (value.null()) {
1268
6.56k
                continue;
1269
6.56k
            }
1270
26.5k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
26.5k
            if (key == "/Size") {
1272
2.10k
                write(size);
1273
2.10k
                if (which == t_lin_first) {
1274
2.10k
                    write(" /Prev ");
1275
2.10k
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
2.10k
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
2.10k
                }
1278
24.4k
            } else {
1279
24.4k
                unparseChild(value, 1, 0);
1280
24.4k
            }
1281
26.5k
            write_qdf("\n");
1282
26.5k
        }
1283
13.6k
    }
1284
1285
    // Write ID
1286
26.7k
    write_qdf(" ").write(" /ID [");
1287
26.7k
    if (linearization_pass == 1) {
1288
13.6k
        std::string original_id1 = getOriginalID1();
1289
13.6k
        if (original_id1.empty()) {
1290
12.8k
            write("<00000000000000000000000000000000>");
1291
12.8k
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
783
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
783
            write("<").write(len, '0').write(">");
1298
783
        }
1299
13.6k
        write("<00000000000000000000000000000000>");
1300
13.6k
    } else {
1301
13.0k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
0
            computeDeterministicIDData();
1303
0
        }
1304
13.0k
        generateID(m->encryption.get());
1305
13.0k
        write_string(m->id1, true).write_string(m->id2, true);
1306
13.0k
    }
1307
26.7k
    write("]");
1308
1309
26.7k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
13.4k
        if (m->encryption) {
1312
0
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
0
        }
1314
13.4k
    }
1315
1316
26.7k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
26.7k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
69.6k
{
1326
69.6k
    compress_stream = false;
1327
69.6k
    is_root_metadata = false;
1328
1329
69.6k
    QPDFObjGen old_og = stream.getObjGen();
1330
69.6k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
69.6k
    if (stream.isRootMetadata()) {
1333
384
        is_root_metadata = true;
1334
384
    }
1335
69.6k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
69.6k
    bool filter_on_write = stream.getFilterOnWrite();
1337
69.6k
    if (!filter_on_write) {
1338
11.9k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
11.9k
        filter = false;
1340
11.9k
    }
1341
69.6k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
57.7k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
57.7k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
57.7k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
10.6k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
10.6k
            filter = false;
1351
10.6k
        }
1352
57.7k
    }
1353
69.6k
    bool normalize = false;
1354
69.6k
    bool uncompress = false;
1355
69.6k
    if (filter_on_write && is_root_metadata &&
1356
69.6k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
384
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
384
        filter = true;
1359
384
        compress_stream = false;
1360
384
        uncompress = true;
1361
69.3k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
0
        normalize = true;
1363
0
        filter = true;
1364
69.3k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
46.7k
        compress_stream = true;
1366
46.7k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
46.7k
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
69.6k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
69.6k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
3.64k
        filter = true;
1373
3.64k
        compress_stream = false;
1374
3.64k
    }
1375
1376
69.6k
    bool filtered = false;
1377
79.3k
    for (bool first_attempt: {true, false}) {
1378
79.3k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
79.3k
                                          : m->pipeline_stack.activate(true);
1380
1381
79.3k
        try {
1382
79.3k
            filtered = stream.pipeStreamData(
1383
79.3k
                m->pipeline,
1384
79.3k
                !filter ? 0
1385
79.3k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
49.4k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
79.3k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
79.3k
                false,
1389
79.3k
                first_attempt);
1390
79.3k
            if (filter && !filtered) {
1391
                // Try again
1392
9.59k
                filter = false;
1393
9.59k
                stream.setFilterOnWrite(false);
1394
69.7k
            } else {
1395
69.7k
                break;
1396
69.7k
            }
1397
79.3k
        } catch (std::runtime_error& e) {
1398
76
            if (filter && first_attempt) {
1399
62
                stream.warn("error while getting stream data: "s + e.what());
1400
62
                stream.warn("qpdf will attempt to write the damaged stream unchanged");
1401
62
                filter = false;
1402
62
                stream.setFilterOnWrite(false);
1403
62
                continue;
1404
62
            }
1405
14
            throw std::runtime_error(
1406
14
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
76
        }
1408
9.59k
        if (stream_data) {
1409
2.21k
            stream_data->clear();
1410
2.21k
        }
1411
9.59k
    }
1412
69.7k
    if (!filtered) {
1413
29.4k
        compress_stream = false;
1414
29.4k
    }
1415
69.7k
    return filtered;
1416
69.6k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1421
2.77M
{
1422
2.77M
    QPDFObjGen old_og = object.getObjGen();
1423
2.77M
    int child_flags = flags & ~f_stream;
1424
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1425
    // include the preceding newline.
1426
2.77M
    std::string indent_large = " ";
1427
2.77M
    if (m->qdf_mode) {
1428
0
        indent_large.append(2 * (level + 1), ' ');
1429
0
        indent_large[0] = '\n';
1430
0
    }
1431
2.77M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1432
1433
2.77M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1434
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1435
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1436
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1437
109k
        write("[");
1438
2.16M
        for (auto const& item: object.as_array()) {
1439
2.16M
            write(indent_large);
1440
2.16M
            unparseChild(item, level + 1, child_flags);
1441
2.16M
        }
1442
109k
        write(indent).write("]");
1443
2.66M
    } else if (tc == ::ot_dictionary) {
1444
        // Handle special cases for specific dictionaries.
1445
1446
224k
        if (old_og == m->root_og) {
1447
            // Extensions dictionaries.
1448
1449
            // We have one of several cases:
1450
            //
1451
            // * We need ADBE
1452
            //    - We already have Extensions
1453
            //       - If it has the right ADBE, preserve it
1454
            //       - Otherwise, replace ADBE
1455
            //    - We don't have Extensions: create one from scratch
1456
            // * We don't want ADBE
1457
            //    - We already have Extensions
1458
            //       - If it only has ADBE, remove it
1459
            //       - If it has other things, keep those and remove ADBE
1460
            //    - We have no extensions: no action required
1461
            //
1462
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1463
            // dictionary, so we can modify in place.
1464
1465
13.4k
            auto extensions = object.getKey("/Extensions");
1466
13.4k
            const bool has_extensions = extensions.isDictionary();
1467
13.4k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1468
1469
13.4k
            if (has_extensions || need_extensions_adbe) {
1470
                // Make a shallow copy of this object so we can modify it safely without affecting
1471
                // the original. This code has logic to skip certain keys in agreement with
1472
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1473
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1474
                // here because all we are doing is removing or replacing top-level keys.
1475
490
                object = object.unsafeShallowCopy();
1476
490
                if (!has_extensions) {
1477
0
                    extensions = QPDFObjectHandle();
1478
0
                }
1479
1480
490
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1481
490
                const bool have_extensions_other =
1482
490
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1483
1484
490
                if (need_extensions_adbe) {
1485
17
                    if (!(have_extensions_other || have_extensions_adbe)) {
1486
                        // We need Extensions and don't have it.  Create it here.
1487
0
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1488
0
                        extensions = object.replaceKeyAndGetNew(
1489
0
                            "/Extensions", QPDFObjectHandle::newDictionary());
1490
0
                    }
1491
473
                } else if (!have_extensions_other) {
1492
                    // We have Extensions dictionary and don't want one.
1493
155
                    if (have_extensions_adbe) {
1494
145
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1495
145
                        object.removeKey("/Extensions");
1496
145
                        extensions = QPDFObjectHandle(); // uninitialized
1497
145
                    }
1498
155
                }
1499
1500
490
                if (extensions) {
1501
345
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1502
345
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1503
345
                    if (adbe.isDictionary() &&
1504
345
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1505
345
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1506
345
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1507
9
                         m->final_extension_level)) {
1508
9
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1509
336
                    } else {
1510
336
                        if (need_extensions_adbe) {
1511
8
                            extensions.replaceKey(
1512
8
                                "/ADBE",
1513
8
                                QPDFObjectHandle::parse(
1514
8
                                    "<< /BaseVersion /" + m->final_pdf_version +
1515
8
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1516
8
                                    " >>"));
1517
328
                        } else {
1518
328
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1519
328
                            extensions.removeKey("/ADBE");
1520
328
                        }
1521
336
                    }
1522
345
                }
1523
490
            }
1524
13.4k
        }
1525
1526
        // Stream dictionaries.
1527
1528
224k
        if (flags & f_stream) {
1529
            // Suppress /Length since we will write it manually
1530
1531
            // Make a shallow copy of this object so we can modify it safely without affecting the
1532
            // original. This code has logic to skip certain keys in agreement with
1533
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1534
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1535
            // all we are doing is removing or replacing top-level keys.
1536
43.8k
            object = object.unsafeShallowCopy();
1537
1538
43.8k
            object.removeKey("/Length");
1539
1540
            // If /DecodeParms is an empty list, remove it.
1541
43.8k
            if (object.getKey("/DecodeParms").empty()) {
1542
41.7k
                object.removeKey("/DecodeParms");
1543
41.7k
            }
1544
1545
43.8k
            if (flags & f_filtered) {
1546
                // We will supply our own filter and decode parameters.
1547
25.1k
                object.removeKey("/Filter");
1548
25.1k
                object.removeKey("/DecodeParms");
1549
25.1k
            } else {
1550
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1551
                // filters.
1552
18.6k
                QPDFObjectHandle filter = object.getKey("/Filter");
1553
18.6k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1554
18.6k
                if (filter.isOrHasName("/Crypt")) {
1555
450
                    if (filter.isName()) {
1556
33
                        object.removeKey("/Filter");
1557
33
                        object.removeKey("/DecodeParms");
1558
417
                    } else {
1559
417
                        int idx = 0;
1560
2.34k
                        for (auto const& item: filter.as_array()) {
1561
2.34k
                            if (item.isNameAndEquals("/Crypt")) {
1562
                                // If filter is an array, then the code in QPDF_Stream has already
1563
                                // verified that DecodeParms and Filters are arrays of the same
1564
                                // length, but if they weren't for some reason, eraseItem does type
1565
                                // and bounds checking. Fuzzing tells us that this can actually
1566
                                // happen.
1567
417
                                filter.eraseItem(idx);
1568
417
                                decode_parms.eraseItem(idx);
1569
417
                                break;
1570
417
                            }
1571
1.93k
                            ++idx;
1572
1.93k
                        }
1573
417
                    }
1574
450
                }
1575
18.6k
            }
1576
43.8k
        }
1577
1578
224k
        write("<<");
1579
1580
791k
        for (auto const& [key, value]: object.as_dictionary()) {
1581
791k
            if (!value.null()) {
1582
666k
                write(indent_large).write_name(key).write(" ");
1583
666k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1584
666k
                    object.hasKey("/ByteRange")) {
1585
34
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1586
34
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1587
666k
                } else {
1588
666k
                    unparseChild(value, level + 1, child_flags);
1589
666k
                }
1590
666k
            }
1591
791k
        }
1592
1593
224k
        if (flags & f_stream) {
1594
43.4k
            write(indent_large).write("/Length ");
1595
1596
43.4k
            if (m->direct_stream_lengths) {
1597
43.4k
                write(stream_length);
1598
43.4k
            } else {
1599
0
                write(m->cur_stream_length_id).write(" 0 R");
1600
0
            }
1601
43.4k
            if (compress && (flags & f_filtered)) {
1602
24.6k
                write(indent_large).write("/Filter /FlateDecode");
1603
24.6k
            }
1604
43.4k
        }
1605
1606
224k
        write(indent).write(">>");
1607
2.43M
    } else if (tc == ::ot_stream) {
1608
        // Write stream data to a buffer.
1609
43.8k
        if (!m->direct_stream_lengths) {
1610
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1611
0
        }
1612
1613
43.8k
        flags |= f_stream;
1614
43.8k
        bool compress_stream = false;
1615
43.8k
        bool is_metadata = false;
1616
43.8k
        std::string stream_data;
1617
43.8k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1618
25.1k
            flags |= f_filtered;
1619
25.1k
        }
1620
43.8k
        QPDFObjectHandle stream_dict = object.getDict();
1621
1622
43.8k
        m->cur_stream_length = stream_data.size();
1623
43.8k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1624
            // Don't encrypt stream data for the metadata stream
1625
0
            m->cur_data_key.clear();
1626
0
        }
1627
43.8k
        adjustAESStreamLength(m->cur_stream_length);
1628
43.8k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1629
43.8k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1630
43.8k
        write("\nstream\n").write_encrypted(stream_data);
1631
43.8k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1632
43.8k
        write(m->added_newline ? "\nendstream" : "endstream");
1633
2.39M
    } else if (tc == ::ot_string) {
1634
84.6k
        std::string val;
1635
84.6k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1636
84.6k
            !m->cur_data_key.empty()) {
1637
0
            val = object.getStringValue();
1638
0
            if (m->encrypt_use_aes) {
1639
0
                Pl_Buffer bufpl("encrypted string");
1640
0
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1641
0
                pl.writeString(val);
1642
0
                pl.finish();
1643
0
                val = QPDF_String(bufpl.getString()).unparse(true);
1644
0
            } else {
1645
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1646
0
                char* tmp = tmp_ph.get();
1647
0
                size_t vlen = val.length();
1648
0
                RC4 rc4(
1649
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1650
0
                    QIntC::to_int(m->cur_data_key.length()));
1651
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1652
0
                rc4.process(data, vlen, data);
1653
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1654
0
            }
1655
84.6k
        } else if (flags & f_hex_string) {
1656
34
            val = QPDF_String(object.getStringValue()).unparse(true);
1657
84.5k
        } else {
1658
84.5k
            val = object.unparseResolved();
1659
84.5k
        }
1660
84.6k
        write(val);
1661
2.30M
    } else {
1662
2.30M
        write(object.unparseResolved());
1663
2.30M
    }
1664
2.77M
}
1665
1666
void
1667
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1668
27.9k
{
1669
27.9k
    qpdf_assert_debug(first_obj > 0);
1670
27.9k
    bool is_first = true;
1671
27.9k
    auto id = std::to_string(first_obj) + ' ';
1672
348k
    for (auto& offset: offsets) {
1673
348k
        if (is_first) {
1674
27.9k
            is_first = false;
1675
320k
        } else {
1676
320k
            write_qdf("\n").write_no_qdf(" ");
1677
320k
        }
1678
348k
        write(id);
1679
348k
        util::increment(id, 1);
1680
348k
        write(offset);
1681
348k
    }
1682
27.9k
    write("\n");
1683
27.9k
}
1684
1685
void
1686
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1687
13.9k
{
1688
    // Note: object might be null if this is a place-holder for an object stream that we are
1689
    // generating from scratch.
1690
1691
13.9k
    QPDFObjGen old_og = object.getObjGen();
1692
13.9k
    qpdf_assert_debug(old_og.getGen() == 0);
1693
13.9k
    int old_id = old_og.getObj();
1694
13.9k
    int new_stream_id = m->obj[old_og].renumber;
1695
1696
13.9k
    std::vector<qpdf_offset_t> offsets;
1697
13.9k
    qpdf_offset_t first = 0;
1698
1699
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1700
    // first pass.
1701
13.9k
    std::string stream_buffer_pass1;
1702
13.9k
    std::string stream_buffer_pass2;
1703
13.9k
    int first_obj = -1;
1704
13.9k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1705
13.9k
    {
1706
        // Pass 1
1707
13.9k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1708
1709
13.9k
        int count = -1;
1710
174k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1711
174k
            ++count;
1712
174k
            int new_obj = m->obj[obj].renumber;
1713
174k
            if (first_obj == -1) {
1714
13.9k
                first_obj = new_obj;
1715
13.9k
            }
1716
174k
            if (m->qdf_mode) {
1717
0
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1718
0
                if (!m->suppress_original_object_ids) {
1719
0
                    write("; original object ID: ").write(obj.getObj());
1720
                    // For compatibility, only write the generation if non-zero.  While object
1721
                    // streams only allow objects with generation 0, if we are generating object
1722
                    // streams, the old object could have a non-zero generation.
1723
0
                    if (obj.getGen() != 0) {
1724
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1725
0
                        write(" ").write(obj.getGen());
1726
0
                    }
1727
0
                }
1728
0
                write("\n");
1729
0
            }
1730
1731
174k
            offsets.push_back(m->pipeline->getCount());
1732
            // To avoid double-counting objects being written in object streams for progress
1733
            // reporting, decrement in pass 1.
1734
174k
            indicateProgress(true, false);
1735
1736
174k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1737
174k
            if (obj_to_write.isStream()) {
1738
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1739
                // time, but it's not clear to me how to construct a case for this.
1740
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1741
0
                obj_to_write = QPDFObjectHandle::newNull();
1742
0
            }
1743
174k
            writeObject(obj_to_write, count);
1744
1745
174k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1746
174k
        }
1747
13.9k
    }
1748
13.9k
    {
1749
        // Adjust offsets to skip over comment before first object
1750
13.9k
        first = offsets.at(0);
1751
174k
        for (auto& iter: offsets) {
1752
174k
            iter -= first;
1753
174k
        }
1754
1755
        // Take one pass at writing pairs of numbers so we can get their size information
1756
13.9k
        {
1757
13.9k
            auto pp_discard = m->pipeline_stack.activate(true);
1758
13.9k
            writeObjectStreamOffsets(offsets, first_obj);
1759
13.9k
            first += m->pipeline->getCount();
1760
13.9k
        }
1761
1762
        // Set up a stream to write the stream data into a buffer.
1763
13.9k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1764
1765
13.9k
        writeObjectStreamOffsets(offsets, first_obj);
1766
13.9k
        write(stream_buffer_pass1);
1767
13.9k
        stream_buffer_pass1.clear();
1768
13.9k
        stream_buffer_pass1.shrink_to_fit();
1769
13.9k
        if (compressed) {
1770
13.9k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1771
13.9k
        }
1772
13.9k
    }
1773
1774
    // Write the object
1775
13.9k
    openObject(new_stream_id);
1776
13.9k
    setDataKey(new_stream_id);
1777
13.9k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1778
13.9k
    size_t length = stream_buffer_pass2.size();
1779
13.9k
    adjustAESStreamLength(length);
1780
13.9k
    write(" /Length ").write(length).write_qdf("\n ");
1781
13.9k
    if (compressed) {
1782
13.9k
        write(" /Filter /FlateDecode");
1783
13.9k
    }
1784
13.9k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1785
13.9k
    if (!object.isNull()) {
1786
        // If the original object has an /Extends key, preserve it.
1787
0
        QPDFObjectHandle dict = object.getDict();
1788
0
        QPDFObjectHandle extends = dict.getKey("/Extends");
1789
0
        if (extends.isIndirect()) {
1790
0
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1791
0
            write_qdf("\n ").write(" /Extends ");
1792
0
            unparseChild(extends, 1, f_in_ostream);
1793
0
        }
1794
0
    }
1795
13.9k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1796
13.9k
    if (m->encryption) {
1797
0
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1798
0
    }
1799
13.9k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1800
13.9k
    m->cur_data_key.clear();
1801
13.9k
    closeObject(new_stream_id);
1802
13.9k
}
1803
1804
void
1805
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1806
301k
{
1807
301k
    QPDFObjGen old_og = object.getObjGen();
1808
1809
301k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1810
301k
        m->object_stream_to_objects.contains(old_og.getObj())) {
1811
13.9k
        writeObjectStream(object);
1812
13.9k
        return;
1813
13.9k
    }
1814
1815
287k
    indicateProgress(false, false);
1816
287k
    auto new_id = m->obj[old_og].renumber;
1817
287k
    if (m->qdf_mode) {
1818
0
        if (m->page_object_to_seq.contains(old_og)) {
1819
0
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1820
0
        }
1821
0
        if (m->contents_to_page_seq.contains(old_og)) {
1822
0
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1823
0
        }
1824
0
    }
1825
287k
    if (object_stream_index == -1) {
1826
112k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1827
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1828
0
        }
1829
112k
        openObject(new_id);
1830
112k
        setDataKey(new_id);
1831
112k
        unparseObject(object, 0, 0);
1832
112k
        m->cur_data_key.clear();
1833
112k
        closeObject(new_id);
1834
174k
    } else {
1835
174k
        unparseObject(object, 0, f_in_ostream);
1836
174k
        write("\n");
1837
174k
    }
1838
1839
287k
    if (!m->direct_stream_lengths && object.isStream()) {
1840
0
        if (m->qdf_mode) {
1841
0
            if (m->added_newline) {
1842
0
                write("%QDF: ignore_newline\n");
1843
0
            }
1844
0
        }
1845
0
        openObject(new_id + 1);
1846
0
        write(m->cur_stream_length);
1847
0
        closeObject(new_id + 1);
1848
0
    }
1849
287k
}
1850
1851
std::string
1852
QPDFWriter::getOriginalID1()
1853
20.1k
{
1854
20.1k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1855
20.1k
    if (trailer.hasKey("/ID")) {
1856
1.16k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1857
19.0k
    } else {
1858
19.0k
        return "";
1859
19.0k
    }
1860
20.1k
}
1861
1862
void
1863
QPDFWriter::generateID(bool encrypted)
1864
13.1k
{
1865
    // Generate the ID lazily so that we can handle the user's preference to use static or
1866
    // deterministic ID generation.
1867
1868
13.1k
    if (!m->id2.empty()) {
1869
6.52k
        return;
1870
6.52k
    }
1871
1872
6.62k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1873
1874
6.62k
    std::string result;
1875
1876
6.62k
    if (m->static_id) {
1877
        // For test suite use only...
1878
0
        static unsigned char tmp[] = {
1879
0
            0x31,
1880
0
            0x41,
1881
0
            0x59,
1882
0
            0x26,
1883
0
            0x53,
1884
0
            0x58,
1885
0
            0x97,
1886
0
            0x93,
1887
0
            0x23,
1888
0
            0x84,
1889
0
            0x62,
1890
0
            0x64,
1891
0
            0x33,
1892
0
            0x83,
1893
0
            0x27,
1894
0
            0x95,
1895
0
            0x00};
1896
0
        result = reinterpret_cast<char*>(tmp);
1897
6.62k
    } else {
1898
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1899
        // only thing that's really important is that it is very likely to be unique.  We can't
1900
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1901
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1902
        // sufficient portion of the file's contents such no two non-matching files would match in
1903
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1904
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1905
        // that case, would have the same ID regardless of the output file's name.
1906
1907
6.62k
        std::string seed;
1908
6.62k
        if (m->deterministic_id) {
1909
6.62k
            if (encrypted) {
1910
72
                throw std::runtime_error(
1911
72
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
1912
72
                    "written is encrypted (even though the file may not require a password)");
1913
72
            }
1914
6.55k
            if (m->deterministic_id_data.empty()) {
1915
0
                throw std::logic_error(
1916
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
1917
0
            }
1918
6.55k
            seed += m->deterministic_id_data;
1919
6.55k
        } else {
1920
0
            seed += std::to_string(QUtil::get_current_time());
1921
0
            seed += m->filename;
1922
0
            seed += " ";
1923
0
        }
1924
6.55k
        seed += " QPDF ";
1925
6.55k
        if (trailer.hasKey("/Info")) {
1926
5.74k
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1927
5.74k
                if (item.second.isString()) {
1928
1.45k
                    seed += " ";
1929
1.45k
                    seed += item.second.getStringValue();
1930
1.45k
                }
1931
5.74k
            }
1932
274
        }
1933
1934
6.55k
        MD5 m;
1935
6.55k
        m.encodeString(seed.c_str());
1936
6.55k
        MD5::Digest digest;
1937
6.55k
        m.digest(digest);
1938
6.55k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1939
6.55k
    }
1940
1941
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1942
    // word.  Otherwise, we'll use the generated ID for both.
1943
1944
6.55k
    m->id2 = result;
1945
    // Note: keep /ID from old file even if --static-id was given.
1946
6.55k
    m->id1 = getOriginalID1();
1947
6.55k
    if (m->id1.empty()) {
1948
6.22k
        m->id1 = m->id2;
1949
6.22k
    }
1950
6.55k
}
1951
1952
void
1953
QPDFWriter::initializeSpecialStreams()
1954
0
{
1955
    // Mark all page content streams in case we are filtering or normalizing.
1956
0
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1957
0
    int num = 0;
1958
0
    for (auto& page: pages) {
1959
0
        m->page_object_to_seq[page.getObjGen()] = ++num;
1960
0
        QPDFObjectHandle contents = page.getKey("/Contents");
1961
0
        std::vector<QPDFObjGen> contents_objects;
1962
0
        if (contents.isArray()) {
1963
0
            int n = static_cast<int>(contents.size());
1964
0
            for (int i = 0; i < n; ++i) {
1965
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1966
0
            }
1967
0
        } else if (contents.isStream()) {
1968
0
            contents_objects.push_back(contents.getObjGen());
1969
0
        }
1970
1971
0
        for (auto const& c: contents_objects) {
1972
0
            m->contents_to_page_seq[c] = num;
1973
0
            m->normalized_streams.insert(c);
1974
0
        }
1975
0
    }
1976
0
}
1977
1978
void
1979
QPDFWriter::preserveObjectStreams()
1980
0
{
1981
0
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1982
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1983
    // streams out of old objects that have generation numbers greater than zero. However in an
1984
    // existing PDF, all object stream objects and all objects in them must have generation 0
1985
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1986
    // that are not allowed to be in object streams. In addition to removing objects that were
1987
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1988
    // objects from being included.
1989
0
    auto end = xref.cend();
1990
0
    m->obj.streams_empty = true;
1991
0
    if (m->preserve_unreferenced_objects) {
1992
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1993
0
            if (iter->second.getType() == 2) {
1994
                // Pdf contains object streams.
1995
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1996
0
                m->obj.streams_empty = false;
1997
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1998
0
            }
1999
0
        }
2000
0
    } else {
2001
        // Start by scanning for first compressed object in case we don't have any object streams to
2002
        // process.
2003
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2004
0
            if (iter->second.getType() == 2) {
2005
                // Pdf contains object streams.
2006
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2007
0
                m->obj.streams_empty = false;
2008
0
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2009
                // The object pointed to by iter may be a previous generation, in which case it is
2010
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2011
                // table may contain multiple generations of an object).
2012
0
                for (iter = xref.cbegin(); iter != end; ++iter) {
2013
0
                    if (iter->second.getType() == 2) {
2014
0
                        auto id = static_cast<size_t>(iter->first.getObj());
2015
0
                        if (id < eligible.size() && eligible[id]) {
2016
0
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2017
0
                        } else {
2018
0
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2019
0
                        }
2020
0
                    }
2021
0
                }
2022
0
                return;
2023
0
            }
2024
0
        }
2025
0
    }
2026
0
}
2027
2028
void
2029
QPDFWriter::generateObjectStreams()
2030
9.68k
{
2031
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2032
    // how many object streams are needed so that we can distribute objects approximately evenly
2033
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2034
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2035
    // here later.
2036
2037
    // This code doesn't do anything with /Extends.
2038
2039
9.68k
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2040
9.68k
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2041
2042
9.68k
    initializeTables(2U * n_object_streams);
2043
9.68k
    if (n_object_streams == 0) {
2044
37
        m->obj.streams_empty = true;
2045
37
        return;
2046
37
    }
2047
9.65k
    size_t n_per = eligible.size() / n_object_streams;
2048
9.65k
    if (n_per * n_object_streams < eligible.size()) {
2049
164
        ++n_per;
2050
164
    }
2051
9.65k
    unsigned int n = 0;
2052
9.65k
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2053
123k
    for (auto const& item: eligible) {
2054
123k
        if (n == n_per) {
2055
656
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2056
656
            n = 0;
2057
            // Construct a new null object as the "original" object stream.  The rest of the code
2058
            // knows that this means we're creating the object stream from scratch.
2059
656
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2060
656
        }
2061
123k
        auto& obj = m->obj[item];
2062
123k
        obj.object_stream = cur_ostream;
2063
123k
        obj.gen = item.getGen();
2064
123k
        ++n;
2065
123k
    }
2066
9.65k
}
2067
2068
// Return a copy (made with unsafeShallowCopy) of the input file's trailer with the keys removed
// that necessarily have to be replaced when writing the file.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    QPDFObjectHandle trimmed = m->pdf.getTrailer().unsafeShallowCopy();

    for (char const* key: {
             // Encryption keys
             "/ID",
             "/Encrypt",
             // Modification information
             "/Prev",
             // Keys that potentially come from a cross-reference stream
             "/Index",
             "/W",
             "/Length",
             "/Filter",
             "/DecodeParms",
             "/Type",
             "/XRefStm"}) {
        trimmed.removeKey(key);
    }

    return trimmed;
}
2093
2094
// Make document extension level information direct as required by the spec.
2095
void
2096
QPDFWriter::prepareFileForWrite()
2097
9.54k
{
2098
9.54k
    m->pdf.fixDanglingReferences();
2099
9.54k
    auto root = m->pdf.getRoot();
2100
9.54k
    auto oh = root.getKey("/Extensions");
2101
9.54k
    if (oh.isDictionary()) {
2102
357
        const bool extensions_indirect = oh.isIndirect();
2103
357
        if (extensions_indirect) {
2104
330
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2105
330
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2106
330
        }
2107
357
        if (oh.hasKey("/ADBE")) {
2108
173
            auto adbe = oh.getKey("/ADBE");
2109
173
            if (adbe.isIndirect()) {
2110
152
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2111
152
                adbe.makeDirect();
2112
152
                oh.replaceKey("/ADBE", adbe);
2113
152
            }
2114
173
        }
2115
357
    }
2116
9.54k
}
2117
2118
void
2119
QPDFWriter::initializeTables(size_t extra)
2120
9.66k
{
2121
9.66k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2122
9.66k
    m->obj.resize(size);
2123
9.66k
    m->new_obj.resize(size);
2124
9.66k
}
2125
2126
void
2127
QPDFWriter::doWriteSetup()
2128
9.75k
{
2129
9.75k
    if (m->did_write_setup) {
2130
0
        return;
2131
0
    }
2132
9.75k
    m->did_write_setup = true;
2133
2134
    // Do preliminary setup
2135
2136
9.75k
    if (m->linearized) {
2137
9.75k
        m->qdf_mode = false;
2138
9.75k
    }
2139
2140
9.75k
    if (m->pclm) {
2141
0
        m->stream_decode_level = qpdf_dl_none;
2142
0
        m->compress_streams = false;
2143
0
        m->encryption = nullptr;
2144
0
    }
2145
2146
9.75k
    if (m->qdf_mode) {
2147
0
        if (!m->normalize_content_set) {
2148
0
            m->normalize_content = true;
2149
0
        }
2150
0
        if (!m->compress_streams_set) {
2151
0
            m->compress_streams = false;
2152
0
        }
2153
0
        if (!m->stream_decode_level_set) {
2154
0
            m->stream_decode_level = qpdf_dl_generalized;
2155
0
        }
2156
0
    }
2157
2158
9.75k
    if (m->encryption) {
2159
        // Encryption has been explicitly set
2160
0
        m->preserve_encryption = false;
2161
9.75k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2162
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2163
        // though, we still obey that.
2164
0
        m->preserve_encryption = false;
2165
0
    }
2166
2167
9.75k
    if (m->preserve_encryption) {
2168
9.75k
        copyEncryptionParameters(m->pdf);
2169
9.75k
    }
2170
2171
9.75k
    if (!m->forced_pdf_version.empty()) {
2172
0
        int major = 0;
2173
0
        int minor = 0;
2174
0
        parseVersion(m->forced_pdf_version, major, minor);
2175
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2176
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2177
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2178
0
            m->object_stream_mode = qpdf_o_disable;
2179
0
        }
2180
0
    }
2181
2182
9.75k
    if (m->qdf_mode || m->normalize_content) {
2183
0
        initializeSpecialStreams();
2184
0
    }
2185
2186
9.75k
    if (m->qdf_mode) {
2187
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2188
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2189
        // hint streams always get direct stream lengths.
2190
0
        m->direct_stream_lengths = false;
2191
0
    }
2192
2193
9.75k
    switch (m->object_stream_mode) {
2194
0
    case qpdf_o_disable:
2195
0
        initializeTables();
2196
0
        m->obj.streams_empty = true;
2197
0
        break;
2198
2199
0
    case qpdf_o_preserve:
2200
0
        initializeTables();
2201
0
        preserveObjectStreams();
2202
0
        break;
2203
2204
9.68k
    case qpdf_o_generate:
2205
9.68k
        generateObjectStreams();
2206
9.68k
        break;
2207
2208
        // no default so gcc will warn for missing case tag
2209
9.75k
    }
2210
2211
9.66k
    if (!m->obj.streams_empty) {
2212
9.62k
        if (m->linearized) {
2213
            // Page dictionaries are not allowed to be compressed objects.
2214
16.8k
            for (auto& page: m->pdf.getAllPages()) {
2215
16.8k
                if (m->obj[page].object_stream > 0) {
2216
15.1k
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2217
15.1k
                    m->obj[page].object_stream = 0;
2218
15.1k
                }
2219
16.8k
            }
2220
9.62k
        }
2221
2222
9.62k
        if (m->linearized || m->encryption) {
2223
            // The document catalog is not allowed to be compressed in linearized files either.  It
2224
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2225
            // handle encrypted files with compressed document catalogs, so we disable them in that
2226
            // case as well.
2227
9.62k
            if (m->obj[m->root_og].object_stream > 0) {
2228
8.01k
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2229
8.01k
                m->obj[m->root_og].object_stream = 0;
2230
8.01k
            }
2231
9.62k
        }
2232
2233
        // Generate reverse mapping from object stream to objects
2234
3.09M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2235
3.09M
            if (item.object_stream > 0) {
2236
100k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2237
100k
                vec.emplace_back(id, item.gen);
2238
100k
                if (m->max_ostream_index < vec.size()) {
2239
46.1k
                    ++m->max_ostream_index;
2240
46.1k
                }
2241
100k
            }
2242
3.09M
        });
2243
9.62k
        --m->max_ostream_index;
2244
2245
9.62k
        if (m->object_stream_to_objects.empty()) {
2246
707
            m->obj.streams_empty = true;
2247
8.92k
        } else {
2248
8.92k
            setMinimumPDFVersion("1.5");
2249
8.92k
        }
2250
9.62k
    }
2251
2252
9.66k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2253
9.66k
    m->final_pdf_version = m->min_pdf_version;
2254
9.66k
    m->final_extension_level = m->min_extension_level;
2255
9.66k
    if (!m->forced_pdf_version.empty()) {
2256
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2257
0
        m->final_pdf_version = m->forced_pdf_version;
2258
0
        m->final_extension_level = m->forced_extension_level;
2259
0
    }
2260
9.66k
}
2261
2262
void
2263
QPDFWriter::write()
2264
9.75k
{
2265
9.75k
    doWriteSetup();
2266
2267
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2268
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2269
9.75k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2270
2271
9.75k
    prepareFileForWrite();
2272
2273
9.75k
    if (m->linearized) {
2274
9.51k
        writeLinearized();
2275
9.51k
    } else {
2276
248
        writeStandard();
2277
248
    }
2278
2279
9.75k
    m->pipeline->finish();
2280
9.75k
    if (m->close_file) {
2281
0
        fclose(m->file);
2282
0
    }
2283
9.75k
    m->file = nullptr;
2284
9.75k
    if (m->buffer_pipeline) {
2285
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2286
0
        m->buffer_pipeline = nullptr;
2287
0
    }
2288
9.75k
    indicateProgress(false, true);
2289
9.75k
}
2290
2291
// Map an object in the input file to its identity in the output file: the renumbered object id
// recorded in m->obj, with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return {new_id, 0};
}
2296
2297
std::map<QPDFObjGen, QPDFXRefEntry>
2298
QPDFWriter::getWrittenXRefTable()
2299
0
{
2300
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2301
2302
0
    auto it = result.begin();
2303
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2304
0
        if (item.xref.getType() != 0) {
2305
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2306
0
        }
2307
0
    });
2308
0
    return result;
2309
0
}
2310
2311
void
2312
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2313
37.3k
{
2314
81.9k
    for (auto const& oh: part) {
2315
81.9k
        enqueueObject(oh);
2316
81.9k
    }
2317
37.3k
}
2318
2319
void
2320
QPDFWriter::writeEncryptionDictionary()
2321
0
{
2322
0
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2323
0
    auto& enc = *m->encryption;
2324
0
    auto const V = enc.getV();
2325
2326
0
    write("<<");
2327
0
    if (V >= 4) {
2328
0
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2329
0
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2330
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2331
        // MacOS won't open encrypted files without it.
2332
0
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2333
0
        if (!m->encryption->getEncryptMetadata()) {
2334
0
            write(" /EncryptMetadata false");
2335
0
        }
2336
0
    }
2337
0
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2338
0
    write(" /O ").write_string(enc.getO(), true);
2339
0
    if (V >= 4) {
2340
0
        write(" /OE ").write_string(enc.getOE(), true);
2341
0
    }
2342
0
    write(" /P ").write(enc.getP());
2343
0
    if (V >= 5) {
2344
0
        write(" /Perms ").write_string(enc.getPerms(), true);
2345
0
    }
2346
0
    write(" /R ").write(enc.getR());
2347
2348
0
    if (V >= 4) {
2349
0
        write(" /StmF /StdCF /StrF /StdCF");
2350
0
    }
2351
0
    write(" /U ").write_string(enc.getU(), true);
2352
0
    if (V >= 4) {
2353
0
        write(" /UE ").write_string(enc.getUE(), true);
2354
0
    }
2355
0
    write(" /V ").write(enc.getV()).write(" >>");
2356
0
    closeObject(m->encryption_dict_objid);
2357
0
}
2358
2359
std::string
2360
QPDFWriter::getFinalVersion()
2361
0
{
2362
0
    doWriteSetup();
2363
0
    return m->final_pdf_version;
2364
0
}
2365
2366
void
2367
QPDFWriter::writeHeader()
2368
13.6k
{
2369
13.6k
    write("%PDF-").write(m->final_pdf_version);
2370
13.6k
    if (m->pclm) {
2371
        // PCLm version
2372
0
        write("\n%PCLm 1.0\n");
2373
13.6k
    } else {
2374
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2375
        // as binary.
2376
13.6k
        write("\n%\xbf\xf7\xa2\xfe\n");
2377
13.6k
    }
2378
13.6k
    write_qdf("%QDF-1.0\n\n");
2379
2380
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2381
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2382
    // linearized files, we have to write extra header text after the linearization parameter
2383
    // dictionary.
2384
13.6k
}
2385
2386
void
2387
QPDFWriter::writeHintStream(int hint_id)
2388
6.55k
{
2389
6.55k
    std::string hint_buffer;
2390
6.55k
    int S = 0;
2391
6.55k
    int O = 0;
2392
6.55k
    bool compressed = m->compress_streams && !m->qdf_mode;
2393
6.55k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2394
2395
6.55k
    openObject(hint_id);
2396
6.55k
    setDataKey(hint_id);
2397
2398
6.55k
    size_t hlen = hint_buffer.size();
2399
2400
6.55k
    write("<< ");
2401
6.55k
    if (compressed) {
2402
6.55k
        write("/Filter /FlateDecode ");
2403
6.55k
    }
2404
6.55k
    write("/S ").write(S);
2405
6.55k
    if (O) {
2406
168
        write(" /O ").write(O);
2407
168
    }
2408
6.55k
    adjustAESStreamLength(hlen);
2409
6.55k
    write(" /Length ").write(hlen);
2410
6.55k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2411
2412
6.55k
    if (m->encryption) {
2413
0
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2414
0
    }
2415
2416
6.55k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2417
6.55k
    closeObject(hint_id);
2418
6.55k
}
2419
2420
// Short form of writeXRefTable: forwards to the full overload with zero/false for every
// additional argument.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    return writeXRefTable(
        which,
        first,
        last,
        size,
        /*prev=*/0,
        /*suppress_offsets=*/false,
        /*hint_id=*/0,
        /*hint_offset=*/0,
        /*hint_length=*/0,
        /*linearization_pass=*/0);
}
2427
2428
// Write a classic cross-reference table covering objects first..last, followed by the trailer.
//
// When suppress_offsets is true every in-use entry is written with offset 0. Otherwise, when
// hint_id is non-zero, the offset of every object other than hint_id that lies at or beyond
// hint_offset is increased by hint_length. which, size, prev and linearization_pass are passed
// through to writeTrailer.
//
// Returns the pipeline count taken right after the "first count" subsection header and before the
// newline that terminates it.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    int const entry_count = last - first + 1;
    write("xref\n").write(first).write(" ").write(entry_count);
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    write("\n");

    int next_id = first;
    if (next_id == 0) {
        // Object 0 always gets the fixed free entry.
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    for (; next_id <= last; ++next_id) {
        qpdf_offset_t entry_offset = 0;
        if (!suppress_offsets) {
            entry_offset = m->new_obj[next_id].xref.getOffset();
            bool const shifted_by_hint =
                hint_id != 0 && next_id != hint_id && entry_offset >= hint_offset;
            if (shifted_by_hint) {
                entry_offset += hint_length;
            }
        }
        write(QUtil::int_to_string(entry_offset, 10)).write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2462
2463
// Short form of writeXRefStream: forwards to the full overload with zero/false for every
// additional argument.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        /*prev=*/0,
        /*hint_id=*/0,
        /*hint_offset=*/0,
        /*hint_length=*/0,
        /*skip_compression=*/false,
        /*linearization_pass=*/0);
}
2472
2473
// Write a cross-reference stream as object xref_id covering objects first..last.
//
// Each entry is a one-byte type followed by two fields whose widths are derived from max_offset +
// hint_length, max_id and m->max_ostream_index. When hint_id is non-zero, the offset of every
// type 1 entry other than hint_id that lies at or beyond hint_offset is increased by hint_length.
// which, size, prev and linearization_pass are passed through to writeTrailer.
//
// Returns one less than the pipeline count at entry.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();
    qpdf_offset_t const space_before_zero = xref_offset - 1;

    // field 1 contains offsets and object stream identifiers
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // field 2 contains object stream indices
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // One type byte plus the two fields.
    unsigned int const esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        // While pp_xref is alive, output is collected in xref_data.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        // Emit one entry: type byte, then field 1 and field 2 at their computed widths.
        auto write_entry = [this, f1_size, f2_size](
                               unsigned long long type,
                               unsigned long long field1,
                               unsigned long long field2) {
            writeBinary(type, 1);
            writeBinary(field1, f1_size);
            writeBinary(field2, f2_size);
        };

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = m->new_obj[id].xref;
            switch (entry.getType()) {
            case 0:
                write_entry(0, 0, 0);
                break;

            case 1: {
                qpdf_offset_t offset = entry.getOffset();
                if (hint_id != 0 && id != hint_id && offset >= hint_offset) {
                    offset += hint_length;
                }
                write_entry(1, QIntC::to_ulonglong(offset), 0);
                break;
            }

            case 2:
                write_entry(
                    2,
                    QIntC::to_ulonglong(entry.getObjStreamNumber()),
                    QIntC::to_ulonglong(entry.getObjStreamIndex()));
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression set, the stream dictionary is still written for compression but
        // the data is not actually compressed.  This helps us with computation of padding for pass
        // 1 of linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");

    // /Index is only written when the stream does not cover exactly objects 0..size-1.
    bool const covers_everything = (first == 0) && (last == size - 1);
    if (!covers_everything) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2568
2569
size_t
2570
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2571
12.9k
{
2572
    // This routine is called right after a linearization first pass xref stream has been written
2573
    // without compression.  Calculate the amount of padding that would be required in the worst
2574
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2575
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2576
    // 10 extra bytes for number length increases.
2577
2578
12.9k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2579
12.9k
}
2580
2581
void
2582
QPDFWriter::writeLinearized()
2583
9.51k
{
2584
    // Optimize file and enqueue objects in order
2585
2586
9.51k
    std::map<int, int> stream_cache;
2587
2588
55.5k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2589
55.5k
        auto& result = stream_cache[stream.getObjectID()];
2590
55.5k
        if (result == 0) {
2591
25.8k
            bool compress_stream;
2592
25.8k
            bool is_metadata;
2593
25.8k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2594
14.9k
                result = 2;
2595
14.9k
            } else {
2596
10.8k
                result = 1;
2597
10.8k
            }
2598
25.8k
        }
2599
55.5k
        return result;
2600
55.5k
    };
2601
2602
9.51k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2603
2604
9.51k
    std::vector<QPDFObjectHandle> part4;
2605
9.51k
    std::vector<QPDFObjectHandle> part6;
2606
9.51k
    std::vector<QPDFObjectHandle> part7;
2607
9.51k
    std::vector<QPDFObjectHandle> part8;
2608
9.51k
    std::vector<QPDFObjectHandle> part9;
2609
9.51k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2610
2611
    // Object number sequence:
2612
    //
2613
    //  second half
2614
    //    second half uncompressed objects
2615
    //    second half xref stream, if any
2616
    //    second half compressed objects
2617
    //  first half
2618
    //    linearization dictionary
2619
    //    first half xref stream, if any
2620
    //    part 4 uncompresesd objects
2621
    //    encryption dictionary, if any
2622
    //    hint stream
2623
    //    part 6 uncompressed objects
2624
    //    first half compressed objects
2625
    //
2626
2627
    // Second half objects
2628
9.51k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2629
9.51k
    int second_half_first_obj = 1;
2630
9.51k
    int after_second_half = 1 + second_half_uncompressed;
2631
9.51k
    m->next_objid = after_second_half;
2632
9.51k
    int second_half_xref = 0;
2633
9.51k
    bool need_xref_stream = !m->obj.streams_empty;
2634
9.51k
    if (need_xref_stream) {
2635
7.01k
        second_half_xref = m->next_objid++;
2636
7.01k
    }
2637
    // Assign numbers to all compressed objects in the second half.
2638
9.51k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2639
32.1k
    for (int i = 0; i < 3; ++i) {
2640
35.0k
        for (auto const& oh: *vecs2[i]) {
2641
35.0k
            assignCompressedObjectNumbers(oh.getObjGen());
2642
35.0k
        }
2643
22.6k
    }
2644
9.51k
    int second_half_end = m->next_objid - 1;
2645
9.51k
    int second_trailer_size = m->next_objid;
2646
2647
    // First half objects
2648
9.51k
    int first_half_start = m->next_objid;
2649
9.51k
    int lindict_id = m->next_objid++;
2650
9.51k
    int first_half_xref = 0;
2651
9.51k
    if (need_xref_stream) {
2652
7.01k
        first_half_xref = m->next_objid++;
2653
7.01k
    }
2654
9.51k
    int part4_first_obj = m->next_objid;
2655
9.51k
    m->next_objid += QIntC::to_int(part4.size());
2656
9.51k
    int after_part4 = m->next_objid;
2657
9.51k
    if (m->encryption) {
2658
0
        m->encryption_dict_objid = m->next_objid++;
2659
0
    }
2660
9.51k
    int hint_id = m->next_objid++;
2661
9.51k
    int part6_first_obj = m->next_objid;
2662
9.51k
    m->next_objid += QIntC::to_int(part6.size());
2663
9.51k
    int after_part6 = m->next_objid;
2664
    // Assign numbers to all compressed objects in the first half
2665
9.51k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2666
24.6k
    for (int i = 0; i < 2; ++i) {
2667
47.3k
        for (auto const& oh: *vecs1[i]) {
2668
47.3k
            assignCompressedObjectNumbers(oh.getObjGen());
2669
47.3k
        }
2670
15.0k
    }
2671
9.51k
    int first_half_end = m->next_objid - 1;
2672
9.51k
    int first_trailer_size = m->next_objid;
2673
2674
9.51k
    int part4_end_marker = part4.back().getObjectID();
2675
9.51k
    int part6_end_marker = part6.back().getObjectID();
2676
9.51k
    qpdf_offset_t space_before_zero = 0;
2677
9.51k
    qpdf_offset_t file_size = 0;
2678
9.51k
    qpdf_offset_t part6_end_offset = 0;
2679
9.51k
    qpdf_offset_t first_half_max_obj_offset = 0;
2680
9.51k
    qpdf_offset_t second_xref_offset = 0;
2681
9.51k
    qpdf_offset_t first_xref_end = 0;
2682
9.51k
    qpdf_offset_t second_xref_end = 0;
2683
2684
9.51k
    m->next_objid = part4_first_obj;
2685
9.51k
    enqueuePart(part4);
2686
9.51k
    if (m->next_objid != after_part4) {
2687
        // This can happen with very botched files as in the fuzzer test. There are likely some
2688
        // faulty assumptions in calculateLinearizationData
2689
9
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2690
9
    }
2691
9.50k
    m->next_objid = part6_first_obj;
2692
9.50k
    enqueuePart(part6);
2693
9.50k
    if (m->next_objid != after_part6) {
2694
130
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2695
130
    }
2696
9.37k
    m->next_objid = second_half_first_obj;
2697
9.37k
    enqueuePart(part7);
2698
9.37k
    enqueuePart(part8);
2699
9.37k
    enqueuePart(part9);
2700
9.37k
    if (m->next_objid != after_second_half) {
2701
339
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2702
339
    }
2703
2704
9.03k
    qpdf_offset_t hint_length = 0;
2705
9.03k
    std::string hint_buffer;
2706
2707
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2708
2709
9.03k
    FILE* lin_pass1_file = nullptr;
2710
9.03k
    auto pp_pass1 = m->pipeline_stack.popper();
2711
9.03k
    auto pp_md5 = m->pipeline_stack.popper();
2712
13.6k
    for (int pass: {1, 2}) {
2713
13.6k
        if (pass == 1) {
2714
7.07k
            if (!m->lin_pass1_filename.empty()) {
2715
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2716
0
                m->pipeline_stack.activate(
2717
0
                    pp_pass1,
2718
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2719
7.07k
            } else {
2720
7.07k
                m->pipeline_stack.activate(pp_pass1, true);
2721
7.07k
            }
2722
7.07k
            if (m->deterministic_id) {
2723
7.07k
                m->pipeline_stack.activate_md5(pp_md5);
2724
7.07k
            }
2725
7.07k
        }
2726
2727
        // Part 1: header
2728
2729
13.6k
        writeHeader();
2730
2731
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2732
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2733
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2734
        // linearization parameter dictionary must appear within the first 1024 characters of the
2735
        // file.
2736
2737
13.6k
        qpdf_offset_t pos = m->pipeline->getCount();
2738
13.6k
        openObject(lindict_id);
2739
13.6k
        write("<<");
2740
13.6k
        if (pass == 2) {
2741
6.55k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2742
6.55k
            int first_page_object = m->obj[pages.at(0)].renumber;
2743
2744
6.55k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2745
            // Implementation note 121 states that a space is mandatory after this open bracket.
2746
6.55k
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2747
6.55k
            write(hint_length);
2748
6.55k
            write(" ] /O ").write(first_page_object);
2749
6.55k
            write(" /E ").write(part6_end_offset + hint_length);
2750
6.55k
            write(" /N ").write(pages.size());
2751
6.55k
            write(" /T ").write(space_before_zero + hint_length);
2752
6.55k
        }
2753
13.6k
        write(" >>");
2754
13.6k
        closeObject(lindict_id);
2755
13.6k
        static int const pad = 200;
2756
13.6k
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2757
2758
        // If the user supplied any additional header text, write it here after the linearization
2759
        // parameter dictionary.
2760
13.6k
        write(m->extra_header_text);
2761
2762
        // Part 3: first page cross reference table and trailer.
2763
2764
13.6k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2765
13.6k
        qpdf_offset_t hint_offset = 0;
2766
13.6k
        if (pass == 2) {
2767
6.55k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2768
6.55k
        }
2769
13.6k
        if (need_xref_stream) {
2770
            // Must pad here too.
2771
13.0k
            if (pass == 1) {
2772
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2773
                // reserved for each file offset.  This would provide adequate space for the xref
2774
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2775
                // file, which is extremely likely.  In the second pass, we will know the actual
2776
                // value for this, but it's okay if it's smaller.
2777
6.77k
                first_half_max_obj_offset = 1 << 25;
2778
6.77k
            }
2779
13.0k
            pos = m->pipeline->getCount();
2780
13.0k
            writeXRefStream(
2781
13.0k
                first_half_xref,
2782
13.0k
                first_half_end,
2783
13.0k
                first_half_max_obj_offset,
2784
13.0k
                t_lin_first,
2785
13.0k
                first_half_start,
2786
13.0k
                first_half_end,
2787
13.0k
                first_trailer_size,
2788
13.0k
                hint_length + second_xref_offset,
2789
13.0k
                hint_id,
2790
13.0k
                hint_offset,
2791
13.0k
                hint_length,
2792
13.0k
                (pass == 1),
2793
13.0k
                pass);
2794
13.0k
            qpdf_offset_t endpos = m->pipeline->getCount();
2795
13.0k
            if (pass == 1) {
2796
                // Pad so we have enough room for the real xref stream.
2797
6.64k
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2798
6.64k
                first_xref_end = m->pipeline->getCount();
2799
6.64k
            } else {
2800
                // Pad so that the next object starts at the same place as in pass 1.
2801
6.40k
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2802
2803
6.40k
                if (m->pipeline->getCount() != first_xref_end) {
2804
0
                    throw std::logic_error(
2805
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
6.40k
            }
2809
13.0k
            write("\n");
2810
13.0k
        } else {
2811
577
            writeXRefTable(
2812
577
                t_lin_first,
2813
577
                first_half_start,
2814
577
                first_half_end,
2815
577
                first_trailer_size,
2816
577
                hint_length + second_xref_offset,
2817
577
                (pass == 1),
2818
577
                hint_id,
2819
577
                hint_offset,
2820
577
                hint_length,
2821
577
                pass);
2822
577
            write("startxref\n0\n%%EOF\n");
2823
577
        }
2824
2825
        // Parts 4 through 9
2826
2827
126k
        for (auto const& cur_object: m->object_queue) {
2828
126k
            if (cur_object.getObjectID() == part6_end_marker) {
2829
13.4k
                first_half_max_obj_offset = m->pipeline->getCount();
2830
13.4k
            }
2831
126k
            writeObject(cur_object);
2832
126k
            if (cur_object.getObjectID() == part4_end_marker) {
2833
13.4k
                if (m->encryption) {
2834
0
                    writeEncryptionDictionary();
2835
0
                }
2836
13.4k
                if (pass == 1) {
2837
6.93k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
6.93k
                } else {
2839
                    // Part 5: hint stream
2840
6.54k
                    write(hint_buffer);
2841
6.54k
                }
2842
13.4k
            }
2843
126k
            if (cur_object.getObjectID() == part6_end_marker) {
2844
13.3k
                part6_end_offset = m->pipeline->getCount();
2845
13.3k
            }
2846
126k
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
13.6k
        second_xref_offset = m->pipeline->getCount();
2853
13.6k
        if (need_xref_stream) {
2854
12.5k
            pos = m->pipeline->getCount();
2855
12.5k
            space_before_zero = writeXRefStream(
2856
12.5k
                second_half_xref,
2857
12.5k
                second_half_end,
2858
12.5k
                second_xref_offset,
2859
12.5k
                t_lin_second,
2860
12.5k
                0,
2861
12.5k
                second_half_end,
2862
12.5k
                second_trailer_size,
2863
12.5k
                0,
2864
12.5k
                0,
2865
12.5k
                0,
2866
12.5k
                0,
2867
12.5k
                (pass == 1),
2868
12.5k
                pass);
2869
12.5k
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
12.5k
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
6.26k
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2875
6.26k
                second_xref_end = m->pipeline->getCount();
2876
6.26k
            } else {
2877
                // Make the file size the same.
2878
6.24k
                auto padding =
2879
6.24k
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2880
6.24k
                write(padding, ' ').write("\n");
2881
2882
                // If this assertion fails, maybe we didn't have enough padding above.
2883
6.24k
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2884
0
                    throw std::logic_error(
2885
0
                        "count mismatch after xref stream; possible insufficient padding?");
2886
0
                }
2887
6.24k
            }
2888
12.5k
        } else {
2889
1.10k
            space_before_zero = writeXRefTable(
2890
1.10k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2891
1.10k
        }
2892
13.6k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2893
2894
13.6k
        if (pass == 1) {
2895
6.55k
            if (m->deterministic_id) {
2896
6.55k
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2897
6.55k
                computeDeterministicIDData();
2898
6.55k
                pp_md5.pop();
2899
6.55k
            }
2900
2901
            // Close first pass pipeline
2902
6.55k
            file_size = m->pipeline->getCount();
2903
6.55k
            pp_pass1.pop();
2904
2905
            // Save hint offset since it will be set to zero by calling openObject.
2906
6.55k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2907
2908
            // Write hint stream to a buffer
2909
6.55k
            {
2910
6.55k
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2911
6.55k
                writeHintStream(hint_id);
2912
6.55k
            }
2913
6.55k
            hint_length = QIntC::to_offset(hint_buffer.size());
2914
2915
            // Restore hint offset
2916
6.55k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2917
6.55k
            if (lin_pass1_file) {
2918
                // Write some debugging information
2919
0
                fprintf(
2920
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2921
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2922
0
                fprintf(
2923
0
                    lin_pass1_file,
2924
0
                    "%% second_xref_offset=%s\n",
2925
0
                    std::to_string(second_xref_offset).c_str());
2926
0
                fprintf(
2927
0
                    lin_pass1_file,
2928
0
                    "%% second_xref_end=%s\n",
2929
0
                    std::to_string(second_xref_end).c_str());
2930
0
                fclose(lin_pass1_file);
2931
0
                lin_pass1_file = nullptr;
2932
0
            }
2933
6.55k
        }
2934
13.6k
    }
2935
9.03k
}
2936
2937
void
2938
QPDFWriter::enqueueObjectsStandard()
2939
0
{
2940
0
    if (m->preserve_unreferenced_objects) {
2941
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2942
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2943
0
            enqueueObject(oh);
2944
0
        }
2945
0
    }
2946
2947
    // Put root first on queue.
2948
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2949
0
    enqueueObject(trailer.getKey("/Root"));
2950
2951
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2952
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2953
0
    for (auto& item: trailer.as_dictionary()) {
2954
0
        if (!item.second.null()) {
2955
0
            enqueueObject(item.second);
2956
0
        }
2957
0
    }
2958
0
}
2959
2960
void
2961
QPDFWriter::enqueueObjectsPCLm()
2962
0
{
2963
    // Image transform stream content for page strip images. Each of this new stream has to come
2964
    // after every page image strip written in the pclm file.
2965
0
    std::string image_transform_content = "q /image Do Q\n";
2966
2967
    // enqueue all pages first
2968
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2969
0
    for (auto& page: all) {
2970
        // enqueue page
2971
0
        enqueueObject(page);
2972
2973
        // enqueue page contents stream
2974
0
        enqueueObject(page.getKey("/Contents"));
2975
2976
        // enqueue all the strips for each page
2977
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2978
0
        for (auto& image: strips.as_dictionary()) {
2979
0
            if (!image.second.null()) {
2980
0
                enqueueObject(image.second);
2981
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2982
0
            }
2983
0
        }
2984
0
    }
2985
2986
    // Put root in queue.
2987
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2988
0
    enqueueObject(trailer.getKey("/Root"));
2989
0
}
2990
2991
void
2992
QPDFWriter::indicateProgress(bool decrement, bool finished)
2993
467k
{
2994
467k
    if (decrement) {
2995
174k
        --m->events_seen;
2996
174k
        return;
2997
174k
    }
2998
2999
293k
    ++m->events_seen;
3000
3001
293k
    if (!m->progress_reporter.get()) {
3002
293k
        return;
3003
293k
    }
3004
3005
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3006
0
        int percentage =
3007
0
            (finished ? 100
3008
0
                 : m->next_progress_report == 0
3009
0
                 ? 0
3010
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3011
0
        m->progress_reporter->reportProgress(percentage);
3012
0
    }
3013
0
    int increment = std::max(1, (m->events_expected / 100));
3014
0
    while (m->events_seen >= m->next_progress_report) {
3015
0
        m->next_progress_report += increment;
3016
0
    }
3017
0
}
3018
3019
void
3020
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3021
0
{
3022
0
    m->progress_reporter = pr;
3023
0
}
3024
3025
void
3026
QPDFWriter::writeStandard()
3027
0
{
3028
0
    auto pp_md5 = m->pipeline_stack.popper();
3029
0
    if (m->deterministic_id) {
3030
0
        m->pipeline_stack.activate_md5(pp_md5);
3031
0
    }
3032
3033
    // Start writing
3034
3035
0
    writeHeader();
3036
0
    write(m->extra_header_text);
3037
3038
0
    if (m->pclm) {
3039
0
        enqueueObjectsPCLm();
3040
0
    } else {
3041
0
        enqueueObjectsStandard();
3042
0
    }
3043
3044
    // Now start walking queue, outputting each object.
3045
0
    while (m->object_queue_front < m->object_queue.size()) {
3046
0
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3047
0
        ++m->object_queue_front;
3048
0
        writeObject(cur_object);
3049
0
    }
3050
3051
    // Write out the encryption dictionary, if any
3052
0
    if (m->encryption) {
3053
0
        writeEncryptionDictionary();
3054
0
    }
3055
3056
    // Now write out xref.  next_objid is now the number of objects.
3057
0
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3058
0
    if (m->object_stream_to_objects.empty()) {
3059
        // Write regular cross-reference table
3060
0
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3061
0
    } else {
3062
        // Write cross-reference stream.
3063
0
        int xref_id = m->next_objid++;
3064
0
        writeXRefStream(
3065
0
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3066
0
    }
3067
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3068
3069
0
    if (m->deterministic_id) {
3070
0
        QTC::TC(
3071
0
            "qpdf",
3072
0
            "QPDFWriter standard deterministic ID",
3073
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3074
0
    }
3075
0
}