Coverage Report

Created: 2025-08-26 07:11

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Out-of-line destructor with a deliberately empty body.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
35
36
// Stores a copy of the callback that reportProgress() will invoke.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(handler)
{
}
40
41
// Out-of-line destructor with a deliberately empty body.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
186k
                stack(&stack)
102
186k
            {
103
186k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
9.78k
            top(top)
112
9.78k
        {
113
9.78k
        }
114
115
        Popper
116
        popper()
117
16.4k
        {
118
16.4k
            return {*this};
119
16.4k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
9.78k
        {
124
9.78k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
9.78k
            top = c.get();
126
9.78k
            stack.emplace_back(std::move(c));
127
9.78k
        }
128
129
        Popper
130
        activate(std::string& str)
131
120k
        {
132
120k
            Popper pp{*this};
133
120k
            activate(pp, str);
134
120k
            return pp;
135
120k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
120k
        {
140
120k
            activate(pp, false, &str, nullptr);
141
120k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
49.9k
        {
156
49.9k
            Popper pp{*this};
157
49.9k
            activate(pp, discard, str, std::move(next));
158
49.9k
            return pp;
159
49.9k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
178k
        {
168
178k
            std::unique_ptr<pl::Count> c;
169
178k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
178k
            } else if (discard) {
172
58.2k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
120k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
120k
            } else {
176
120k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
120k
            }
178
178k
            pp.stack_id = last_id;
179
178k
            top = c.get();
180
178k
            stack.emplace_back(std::move(c));
181
178k
        }
182
        void
183
        activate_md5(Popper& pp)
184
8.22k
        {
185
8.22k
            qpdf_assert_debug(!md5_pipeline);
186
8.22k
            qpdf_assert_debug(md5_id == 0);
187
8.22k
            qpdf_assert_debug(top->getCount() == 0);
188
8.22k
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
8.22k
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
8.22k
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
8.22k
            pp.stack_id = last_id;
193
8.22k
            md5_id = last_id;
194
8.22k
            top = c.get();
195
8.22k
            stack.emplace_back(std::move(c));
196
8.22k
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
7.80k
        {
202
7.80k
            qpdf_assert_debug(md5_pipeline);
203
7.80k
            auto digest = md5_pipeline->getHexDigest();
204
7.80k
            md5_pipeline->enable(false);
205
7.80k
            return digest;
206
7.80k
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
186k
        {
218
186k
            if (!stack_id) {
219
0
                return;
220
0
            }
221
186k
            qpdf_assert_debug(stack.size() >= 2);
222
186k
            top->finish();
223
186k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
186k
            qpdf_assert_debug(top->id() == stack_id);
229
186k
            if (stack_id == md5_id) {
230
8.22k
                md5_pipeline = nullptr;
231
8.22k
                md5_id = 0;
232
8.22k
            }
233
186k
            stack.pop_back();
234
186k
            top = stack.back().get();
235
186k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
// Pops the associated pipeline unless this Popper has been detached from its stack.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
252
253
void
254
Pl_stack::Popper::pop()
255
15.6k
{
256
15.6k
    if (stack) {
257
15.6k
        stack->pop(stack_id);
258
15.6k
    }
259
15.6k
    stack_id = 0;
260
15.6k
    stack = nullptr;
261
15.6k
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
// Binds the writer state to `pdf`, records the root's object id when the root is indirect
// (otherwise {-1, 0}), and hands the pipeline stack a reference to `pipeline`.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)),
    pipeline_stack(pipeline)
{
}
350
351
// Closes the output file when close_file is set and frees any output buffer still held here.
QPDFWriter::Members::~Members()
{
    bool const owns_file = (file != nullptr) && close_file;
    if (owns_file) {
        fclose(file);
    }
    delete output_buffer;
}
358
359
// Creates a writer for `pdf` without selecting an output destination.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
363
364
// Creates a writer for `pdf` and passes `filename` straight to setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    setOutputFilename(filename);
}
369
370
// Creates a writer for `pdf` and passes the remaining arguments straight to setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    setOutputFile(description, file, close_file);
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
// Hands the stored output buffer to the caller and clears the writer's pointer to it, so the
// Members destructor will no longer delete it. Returns null when no buffer is stored.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
9.78k
{
430
9.78k
    m->filename = "custom pipeline";
431
9.78k
    m->pipeline_stack.initialize(p);
432
9.78k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
9.78k
{
437
9.78k
    m->object_stream_mode = mode;
438
9.78k
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
9.78k
{
473
9.78k
    m->stream_decode_level = val;
474
9.78k
    m->stream_decode_level_set = true;
475
9.78k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
0
{
493
0
    m->qdf_mode = val;
494
0
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
18.6k
{
511
18.6k
    bool set_version = false;
512
18.6k
    bool set_extension_level = false;
513
18.6k
    if (m->min_pdf_version.empty()) {
514
9.58k
        set_version = true;
515
9.58k
        set_extension_level = true;
516
9.58k
    } else {
517
9.10k
        int old_major = 0;
518
9.10k
        int old_minor = 0;
519
9.10k
        int min_major = 0;
520
9.10k
        int min_minor = 0;
521
9.10k
        parseVersion(version, old_major, old_minor);
522
9.10k
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
9.10k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
9.10k
        if (compare > 0) {
525
1.12k
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
1.12k
            set_version = true;
527
1.12k
            set_extension_level = true;
528
7.97k
        } else if (compare == 0) {
529
102
            if (extension_level > m->min_extension_level) {
530
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
1
                set_extension_level = true;
532
1
            }
533
102
        }
534
9.10k
    }
535
536
18.6k
    if (set_version) {
537
10.7k
        m->min_pdf_version = version;
538
10.7k
    }
539
18.6k
    if (set_extension_level) {
540
10.7k
        m->min_extension_level = extension_level;
541
10.7k
    }
542
18.6k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
0
{
575
0
    m->static_id = val;
576
0
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
9.78k
{
581
9.78k
    m->deterministic_id = val;
582
9.78k
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
9.78k
{
607
9.78k
    m->linearized = val;
608
9.78k
    if (val) {
609
9.78k
        m->pclm = false;
610
9.78k
    }
611
9.78k
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
0
{
665
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
0
    interpretR3EncryptionParameters(
667
0
        allow_accessibility,
668
0
        allow_extract,
669
0
        allow_assemble,
670
0
        allow_annotate_and_form,
671
0
        allow_form_filling,
672
0
        allow_modify_other,
673
0
        print,
674
0
        qpdf_r3m_all);
675
0
    setEncryptionParameters(user_password, owner_password);
676
0
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
0
{
746
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
0
    interpretR3EncryptionParameters(
748
0
        allow_accessibility,
749
0
        allow_extract,
750
0
        allow_assemble,
751
0
        allow_annotate_and_form,
752
0
        allow_form_filling,
753
0
        allow_modify_other,
754
0
        print,
755
0
        qpdf_r3m_all);
756
0
    m->encrypt_use_aes = true;
757
0
    setEncryptionParameters(user_password, owner_password);
758
0
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
0
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
0
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
0
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
0
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
0
    case qpdf_r3p_full:
817
0
        break;
818
        // no default so gcc warns for missing cases
819
0
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
0
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
0
    case qpdf_r3m_all:
840
0
        break;
841
        // no default so gcc warns for missing cases
842
0
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
0
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
0
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
0
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
0
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
0
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
0
{
862
0
    generateID(true);
863
0
    m->encryption->setId1(m->id1);
864
0
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
0
    setEncryptionMinimumVersion();
866
0
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
9.78k
{
871
9.78k
    m->preserve_encryption = false;
872
9.78k
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
9.78k
    if (trailer.hasKey("/Encrypt")) {
874
57
        generateID(true);
875
57
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
57
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
57
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
57
        int key_len = 5;
879
57
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
57
        const bool encrypt_metadata =
883
57
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
57
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
57
            : true;
886
57
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
57
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
57
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
57
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
57
            V,
898
57
            encrypt.getKey("/R").getIntValueAsInt(),
899
57
            key_len,
900
57
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
57
            encrypt.getKey("/O").getStringValue(),
902
57
            encrypt.getKey("/U").getStringValue(),
903
57
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
57
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
57
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
57
            m->id1, // m->id1 == the other file's id1
907
57
            encrypt_metadata);
908
57
        m->encryption_key = V >= 5
909
57
            ? qpdf.getEncryptionKey()
910
57
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
57
        setEncryptionMinimumVersion();
912
57
    }
913
9.78k
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
18.2k
{
955
18.2k
    major = QUtil::string_to_int(version.c_str());
956
18.2k
    minor = 0;
957
18.2k
    size_t p = version.find('.');
958
18.2k
    if ((p != std::string::npos) && (version.length() > p)) {
959
18.2k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
18.2k
    }
961
18.2k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
18.2k
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
28
    }
967
18.2k
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
9.10k
{
972
9.10k
    if (major1 < major2) {
973
80
        return -1;
974
9.02k
    } else if (major1 > major2) {
975
238
        return 1;
976
8.78k
    } else if (minor1 < minor2) {
977
7.78k
        return -1;
978
7.78k
    } else if (minor1 > minor2) {
979
891
        return 1;
980
891
    } else {
981
102
        return 0;
982
102
    }
983
9.10k
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
0
{
988
0
    auto const R = m->encryption->getR();
989
0
    if (R >= 6) {
990
0
        setMinimumPDFVersion("1.7", 8);
991
0
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
0
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
0
    } else if (R == 3) {
996
0
        setMinimumPDFVersion("1.4");
997
0
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
0
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
125k
{
1005
125k
    if (m->encryption) {
1006
0
        m->cur_data_key = QPDF::compute_data_key(
1007
0
            m->encryption_key,
1008
0
            objid,
1009
0
            0,
1010
0
            m->encrypt_use_aes,
1011
0
            m->encryption->getV(),
1012
0
            m->encryption->getR());
1013
0
    }
1014
125k
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
92.6k
{
1019
92.6k
    unsigned int bytes = 0;
1020
212k
    while (n) {
1021
119k
        ++bytes;
1022
119k
        n >>= 8;
1023
119k
    }
1024
92.6k
    return bytes;
1025
92.6k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
1.03M
{
1030
1.03M
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
1.03M
    unsigned char data[sizeof(unsigned long long)];
1034
2.50M
    for (unsigned int i = 0; i < bytes; ++i) {
1035
1.47M
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
1.47M
        val >>= 8;
1037
1.47M
    }
1038
1.03M
    m->pipeline->write(data, bytes);
1039
1.03M
}
1040
1041
QPDFWriter&
QPDFWriter::write(std::string_view str)
{
    // Pass the text through to m->pipeline unchanged. Returning *this lets calls be chained.
    auto& out = *m->pipeline;
    out.write(str);
    return *this;
}
1047
1048
QPDFWriter&
QPDFWriter::write(std::integral auto val)
{
    // Write any integral value as the text produced by std::to_string. Returning *this lets calls
    // be chained.
    auto& out = *m->pipeline;
    out.write(std::to_string(val));
    return *this;
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
576k
{
1051
576k
    m->pipeline->write(std::to_string(val));
1052
576k
    return *this;
1053
576k
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
369k
{
1051
369k
    m->pipeline->write(std::to_string(val));
1052
369k
    return *this;
1053
369k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
125k
{
1051
125k
    m->pipeline->write(std::to_string(val));
1052
125k
    return *this;
1053
125k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
92.6k
{
1051
92.6k
    m->pipeline->write(std::to_string(val));
1052
92.6k
    return *this;
1053
92.6k
}
1054
1055
QPDFWriter&
QPDFWriter::write(size_t count, char c)
{
    // Forward a (count, character) pair to m->pipeline; callers in this file use it to emit runs
    // of a single character such as padding. Returning *this lets calls be chained.
    auto& out = *m->pipeline;
    out.write(count, c);
    return *this;
}
1061
1062
QPDFWriter&
QPDFWriter::write_name(std::string const& str)
{
    // Write a name after passing it through Name::normalize. Returning *this lets calls be
    // chained.
    auto& out = *m->pipeline;
    out.write(Name::normalize(str));
    return *this;
}
1068
1069
QPDFWriter&
QPDFWriter::write_string(std::string const& str, bool force_binary)
{
    // Write `str` in the form produced by QPDF_String::unparse, handing `force_binary` straight
    // through to it. Returning *this lets calls be chained.
    auto& out = *m->pipeline;
    out.write(QPDF_String(str).unparse(force_binary));
    return *this;
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
834k
{
1080
834k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
834k
    return *this;
1084
834k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
541k
{
1080
541k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
541k
    return *this;
1084
541k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
245k
{
1080
245k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
245k
    return *this;
1084
245k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
31.4k
{
1080
31.4k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
31.4k
    return *this;
1084
31.4k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
16.0k
{
1080
16.0k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
16.0k
    return *this;
1084
16.0k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
337k
{
1090
337k
    if (!m->qdf_mode) {
1091
337k
        m->pipeline->write(std::forward<Args>(args)...);
1092
337k
    }
1093
337k
    return *this;
1094
337k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
306k
{
1090
306k
    if (!m->qdf_mode) {
1091
306k
        m->pipeline->write(std::forward<Args>(args)...);
1092
306k
    }
1093
306k
    return *this;
1094
306k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
31.4k
{
1090
31.4k
    if (!m->qdf_mode) {
1091
31.4k
        m->pipeline->write(std::forward<Args>(args)...);
1092
31.4k
    }
1093
31.4k
    return *this;
1094
31.4k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
71.0k
{
1099
71.0k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
0
        length += 32 - (length & 0xf);
1103
0
    }
1104
71.0k
}
1105
1106
QPDFWriter&
QPDFWriter::write_encrypted(std::string_view str)
{
    // Write `str`, encrypting it first when encryption is configured and a data key is set. The
    // data is piped through Pl_AES_PDF or Pl_RC4 depending on encrypt_use_aes; with no encryption
    // or an empty key the data is written as is. Returning *this lets calls be chained.
    bool const encrypting = m->encryption && !m->cur_data_key.empty();
    if (!encrypting) {
        return write(str);
    }
    if (m->encrypt_use_aes) {
        write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
    } else {
        write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
    }
    return *this;
}
1119
1120
void
1121
QPDFWriter::computeDeterministicIDData()
1122
7.80k
{
1123
7.80k
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
7.80k
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
7.80k
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
7.80k
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
172k
{
1135
172k
    if (objid == 0) {
1136
0
        objid = m->next_objid++;
1137
0
    }
1138
172k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
172k
    write(objid).write(" 0 obj\n");
1140
172k
    return objid;
1141
172k
}
1142
1143
void
1144
QPDFWriter::closeObject(int objid)
1145
171k
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
171k
    write("\nendobj\n").write_qdf("\n");
1148
171k
    auto& new_obj = m->new_obj[objid];
1149
171k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
171k
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
74.2k
{
1155
74.2k
    int objid = og.getObj();
1156
74.2k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
65.5k
        return;
1159
65.5k
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
81.7k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
81.7k
        m->obj[iter].renumber = m->next_objid++;
1164
81.7k
    }
1165
8.77k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
74.0k
{
1170
74.0k
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
74.0k
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
74.0k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
0
            return;
1190
0
        }
1191
1192
74.0k
        QPDFObjGen og = object.getObjGen();
1193
74.0k
        auto& obj = m->obj[og];
1194
1195
74.0k
        if (obj.renumber == 0) {
1196
73.5k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
0
                obj.renumber = -1;
1201
0
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
73.5k
            } else {
1203
73.5k
                m->object_queue.push_back(object);
1204
73.5k
                obj.renumber = m->next_objid++;
1205
1206
73.5k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
8.65k
                    if (!m->linearized) {
1210
0
                        assignCompressedObjectNumbers(og);
1211
0
                    }
1212
64.8k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
0
                    ++m->next_objid;
1215
0
                }
1216
73.5k
            }
1217
73.5k
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
0
        }
1221
74.0k
        return;
1222
74.0k
    } else if (!m->linearized) {
1223
0
        if (object.isArray()) {
1224
0
            for (auto& item: object.as_array()) {
1225
0
                enqueueObject(item);
1226
0
            }
1227
0
        } else if (auto d = object.as_dictionary()) {
1228
0
            for (auto const& item: d) {
1229
0
                if (!item.second.null()) {
1230
0
                    enqueueObject(item.second);
1231
0
                }
1232
0
            }
1233
0
        }
1234
0
    } else {
1235
        // ignore
1236
0
    }
1237
74.0k
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1241
2.19M
{
1242
2.19M
    if (!m->linearized) {
1243
0
        enqueueObject(child);
1244
0
    }
1245
2.19M
    if (child.isIndirect()) {
1246
338k
        write(m->obj[child].renumber).write(" 0 R");
1247
1.85M
    } else {
1248
1.85M
        unparseObject(child, level, flags);
1249
1.85M
    }
1250
2.19M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
31.5k
{
1256
31.5k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
31.5k
    if (xref_stream) {
1258
30.8k
        m->cur_data_key.clear();
1259
30.8k
    } else {
1260
706
        write("trailer <<");
1261
706
    }
1262
31.5k
    write_qdf("\n");
1263
31.5k
    if (which == t_lin_second) {
1264
15.5k
        write(" /Size ").write(size);
1265
16.0k
    } else {
1266
32.1k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
32.1k
            if (value.null()) {
1268
5.94k
                continue;
1269
5.94k
            }
1270
26.2k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
26.2k
            if (key == "/Size") {
1272
1.84k
                write(size);
1273
1.84k
                if (which == t_lin_first) {
1274
1.84k
                    write(" /Prev ");
1275
1.84k
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
1.84k
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
1.84k
                }
1278
24.3k
            } else {
1279
24.3k
                unparseChild(value, 1, 0);
1280
24.3k
            }
1281
26.2k
            write_qdf("\n");
1282
26.2k
        }
1283
16.0k
    }
1284
1285
    // Write ID
1286
31.5k
    write_qdf(" ").write(" /ID [");
1287
31.5k
    if (linearization_pass == 1) {
1288
16.0k
        std::string original_id1 = getOriginalID1();
1289
16.0k
        if (original_id1.empty()) {
1290
15.3k
            write("<00000000000000000000000000000000>");
1291
15.3k
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
683
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
683
            write("<").write(len, '0').write(">");
1298
683
        }
1299
16.0k
        write("<00000000000000000000000000000000>");
1300
16.0k
    } else {
1301
15.5k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
0
            computeDeterministicIDData();
1303
0
        }
1304
15.5k
        generateID(m->encryption.get());
1305
15.5k
        write_string(m->id1, true).write_string(m->id2, true);
1306
15.5k
    }
1307
31.5k
    write("]");
1308
1309
31.5k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
15.8k
        if (m->encryption) {
1312
0
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
0
        }
1314
15.8k
    }
1315
1316
31.5k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
31.5k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
72.4k
{
1326
72.4k
    compress_stream = false;
1327
72.4k
    is_root_metadata = false;
1328
1329
72.4k
    QPDFObjGen old_og = stream.getObjGen();
1330
72.4k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
72.4k
    if (stream.isRootMetadata()) {
1333
183
        is_root_metadata = true;
1334
183
    }
1335
72.4k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
72.4k
    bool filter_on_write = stream.getFilterOnWrite();
1337
72.4k
    if (!filter_on_write) {
1338
15.2k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
15.2k
        filter = false;
1340
15.2k
    }
1341
72.4k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
57.1k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
57.1k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
57.1k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
12.8k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
12.8k
            filter = false;
1351
12.8k
        }
1352
57.1k
    }
1353
72.4k
    bool normalize = false;
1354
72.4k
    bool uncompress = false;
1355
72.4k
    if (filter_on_write && is_root_metadata &&
1356
72.4k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
183
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
183
        filter = true;
1359
183
        compress_stream = false;
1360
183
        uncompress = true;
1361
72.2k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
0
        normalize = true;
1363
0
        filter = true;
1364
72.2k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
44.1k
        compress_stream = true;
1366
44.1k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
44.1k
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
72.4k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
72.4k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
3.31k
        filter = true;
1373
3.31k
        compress_stream = false;
1374
3.31k
    }
1375
1376
72.4k
    bool filtered = false;
1377
82.8k
    for (bool first_attempt: {true, false}) {
1378
82.8k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
82.8k
                                          : m->pipeline_stack.activate(true);
1380
1381
82.8k
        try {
1382
82.8k
            filtered = stream.pipeStreamData(
1383
82.8k
                m->pipeline,
1384
82.8k
                !filter ? 0
1385
82.8k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
46.4k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
82.8k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
82.8k
                false,
1389
82.8k
                first_attempt);
1390
82.8k
            if (filter && !filtered) {
1391
                // Try again
1392
10.3k
                filter = false;
1393
10.3k
                stream.setFilterOnWrite(false);
1394
72.4k
            } else {
1395
72.4k
                break;
1396
72.4k
            }
1397
82.8k
        } catch (std::runtime_error& e) {
1398
94
            if (filter && first_attempt) {
1399
69
                stream.warn("error while getting stream data: "s + e.what());
1400
69
                stream.warn("qpdf will attempt to write the damaged stream unchanged");
1401
69
                filter = false;
1402
69
                stream.setFilterOnWrite(false);
1403
69
                continue;
1404
69
            }
1405
25
            throw std::runtime_error(
1406
25
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
94
        }
1408
10.3k
        if (stream_data) {
1409
2.08k
            stream_data->clear();
1410
2.08k
        }
1411
10.3k
    }
1412
72.4k
    if (!filtered) {
1413
36.1k
        compress_stream = false;
1414
36.1k
    }
1415
72.4k
    return filtered;
1416
72.4k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1421
2.15M
{
1422
2.15M
    QPDFObjGen old_og = object.getObjGen();
1423
2.15M
    int child_flags = flags & ~f_stream;
1424
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1425
    // include the preceding newline.
1426
2.15M
    std::string indent_large = " ";
1427
2.15M
    if (m->qdf_mode) {
1428
0
        indent_large.append(2 * (level + 1), ' ');
1429
0
        indent_large[0] = '\n';
1430
0
    }
1431
2.15M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1432
1433
2.15M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1434
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1435
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1436
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1437
109k
        write("[");
1438
1.52M
        for (auto const& item: object.as_array()) {
1439
1.52M
            write(indent_large);
1440
1.52M
            unparseChild(item, level + 1, child_flags);
1441
1.52M
        }
1442
109k
        write(indent).write("]");
1443
2.04M
    } else if (tc == ::ot_dictionary) {
1444
        // Handle special cases for specific dictionaries.
1445
1446
221k
        if (old_og == m->root_og) {
1447
            // Extensions dictionaries.
1448
1449
            // We have one of several cases:
1450
            //
1451
            // * We need ADBE
1452
            //    - We already have Extensions
1453
            //       - If it has the right ADBE, preserve it
1454
            //       - Otherwise, replace ADBE
1455
            //    - We don't have Extensions: create one from scratch
1456
            // * We don't want ADBE
1457
            //    - We already have Extensions
1458
            //       - If it only has ADBE, remove it
1459
            //       - If it has other things, keep those and remove ADBE
1460
            //    - We have no extensions: no action required
1461
            //
1462
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1463
            // dictionary, so we can modify in place.
1464
1465
15.8k
            auto extensions = object.getKey("/Extensions");
1466
15.8k
            const bool has_extensions = extensions.isDictionary();
1467
15.8k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1468
1469
15.8k
            if (has_extensions || need_extensions_adbe) {
1470
                // Make a shallow copy of this object so we can modify it safely without affecting
1471
                // the original. This code has logic to skip certain keys in agreement with
1472
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1473
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1474
                // here because all we are doing is removing or replacing top-level keys.
1475
1.36k
                object = object.unsafeShallowCopy();
1476
1.36k
                if (!has_extensions) {
1477
0
                    extensions = QPDFObjectHandle();
1478
0
                }
1479
1480
1.36k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1481
1.36k
                const bool have_extensions_other =
1482
1.36k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1483
1484
1.36k
                if (need_extensions_adbe) {
1485
26
                    if (!(have_extensions_other || have_extensions_adbe)) {
1486
                        // We need Extensions and don't have it.  Create it here.
1487
0
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1488
0
                        extensions = object.replaceKeyAndGetNew(
1489
0
                            "/Extensions", QPDFObjectHandle::newDictionary());
1490
0
                    }
1491
1.33k
                } else if (!have_extensions_other) {
1492
                    // We have Extensions dictionary and don't want one.
1493
969
                    if (have_extensions_adbe) {
1494
920
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1495
920
                        object.removeKey("/Extensions");
1496
920
                        extensions = QPDFObjectHandle(); // uninitialized
1497
920
                    }
1498
969
                }
1499
1500
1.36k
                if (extensions) {
1501
440
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1502
440
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1503
440
                    if (adbe.isDictionary() &&
1504
440
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1505
440
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1506
440
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1507
18
                         m->final_extension_level)) {
1508
14
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1509
426
                    } else {
1510
426
                        if (need_extensions_adbe) {
1511
12
                            extensions.replaceKey(
1512
12
                                "/ADBE",
1513
12
                                QPDFObjectHandle::parse(
1514
12
                                    "<< /BaseVersion /" + m->final_pdf_version +
1515
12
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1516
12
                                    " >>"));
1517
414
                        } else {
1518
414
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1519
414
                            extensions.removeKey("/ADBE");
1520
414
                        }
1521
426
                    }
1522
440
                }
1523
1.36k
            }
1524
15.8k
        }
1525
1526
        // Stream dictionaries.
1527
1528
221k
        if (flags & f_stream) {
1529
            // Suppress /Length since we will write it manually
1530
1531
            // Make a shallow copy of this object so we can modify it safely without affecting the
1532
            // original. This code has logic to skip certain keys in agreement with
1533
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1534
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1535
            // all we are doing is removing or replacing top-level keys.
1536
46.9k
            object = object.unsafeShallowCopy();
1537
1538
46.9k
            object.removeKey("/Length");
1539
1540
            // If /DecodeParms is an empty list, remove it.
1541
46.9k
            if (object.getKey("/DecodeParms").empty()) {
1542
44.6k
                object.removeKey("/DecodeParms");
1543
44.6k
            }
1544
1545
46.9k
            if (flags & f_filtered) {
1546
                // We will supply our own filter and decode parameters.
1547
23.4k
                object.removeKey("/Filter");
1548
23.4k
                object.removeKey("/DecodeParms");
1549
23.4k
            } else {
1550
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1551
                // filters.
1552
23.4k
                QPDFObjectHandle filter = object.getKey("/Filter");
1553
23.4k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1554
23.4k
                if (filter.isOrHasName("/Crypt")) {
1555
418
                    if (filter.isName()) {
1556
23
                        object.removeKey("/Filter");
1557
23
                        object.removeKey("/DecodeParms");
1558
395
                    } else {
1559
395
                        int idx = 0;
1560
1.26k
                        for (auto const& item: filter.as_array()) {
1561
1.26k
                            if (item.isNameAndEquals("/Crypt")) {
1562
                                // If filter is an array, then the code in QPDF_Stream has already
1563
                                // verified that DecodeParms and Filters are arrays of the same
1564
                                // length, but if they weren't for some reason, eraseItem does type
1565
                                // and bounds checking. Fuzzing tells us that this can actually
1566
                                // happen.
1567
395
                                filter.eraseItem(idx);
1568
395
                                decode_parms.eraseItem(idx);
1569
395
                                break;
1570
395
                            }
1571
873
                            ++idx;
1572
873
                        }
1573
395
                    }
1574
418
                }
1575
23.4k
            }
1576
46.9k
        }
1577
1578
221k
        write("<<");
1579
1580
785k
        for (auto const& [key, value]: object.as_dictionary()) {
1581
785k
            if (!value.null()) {
1582
646k
                write(indent_large).write_name(key).write(" ");
1583
646k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1584
646k
                    object.hasKey("/ByteRange")) {
1585
46
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1586
46
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1587
646k
                } else {
1588
646k
                    unparseChild(value, level + 1, child_flags);
1589
646k
                }
1590
646k
            }
1591
785k
        }
1592
1593
221k
        if (flags & f_stream) {
1594
46.6k
            write(indent_large).write("/Length ");
1595
1596
46.6k
            if (m->direct_stream_lengths) {
1597
46.6k
                write(stream_length);
1598
46.6k
            } else {
1599
0
                write(m->cur_stream_length_id).write(" 0 R");
1600
0
            }
1601
46.6k
            if (compress && (flags & f_filtered)) {
1602
23.1k
                write(indent_large).write("/Filter /FlateDecode");
1603
23.1k
            }
1604
46.6k
        }
1605
1606
221k
        write(indent).write(">>");
1607
1.82M
    } else if (tc == ::ot_stream) {
1608
        // Write stream data to a buffer.
1609
46.9k
        if (!m->direct_stream_lengths) {
1610
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1611
0
        }
1612
1613
46.9k
        flags |= f_stream;
1614
46.9k
        bool compress_stream = false;
1615
46.9k
        bool is_metadata = false;
1616
46.9k
        std::string stream_data;
1617
46.9k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1618
23.4k
            flags |= f_filtered;
1619
23.4k
        }
1620
46.9k
        QPDFObjectHandle stream_dict = object.getDict();
1621
1622
46.9k
        m->cur_stream_length = stream_data.size();
1623
46.9k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1624
            // Don't encrypt stream data for the metadata stream
1625
0
            m->cur_data_key.clear();
1626
0
        }
1627
46.9k
        adjustAESStreamLength(m->cur_stream_length);
1628
46.9k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1629
46.9k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1630
46.9k
        write("\nstream\n").write_encrypted(stream_data);
1631
46.9k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1632
46.9k
        write(m->added_newline ? "\nendstream" : "endstream");
1633
1.77M
    } else if (tc == ::ot_string) {
1634
46.1k
        std::string val;
1635
46.1k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1636
46.1k
            !m->cur_data_key.empty()) {
1637
0
            val = object.getStringValue();
1638
0
            if (m->encrypt_use_aes) {
1639
0
                Pl_Buffer bufpl("encrypted string");
1640
0
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1641
0
                pl.writeString(val);
1642
0
                pl.finish();
1643
0
                val = QPDF_String(bufpl.getString()).unparse(true);
1644
0
            } else {
1645
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1646
0
                char* tmp = tmp_ph.get();
1647
0
                size_t vlen = val.length();
1648
0
                RC4 rc4(
1649
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1650
0
                    QIntC::to_int(m->cur_data_key.length()));
1651
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1652
0
                rc4.process(data, vlen, data);
1653
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1654
0
            }
1655
46.1k
        } else if (flags & f_hex_string) {
1656
24
            val = QPDF_String(object.getStringValue()).unparse(true);
1657
46.1k
        } else {
1658
46.1k
            val = object.unparseResolved();
1659
46.1k
        }
1660
46.1k
        write(val);
1661
1.72M
    } else {
1662
1.72M
        write(object.unparseResolved());
1663
1.72M
    }
1664
2.15M
}
1665
1666
void
1667
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1668
32.4k
{
1669
32.4k
    qpdf_assert_debug(first_obj > 0);
1670
32.4k
    bool is_first = true;
1671
32.4k
    auto id = std::to_string(first_obj) + ' ';
1672
296k
    for (auto& offset: offsets) {
1673
296k
        if (is_first) {
1674
32.4k
            is_first = false;
1675
263k
        } else {
1676
263k
            write_qdf("\n").write_no_qdf(" ");
1677
263k
        }
1678
296k
        write(id);
1679
296k
        util::increment(id, 1);
1680
296k
        write(offset);
1681
296k
    }
1682
32.4k
    write("\n");
1683
32.4k
}
1684
1685
void
1686
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1687
16.2k
{
1688
    // Note: object might be null if this is a place-holder for an object stream that we are
1689
    // generating from scratch.
1690
1691
16.2k
    QPDFObjGen old_og = object.getObjGen();
1692
16.2k
    qpdf_assert_debug(old_og.getGen() == 0);
1693
16.2k
    int old_id = old_og.getObj();
1694
16.2k
    int new_stream_id = m->obj[old_og].renumber;
1695
1696
16.2k
    std::vector<qpdf_offset_t> offsets;
1697
16.2k
    qpdf_offset_t first = 0;
1698
1699
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1700
    // first pass.
1701
16.2k
    std::string stream_buffer_pass1;
1702
16.2k
    std::string stream_buffer_pass2;
1703
16.2k
    int first_obj = -1;
1704
16.2k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1705
16.2k
    {
1706
        // Pass 1
1707
16.2k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1708
1709
16.2k
        int count = -1;
1710
148k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1711
148k
            ++count;
1712
148k
            int new_obj = m->obj[obj].renumber;
1713
148k
            if (first_obj == -1) {
1714
16.2k
                first_obj = new_obj;
1715
16.2k
            }
1716
148k
            if (m->qdf_mode) {
1717
0
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1718
0
                if (!m->suppress_original_object_ids) {
1719
0
                    write("; original object ID: ").write(obj.getObj());
1720
                    // For compatibility, only write the generation if non-zero.  While object
1721
                    // streams only allow objects with generation 0, if we are generating object
1722
                    // streams, the old object could have a non-zero generation.
1723
0
                    if (obj.getGen() != 0) {
1724
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1725
0
                        write(" ").write(obj.getGen());
1726
0
                    }
1727
0
                }
1728
0
                write("\n");
1729
0
            }
1730
1731
148k
            offsets.push_back(m->pipeline->getCount());
1732
            // To avoid double-counting objects being written in object streams for progress
1733
            // reporting, decrement in pass 1.
1734
148k
            indicateProgress(true, false);
1735
1736
148k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1737
148k
            if (obj_to_write.isStream()) {
1738
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1739
                // time, but it's not clear to me how to construct a case for this.
1740
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1741
0
                obj_to_write = QPDFObjectHandle::newNull();
1742
0
            }
1743
148k
            writeObject(obj_to_write, count);
1744
1745
148k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1746
148k
        }
1747
16.2k
    }
1748
16.2k
    {
1749
        // Adjust offsets to skip over comment before first object
1750
16.2k
        first = offsets.at(0);
1751
148k
        for (auto& iter: offsets) {
1752
148k
            iter -= first;
1753
148k
        }
1754
1755
        // Take one pass at writing pairs of numbers so we can get their size information
1756
16.2k
        {
1757
16.2k
            auto pp_discard = m->pipeline_stack.activate(true);
1758
16.2k
            writeObjectStreamOffsets(offsets, first_obj);
1759
16.2k
            first += m->pipeline->getCount();
1760
16.2k
        }
1761
1762
        // Set up a stream to write the stream data into a buffer.
1763
16.2k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1764
1765
16.2k
        writeObjectStreamOffsets(offsets, first_obj);
1766
16.2k
        write(stream_buffer_pass1);
1767
16.2k
        stream_buffer_pass1.clear();
1768
16.2k
        stream_buffer_pass1.shrink_to_fit();
1769
16.2k
        if (compressed) {
1770
16.2k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1771
16.2k
        }
1772
16.2k
    }
1773
1774
    // Write the object
1775
16.2k
    openObject(new_stream_id);
1776
16.2k
    setDataKey(new_stream_id);
1777
16.2k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1778
16.2k
    size_t length = stream_buffer_pass2.size();
1779
16.2k
    adjustAESStreamLength(length);
1780
16.2k
    write(" /Length ").write(length).write_qdf("\n ");
1781
16.2k
    if (compressed) {
1782
16.2k
        write(" /Filter /FlateDecode");
1783
16.2k
    }
1784
16.2k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1785
16.2k
    if (!object.isNull()) {
1786
        // If the original object has an /Extends key, preserve it.
1787
0
        QPDFObjectHandle dict = object.getDict();
1788
0
        QPDFObjectHandle extends = dict.getKey("/Extends");
1789
0
        if (extends.isIndirect()) {
1790
0
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1791
0
            write_qdf("\n ").write(" /Extends ");
1792
0
            unparseChild(extends, 1, f_in_ostream);
1793
0
        }
1794
0
    }
1795
16.2k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1796
16.2k
    if (m->encryption) {
1797
0
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1798
0
    }
1799
16.2k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1800
16.2k
    m->cur_data_key.clear();
1801
16.2k
    closeObject(new_stream_id);
1802
16.2k
}
1803
1804
void
1805
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1806
265k
{
1807
265k
    QPDFObjGen old_og = object.getObjGen();
1808
1809
265k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1810
265k
        m->object_stream_to_objects.contains(old_og.getObj())) {
1811
16.2k
        writeObjectStream(object);
1812
16.2k
        return;
1813
16.2k
    }
1814
1815
249k
    indicateProgress(false, false);
1816
249k
    auto new_id = m->obj[old_og].renumber;
1817
249k
    if (m->qdf_mode) {
1818
0
        if (m->page_object_to_seq.contains(old_og)) {
1819
0
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1820
0
        }
1821
0
        if (m->contents_to_page_seq.contains(old_og)) {
1822
0
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1823
0
        }
1824
0
    }
1825
249k
    if (object_stream_index == -1) {
1826
101k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1827
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1828
0
        }
1829
101k
        openObject(new_id);
1830
101k
        setDataKey(new_id);
1831
101k
        unparseObject(object, 0, 0);
1832
101k
        m->cur_data_key.clear();
1833
101k
        closeObject(new_id);
1834
148k
    } else {
1835
148k
        unparseObject(object, 0, f_in_ostream);
1836
148k
        write("\n");
1837
148k
    }
1838
1839
249k
    if (!m->direct_stream_lengths && object.isStream()) {
1840
0
        if (m->qdf_mode) {
1841
0
            if (m->added_newline) {
1842
0
                write("%QDF: ignore_newline\n");
1843
0
            }
1844
0
        }
1845
0
        openObject(new_id + 1);
1846
0
        write(m->cur_stream_length);
1847
0
        closeObject(new_id + 1);
1848
0
    }
1849
249k
}
1850
1851
std::string
1852
QPDFWriter::getOriginalID1()
1853
23.8k
{
1854
23.8k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1855
23.8k
    if (trailer.hasKey("/ID")) {
1856
1.04k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1857
22.7k
    } else {
1858
22.7k
        return "";
1859
22.7k
    }
1860
23.8k
}
1861
1862
void
1863
QPDFWriter::generateID(bool encrypted)
1864
15.6k
{
1865
    // Generate the ID lazily so that we can handle the user's preference to use static or
1866
    // deterministic ID generation.
1867
1868
15.6k
    if (!m->id2.empty()) {
1869
7.75k
        return;
1870
7.75k
    }
1871
1872
7.85k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1873
1874
7.85k
    std::string result;
1875
1876
7.85k
    if (m->static_id) {
1877
        // For test suite use only...
1878
0
        static unsigned char tmp[] = {
1879
0
            0x31,
1880
0
            0x41,
1881
0
            0x59,
1882
0
            0x26,
1883
0
            0x53,
1884
0
            0x58,
1885
0
            0x97,
1886
0
            0x93,
1887
0
            0x23,
1888
0
            0x84,
1889
0
            0x62,
1890
0
            0x64,
1891
0
            0x33,
1892
0
            0x83,
1893
0
            0x27,
1894
0
            0x95,
1895
0
            0x00};
1896
0
        result = reinterpret_cast<char*>(tmp);
1897
7.85k
    } else {
1898
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1899
        // only thing that's really important is that it is very likely to be unique.  We can't
1900
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1901
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1902
        // sufficient portion of the file's contents such no two non-matching files would match in
1903
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1904
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1905
        // that case, would have the same ID regardless of the output file's name.
1906
1907
7.85k
        std::string seed;
1908
7.85k
        if (m->deterministic_id) {
1909
7.85k
            if (encrypted) {
1910
57
                throw std::runtime_error(
1911
57
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
1912
57
                    "written is encrypted (even though the file may not require a password)");
1913
57
            }
1914
7.80k
            if (m->deterministic_id_data.empty()) {
1915
0
                throw std::logic_error(
1916
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
1917
0
            }
1918
7.80k
            seed += m->deterministic_id_data;
1919
7.80k
        } else {
1920
0
            seed += std::to_string(QUtil::get_current_time());
1921
0
            seed += m->filename;
1922
0
            seed += " ";
1923
0
        }
1924
7.80k
        seed += " QPDF ";
1925
7.80k
        if (trailer.hasKey("/Info")) {
1926
1.59k
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1927
1.59k
                if (item.second.isString()) {
1928
458
                    seed += " ";
1929
458
                    seed += item.second.getStringValue();
1930
458
                }
1931
1.59k
            }
1932
166
        }
1933
1934
7.80k
        MD5 m;
1935
7.80k
        m.encodeString(seed.c_str());
1936
7.80k
        MD5::Digest digest;
1937
7.80k
        m.digest(digest);
1938
7.80k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1939
7.80k
    }
1940
1941
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1942
    // word.  Otherwise, we'll use the generated ID for both.
1943
1944
7.80k
    m->id2 = result;
1945
    // Note: keep /ID from old file even if --static-id was given.
1946
7.80k
    m->id1 = getOriginalID1();
1947
7.80k
    if (m->id1.empty()) {
1948
7.54k
        m->id1 = m->id2;
1949
7.54k
    }
1950
7.80k
}
1951
1952
void
1953
QPDFWriter::initializeSpecialStreams()
1954
0
{
1955
    // Mark all page content streams in case we are filtering or normalizing.
1956
0
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1957
0
    int num = 0;
1958
0
    for (auto& page: pages) {
1959
0
        m->page_object_to_seq[page.getObjGen()] = ++num;
1960
0
        QPDFObjectHandle contents = page.getKey("/Contents");
1961
0
        std::vector<QPDFObjGen> contents_objects;
1962
0
        if (contents.isArray()) {
1963
0
            int n = static_cast<int>(contents.size());
1964
0
            for (int i = 0; i < n; ++i) {
1965
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1966
0
            }
1967
0
        } else if (contents.isStream()) {
1968
0
            contents_objects.push_back(contents.getObjGen());
1969
0
        }
1970
1971
0
        for (auto const& c: contents_objects) {
1972
0
            m->contents_to_page_seq[c] = num;
1973
0
            m->normalized_streams.insert(c);
1974
0
        }
1975
0
    }
1976
0
}
1977
1978
void
1979
QPDFWriter::preserveObjectStreams()
1980
0
{
1981
0
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1982
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1983
    // streams out of old objects that have generation numbers greater than zero. However in an
1984
    // existing PDF, all object stream objects and all objects in them must have generation 0
1985
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1986
    // that are not allowed to be in object streams. In addition to removing objects that were
1987
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1988
    // objects from being included.
1989
0
    auto end = xref.cend();
1990
0
    m->obj.streams_empty = true;
1991
0
    if (m->preserve_unreferenced_objects) {
1992
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1993
0
            if (iter->second.getType() == 2) {
1994
                // Pdf contains object streams.
1995
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1996
0
                m->obj.streams_empty = false;
1997
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1998
0
            }
1999
0
        }
2000
0
    } else {
2001
        // Start by scanning for first compressed object in case we don't have any object streams to
2002
        // process.
2003
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2004
0
            if (iter->second.getType() == 2) {
2005
                // Pdf contains object streams.
2006
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2007
0
                m->obj.streams_empty = false;
2008
0
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2009
                // The object pointed to by iter may be a previous generation, in which case it is
2010
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2011
                // table may contain multiple generations of an object).
2012
0
                for (iter = xref.cbegin(); iter != end; ++iter) {
2013
0
                    if (iter->second.getType() == 2) {
2014
0
                        auto id = static_cast<size_t>(iter->first.getObj());
2015
0
                        if (id < eligible.size() && eligible[id]) {
2016
0
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2017
0
                        } else {
2018
0
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2019
0
                        }
2020
0
                    }
2021
0
                }
2022
0
                return;
2023
0
            }
2024
0
        }
2025
0
    }
2026
0
}
2027
2028
void
2029
QPDFWriter::generateObjectStreams()
2030
9.72k
{
2031
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2032
    // how many object streams are needed so that we can distribute objects approximately evenly
2033
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2034
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2035
    // here later.
2036
2037
    // This code doesn't do anything with /Extends.
2038
2039
9.72k
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2040
9.72k
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2041
2042
9.72k
    initializeTables(2U * n_object_streams);
2043
9.72k
    if (n_object_streams == 0) {
2044
26
        m->obj.streams_empty = true;
2045
26
        return;
2046
26
    }
2047
9.69k
    size_t n_per = eligible.size() / n_object_streams;
2048
9.69k
    if (n_per * n_object_streams < eligible.size()) {
2049
120
        ++n_per;
2050
120
    }
2051
9.69k
    unsigned int n = 0;
2052
9.69k
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2053
111k
    for (auto const& item: eligible) {
2054
111k
        if (n == n_per) {
2055
541
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2056
541
            n = 0;
2057
            // Construct a new null object as the "original" object stream.  The rest of the code
2058
            // knows that this means we're creating the object stream from scratch.
2059
541
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2060
541
        }
2061
111k
        auto& obj = m->obj[item];
2062
111k
        obj.object_stream = cur_ostream;
2063
111k
        obj.gen = item.getGen();
2064
111k
        ++n;
2065
111k
    }
2066
9.69k
}
2067
2068
// Return a shallow copy of the input trailer with every key removed that necessarily has to be
// replaced when the file is written.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy();

    static constexpr char const* replaced_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };
    for (auto const* key: replaced_keys) {
        trailer.removeKey(key);
    }

    return trailer;
}
2093
2094
// Make document extension level information direct as required by the spec.
2095
void
2096
QPDFWriter::prepareFileForWrite()
2097
9.55k
{
2098
9.55k
    m->pdf.fixDanglingReferences();
2099
9.55k
    auto root = m->pdf.getRoot();
2100
9.55k
    auto oh = root.getKey("/Extensions");
2101
9.55k
    if (oh.isDictionary()) {
2102
723
        const bool extensions_indirect = oh.isIndirect();
2103
723
        if (extensions_indirect) {
2104
239
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2105
239
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2106
239
        }
2107
723
        if (oh.hasKey("/ADBE")) {
2108
543
            auto adbe = oh.getKey("/ADBE");
2109
543
            if (adbe.isIndirect()) {
2110
138
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2111
138
                adbe.makeDirect();
2112
138
                oh.replaceKey("/ADBE", adbe);
2113
138
            }
2114
543
        }
2115
723
    }
2116
9.55k
}
2117
2118
void
2119
QPDFWriter::initializeTables(size_t extra)
2120
9.68k
{
2121
9.68k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2122
9.68k
    m->obj.resize(size);
2123
9.68k
    m->new_obj.resize(size);
2124
9.68k
}
2125
2126
void
2127
QPDFWriter::doWriteSetup()
2128
9.78k
{
2129
9.78k
    if (m->did_write_setup) {
2130
0
        return;
2131
0
    }
2132
9.78k
    m->did_write_setup = true;
2133
2134
    // Do preliminary setup
2135
2136
9.78k
    if (m->linearized) {
2137
9.78k
        m->qdf_mode = false;
2138
9.78k
    }
2139
2140
9.78k
    if (m->pclm) {
2141
0
        m->stream_decode_level = qpdf_dl_none;
2142
0
        m->compress_streams = false;
2143
0
        m->encryption = nullptr;
2144
0
    }
2145
2146
9.78k
    if (m->qdf_mode) {
2147
0
        if (!m->normalize_content_set) {
2148
0
            m->normalize_content = true;
2149
0
        }
2150
0
        if (!m->compress_streams_set) {
2151
0
            m->compress_streams = false;
2152
0
        }
2153
0
        if (!m->stream_decode_level_set) {
2154
0
            m->stream_decode_level = qpdf_dl_generalized;
2155
0
        }
2156
0
    }
2157
2158
9.78k
    if (m->encryption) {
2159
        // Encryption has been explicitly set
2160
0
        m->preserve_encryption = false;
2161
9.78k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2162
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2163
        // though, we still obey that.
2164
0
        m->preserve_encryption = false;
2165
0
    }
2166
2167
9.78k
    if (m->preserve_encryption) {
2168
9.78k
        copyEncryptionParameters(m->pdf);
2169
9.78k
    }
2170
2171
9.78k
    if (!m->forced_pdf_version.empty()) {
2172
0
        int major = 0;
2173
0
        int minor = 0;
2174
0
        parseVersion(m->forced_pdf_version, major, minor);
2175
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2176
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2177
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2178
0
            m->object_stream_mode = qpdf_o_disable;
2179
0
        }
2180
0
    }
2181
2182
9.78k
    if (m->qdf_mode || m->normalize_content) {
2183
0
        initializeSpecialStreams();
2184
0
    }
2185
2186
9.78k
    if (m->qdf_mode) {
2187
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2188
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2189
        // hint streams always get direct stream lengths.
2190
0
        m->direct_stream_lengths = false;
2191
0
    }
2192
2193
9.78k
    switch (m->object_stream_mode) {
2194
0
    case qpdf_o_disable:
2195
0
        initializeTables();
2196
0
        m->obj.streams_empty = true;
2197
0
        break;
2198
2199
0
    case qpdf_o_preserve:
2200
0
        initializeTables();
2201
0
        preserveObjectStreams();
2202
0
        break;
2203
2204
9.72k
    case qpdf_o_generate:
2205
9.72k
        generateObjectStreams();
2206
9.72k
        break;
2207
2208
        // no default so gcc will warn for missing case tag
2209
9.78k
    }
2210
2211
9.67k
    if (!m->obj.streams_empty) {
2212
9.65k
        if (m->linearized) {
2213
            // Page dictionaries are not allowed to be compressed objects.
2214
17.3k
            for (auto& page: m->pdf.getAllPages()) {
2215
17.3k
                if (m->obj[page].object_stream > 0) {
2216
16.2k
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2217
16.2k
                    m->obj[page].object_stream = 0;
2218
16.2k
                }
2219
17.3k
            }
2220
9.65k
        }
2221
2222
9.65k
        if (m->linearized || m->encryption) {
2223
            // The document catalog is not allowed to be compressed in linearized files either.  It
2224
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2225
            // handle encrypted files with compressed document catalogs, so we disable them in that
2226
            // case as well.
2227
9.60k
            if (m->obj[m->root_og].object_stream > 0) {
2228
9.03k
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2229
9.03k
                m->obj[m->root_og].object_stream = 0;
2230
9.03k
            }
2231
9.60k
        }
2232
2233
        // Generate reverse mapping from object stream to objects
2234
3.64M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2235
3.64M
            if (item.object_stream > 0) {
2236
86.0k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2237
86.0k
                vec.emplace_back(id, item.gen);
2238
86.0k
                if (m->max_ostream_index < vec.size()) {
2239
40.3k
                    ++m->max_ostream_index;
2240
40.3k
                }
2241
86.0k
            }
2242
3.64M
        });
2243
9.65k
        --m->max_ostream_index;
2244
2245
9.65k
        if (m->object_stream_to_objects.empty()) {
2246
481
            m->obj.streams_empty = true;
2247
9.17k
        } else {
2248
9.17k
            setMinimumPDFVersion("1.5");
2249
9.17k
        }
2250
9.65k
    }
2251
2252
9.67k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2253
9.67k
    m->final_pdf_version = m->min_pdf_version;
2254
9.67k
    m->final_extension_level = m->min_extension_level;
2255
9.67k
    if (!m->forced_pdf_version.empty()) {
2256
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2257
0
        m->final_pdf_version = m->forced_pdf_version;
2258
0
        m->final_extension_level = m->forced_extension_level;
2259
0
    }
2260
9.67k
}
2261
2262
void
2263
QPDFWriter::write()
2264
9.78k
{
2265
9.78k
    doWriteSetup();
2266
2267
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2268
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2269
9.78k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2270
2271
9.78k
    prepareFileForWrite();
2272
2273
9.78k
    if (m->linearized) {
2274
9.52k
        writeLinearized();
2275
9.52k
    } else {
2276
258
        writeStandard();
2277
258
    }
2278
2279
9.78k
    m->pipeline->finish();
2280
9.78k
    if (m->close_file) {
2281
0
        fclose(m->file);
2282
0
    }
2283
9.78k
    m->file = nullptr;
2284
9.78k
    if (m->buffer_pipeline) {
2285
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2286
0
        m->buffer_pipeline = nullptr;
2287
0
    }
2288
9.78k
    indicateProgress(false, true);
2289
9.78k
}
2290
2291
// Map an object of the input file to its identity in the output: the renumbered object id
// recorded for `og`, always with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    return QPDFObjGen(m->obj[og].renumber, 0);
}
2296
2297
std::map<QPDFObjGen, QPDFXRefEntry>
2298
QPDFWriter::getWrittenXRefTable()
2299
0
{
2300
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2301
2302
0
    auto it = result.begin();
2303
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2304
0
        if (item.xref.getType() != 0) {
2305
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2306
0
        }
2307
0
    });
2308
0
    return result;
2309
0
}
2310
2311
void
2312
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2313
42.7k
{
2314
74.0k
    for (auto const& oh: part) {
2315
74.0k
        enqueueObject(oh);
2316
74.0k
    }
2317
42.7k
}
2318
2319
void
2320
QPDFWriter::writeEncryptionDictionary()
2321
0
{
2322
0
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2323
0
    auto& enc = *m->encryption;
2324
0
    auto const V = enc.getV();
2325
2326
0
    write("<<");
2327
0
    if (V >= 4) {
2328
0
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2329
0
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2330
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2331
        // MacOS won't open encrypted files without it.
2332
0
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2333
0
        if (!m->encryption->getEncryptMetadata()) {
2334
0
            write(" /EncryptMetadata false");
2335
0
        }
2336
0
    }
2337
0
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2338
0
    write(" /O ").write_string(enc.getO(), true);
2339
0
    if (V >= 4) {
2340
0
        write(" /OE ").write_string(enc.getOE(), true);
2341
0
    }
2342
0
    write(" /P ").write(enc.getP());
2343
0
    if (V >= 5) {
2344
0
        write(" /Perms ").write_string(enc.getPerms(), true);
2345
0
    }
2346
0
    write(" /R ").write(enc.getR());
2347
2348
0
    if (V >= 4) {
2349
0
        write(" /StmF /StdCF /StrF /StdCF");
2350
0
    }
2351
0
    write(" /U ").write_string(enc.getU(), true);
2352
0
    if (V >= 4) {
2353
0
        write(" /UE ").write_string(enc.getUE(), true);
2354
0
    }
2355
0
    write(" /V ").write(enc.getV()).write(" >>");
2356
0
    closeObject(m->encryption_dict_objid);
2357
0
}
2358
2359
std::string
2360
QPDFWriter::getFinalVersion()
2361
0
{
2362
0
    doWriteSetup();
2363
0
    return m->final_pdf_version;
2364
0
}
2365
2366
void
2367
QPDFWriter::writeHeader()
2368
16.0k
{
2369
16.0k
    write("%PDF-").write(m->final_pdf_version);
2370
16.0k
    if (m->pclm) {
2371
        // PCLm version
2372
0
        write("\n%PCLm 1.0\n");
2373
16.0k
    } else {
2374
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2375
        // as binary.
2376
16.0k
        write("\n%\xbf\xf7\xa2\xfe\n");
2377
16.0k
    }
2378
16.0k
    write_qdf("%QDF-1.0\n\n");
2379
2380
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2381
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2382
    // linearized files, we have to write extra header text after the linearization parameter
2383
    // dictionary.
2384
16.0k
}
2385
2386
void
2387
QPDFWriter::writeHintStream(int hint_id)
2388
7.80k
{
2389
7.80k
    std::string hint_buffer;
2390
7.80k
    int S = 0;
2391
7.80k
    int O = 0;
2392
7.80k
    bool compressed = m->compress_streams && !m->qdf_mode;
2393
7.80k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2394
2395
7.80k
    openObject(hint_id);
2396
7.80k
    setDataKey(hint_id);
2397
2398
7.80k
    size_t hlen = hint_buffer.size();
2399
2400
7.80k
    write("<< ");
2401
7.80k
    if (compressed) {
2402
7.80k
        write("/Filter /FlateDecode ");
2403
7.80k
    }
2404
7.80k
    write("/S ").write(S);
2405
7.80k
    if (O) {
2406
188
        write(" /O ").write(O);
2407
188
    }
2408
7.80k
    adjustAESStreamLength(hlen);
2409
7.80k
    write(" /Length ").write(hlen);
2410
7.80k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2411
2412
7.80k
    if (m->encryption) {
2413
0
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2414
0
    }
2415
2416
7.80k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2417
7.80k
    closeObject(hint_id);
2418
7.80k
}
2419
2420
// Convenience overload: forwards to the full writeXRefTable with zero/false for every extra
// argument (prev, suppress_offsets, hint_id, hint_offset, hint_length, linearization_pass).
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    bool const suppress_offsets = false;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    int const linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2427
2428
// Write a classic cross-reference table for objects first..last followed by the trailer.
//
// When suppress_offsets is true every in-use entry is written with offset 0. When hint_id is
// non-zero, offsets at or beyond hint_offset (other than the hint object's own) are shifted by
// hint_length. Returns the pipeline position captured right after the entry count, i.e. just
// before the newline that ends the subsection header.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    write("xref\n");
    write(first);
    write(" ");
    write(last - first + 1);
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    write("\n");

    // Object 0 gets the fixed free-entry line; regular entries then start at object 1.
    int next_id = first;
    if (next_id == 0) {
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    for (int objid = next_id; objid <= last; ++objid) {
        qpdf_offset_t entry_offset = 0;
        if (!suppress_offsets) {
            entry_offset = m->new_obj[objid].xref.getOffset();
            bool const shifted_by_hint =
                (hint_id != 0) && (objid != hint_id) && (entry_offset >= hint_offset);
            if (shifted_by_hint) {
                entry_offset += hint_length;
            }
        }
        write(QUtil::int_to_string(entry_offset, 10));
        write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2462
2463
// Convenience overload: forwards to the full writeXRefStream with zero/false for every extra
// argument (prev, hint_id, hint_offset, hint_length, skip_compression, linearization_pass).
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    bool const skip_compression = false;
    int const linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2472
2473
// Write a cross-reference stream as object xref_id covering objects first..last.
//
// Each entry is a one-byte type followed by two fields whose widths are computed here and written
// as /W. When hint_id is non-zero, type 1 offsets at or beyond hint_offset (other than the hint
// object's own) are shifted by hint_length. When streams are being compressed, the data is always
// PNG-filter encoded and is additionally deflated unless skip_compression is set. Returns the
// pipeline position at entry minus one.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const start_offset = m->pipeline->getCount();
    qpdf_offset_t const space_before_zero = start_offset - 1;

    // field 1 contains offsets and object stream identifiers
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // field 2 contains object stream indices
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    unsigned int const entry_size = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        // While this popper is alive, output is captured in xref_data.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        for (int objid = first; objid <= last; ++objid) {
            QPDFXRefEntry& entry = m->new_obj[objid].xref;
            auto const entry_type = entry.getType();
            if (entry_type == 0) {
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
            } else if (entry_type == 1) {
                qpdf_offset_t entry_offset = entry.getOffset();
                bool const shifted_by_hint =
                    (hint_id != 0) && (objid != hint_id) && (entry_offset >= hint_offset);
                if (shifted_by_hint) {
                    entry_offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(entry_offset), f1_size);
                writeBinary(0, f2_size);
            } else if (entry_type == 2) {
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, entry_size);
        // With skip_compression the stream dictionary below still advertises compression, but the
        // data is not actually deflated.  This helps us with computation of padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<");
    write_qdf("\n ");
    write(" /Type /XRef");
    write_qdf("\n ");
    write(" /Length ");
    write(xref_data.size());
    if (compressed) {
        write_qdf("\n ");
        write(" /Filter /FlateDecode");
        write_qdf("\n ");
        write(" /DecodeParms << /Columns ");
        write(entry_size);
        write(" /Predictor 12 >>");
    }
    write_qdf("\n ");
    write(" /W [ 1 ");
    write(f1_size);
    write(" ");
    write(f2_size);
    write(" ]");

    // /Index is written only when the stream does not cover exactly objects 0..size-1.
    if (first != 0 || last != (size - 1)) {
        write(" /Index [ ");
        write(first);
        write(" ");
        write(last - first + 1);
        write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n");
    write(xref_data);
    write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2568
2569
size_t
2570
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2571
15.5k
{
2572
    // This routine is called right after a linearization first pass xref stream has been written
2573
    // without compression.  Calculate the amount of padding that would be required in the worst
2574
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2575
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2576
    // 10 extra bytes for number length increases.
2577
2578
15.5k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2579
15.5k
}
2580
2581
void
2582
QPDFWriter::writeLinearized()
2583
9.52k
{
2584
    // Optimize file and enqueue objects in order
2585
2586
9.52k
    std::map<int, int> stream_cache;
2587
2588
58.0k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2589
58.0k
        auto& result = stream_cache[stream.getObjectID()];
2590
58.0k
        if (result == 0) {
2591
25.4k
            bool compress_stream;
2592
25.4k
            bool is_metadata;
2593
25.4k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2594
12.7k
                result = 2;
2595
12.7k
            } else {
2596
12.6k
                result = 1;
2597
12.6k
            }
2598
25.4k
        }
2599
58.0k
        return result;
2600
58.0k
    };
2601
2602
9.52k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2603
2604
9.52k
    std::vector<QPDFObjectHandle> part4;
2605
9.52k
    std::vector<QPDFObjectHandle> part6;
2606
9.52k
    std::vector<QPDFObjectHandle> part7;
2607
9.52k
    std::vector<QPDFObjectHandle> part8;
2608
9.52k
    std::vector<QPDFObjectHandle> part9;
2609
9.52k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2610
2611
    // Object number sequence:
2612
    //
2613
    //  second half
2614
    //    second half uncompressed objects
2615
    //    second half xref stream, if any
2616
    //    second half compressed objects
2617
    //  first half
2618
    //    linearization dictionary
2619
    //    first half xref stream, if any
2620
    //    part 4 uncompresesd objects
2621
    //    encryption dictionary, if any
2622
    //    hint stream
2623
    //    part 6 uncompressed objects
2624
    //    first half compressed objects
2625
    //
2626
2627
    // Second half objects
2628
9.52k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2629
9.52k
    int second_half_first_obj = 1;
2630
9.52k
    int after_second_half = 1 + second_half_uncompressed;
2631
9.52k
    m->next_objid = after_second_half;
2632
9.52k
    int second_half_xref = 0;
2633
9.52k
    bool need_xref_stream = !m->obj.streams_empty;
2634
9.52k
    if (need_xref_stream) {
2635
8.24k
        second_half_xref = m->next_objid++;
2636
8.24k
    }
2637
    // Assign numbers to all compressed objects in the second half.
2638
9.52k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2639
35.4k
    for (int i = 0; i < 3; ++i) {
2640
30.1k
        for (auto const& oh: *vecs2[i]) {
2641
30.1k
            assignCompressedObjectNumbers(oh.getObjGen());
2642
30.1k
        }
2643
25.9k
    }
2644
9.52k
    int second_half_end = m->next_objid - 1;
2645
9.52k
    int second_trailer_size = m->next_objid;
2646
2647
    // First half objects
2648
9.52k
    int first_half_start = m->next_objid;
2649
9.52k
    int lindict_id = m->next_objid++;
2650
9.52k
    int first_half_xref = 0;
2651
9.52k
    if (need_xref_stream) {
2652
8.24k
        first_half_xref = m->next_objid++;
2653
8.24k
    }
2654
9.52k
    int part4_first_obj = m->next_objid;
2655
9.52k
    m->next_objid += QIntC::to_int(part4.size());
2656
9.52k
    int after_part4 = m->next_objid;
2657
9.52k
    if (m->encryption) {
2658
0
        m->encryption_dict_objid = m->next_objid++;
2659
0
    }
2660
9.52k
    int hint_id = m->next_objid++;
2661
9.52k
    int part6_first_obj = m->next_objid;
2662
9.52k
    m->next_objid += QIntC::to_int(part6.size());
2663
9.52k
    int after_part6 = m->next_objid;
2664
    // Assign numbers to all compressed objects in the first half
2665
9.52k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2666
26.8k
    for (int i = 0; i < 2; ++i) {
2667
44.1k
        for (auto const& oh: *vecs1[i]) {
2668
44.1k
            assignCompressedObjectNumbers(oh.getObjGen());
2669
44.1k
        }
2670
17.2k
    }
2671
9.52k
    int first_half_end = m->next_objid - 1;
2672
9.52k
    int first_trailer_size = m->next_objid;
2673
2674
9.52k
    int part4_end_marker = part4.back().getObjectID();
2675
9.52k
    int part6_end_marker = part6.back().getObjectID();
2676
9.52k
    qpdf_offset_t space_before_zero = 0;
2677
9.52k
    qpdf_offset_t file_size = 0;
2678
9.52k
    qpdf_offset_t part6_end_offset = 0;
2679
9.52k
    qpdf_offset_t first_half_max_obj_offset = 0;
2680
9.52k
    qpdf_offset_t second_xref_offset = 0;
2681
9.52k
    qpdf_offset_t first_xref_end = 0;
2682
9.52k
    qpdf_offset_t second_xref_end = 0;
2683
2684
9.52k
    m->next_objid = part4_first_obj;
2685
9.52k
    enqueuePart(part4);
2686
9.52k
    if (m->next_objid != after_part4) {
2687
        // This can happen with very botched files as in the fuzzer test. There are likely some
2688
        // faulty assumptions in calculateLinearizationData
2689
8
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2690
8
    }
2691
9.51k
    m->next_objid = part6_first_obj;
2692
9.51k
    enqueuePart(part6);
2693
9.51k
    if (m->next_objid != after_part6) {
2694
161
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2695
161
    }
2696
9.35k
    m->next_objid = second_half_first_obj;
2697
9.35k
    enqueuePart(part7);
2698
9.35k
    enqueuePart(part8);
2699
9.35k
    enqueuePart(part9);
2700
9.35k
    if (m->next_objid != after_second_half) {
2701
256
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2702
256
    }
2703
2704
9.09k
    qpdf_offset_t hint_length = 0;
2705
9.09k
    std::string hint_buffer;
2706
2707
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2708
2709
9.09k
    FILE* lin_pass1_file = nullptr;
2710
9.09k
    auto pp_pass1 = m->pipeline_stack.popper();
2711
9.09k
    auto pp_md5 = m->pipeline_stack.popper();
2712
16.0k
    for (int pass: {1, 2}) {
2713
16.0k
        if (pass == 1) {
2714
8.22k
            if (!m->lin_pass1_filename.empty()) {
2715
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2716
0
                m->pipeline_stack.activate(
2717
0
                    pp_pass1,
2718
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2719
8.22k
            } else {
2720
8.22k
                m->pipeline_stack.activate(pp_pass1, true);
2721
8.22k
            }
2722
8.22k
            if (m->deterministic_id) {
2723
8.22k
                m->pipeline_stack.activate_md5(pp_md5);
2724
8.22k
            }
2725
8.22k
        }
2726
2727
        // Part 1: header
2728
2729
16.0k
        writeHeader();
2730
2731
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2732
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2733
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2734
        // linearization parameter dictionary must appear within the first 1024 characters of the
2735
        // file.
2736
2737
16.0k
        qpdf_offset_t pos = m->pipeline->getCount();
2738
16.0k
        openObject(lindict_id);
2739
16.0k
        write("<<");
2740
16.0k
        if (pass == 2) {
2741
7.80k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2742
7.80k
            int first_page_object = m->obj[pages.at(0)].renumber;
2743
2744
7.80k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2745
            // Implementation note 121 states that a space is mandatory after this open bracket.
2746
7.80k
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2747
7.80k
            write(hint_length);
2748
7.80k
            write(" ] /O ").write(first_page_object);
2749
7.80k
            write(" /E ").write(part6_end_offset + hint_length);
2750
7.80k
            write(" /N ").write(pages.size());
2751
7.80k
            write(" /T ").write(space_before_zero + hint_length);
2752
7.80k
        }
2753
16.0k
        write(" >>");
2754
16.0k
        closeObject(lindict_id);
2755
16.0k
        static int const pad = 200;
2756
16.0k
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2757
2758
        // If the user supplied any additional header text, write it here after the linearization
2759
        // parameter dictionary.
2760
16.0k
        write(m->extra_header_text);
2761
2762
        // Part 3: first page cross reference table and trailer.
2763
2764
16.0k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2765
16.0k
        qpdf_offset_t hint_offset = 0;
2766
16.0k
        if (pass == 2) {
2767
7.80k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2768
7.80k
        }
2769
16.0k
        if (need_xref_stream) {
2770
            // Must pad here too.
2771
15.6k
            if (pass == 1) {
2772
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2773
                // reserved for each file offset.  This would provide adequate space for the xref
2774
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2775
                // file, which is extremely likely.  In the second pass, we will know the actual
2776
                // value for this, but it's okay if it's smaller.
2777
8.03k
                first_half_max_obj_offset = 1 << 25;
2778
8.03k
            }
2779
15.6k
            pos = m->pipeline->getCount();
2780
15.6k
            writeXRefStream(
2781
15.6k
                first_half_xref,
2782
15.6k
                first_half_end,
2783
15.6k
                first_half_max_obj_offset,
2784
15.6k
                t_lin_first,
2785
15.6k
                first_half_start,
2786
15.6k
                first_half_end,
2787
15.6k
                first_trailer_size,
2788
15.6k
                hint_length + second_xref_offset,
2789
15.6k
                hint_id,
2790
15.6k
                hint_offset,
2791
15.6k
                hint_length,
2792
15.6k
                (pass == 1),
2793
15.6k
                pass);
2794
15.6k
            qpdf_offset_t endpos = m->pipeline->getCount();
2795
15.6k
            if (pass == 1) {
2796
                // Pad so we have enough room for the real xref stream.
2797
7.87k
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2798
7.87k
                first_xref_end = m->pipeline->getCount();
2799
7.87k
            } else {
2800
                // Pad so that the next object starts at the same place as in pass 1.
2801
7.78k
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2802
2803
7.78k
                if (m->pipeline->getCount() != first_xref_end) {
2804
0
                    throw std::logic_error(
2805
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
7.78k
            }
2809
15.6k
            write("\n");
2810
15.6k
        } else {
2811
362
            writeXRefTable(
2812
362
                t_lin_first,
2813
362
                first_half_start,
2814
362
                first_half_end,
2815
362
                first_trailer_size,
2816
362
                hint_length + second_xref_offset,
2817
362
                (pass == 1),
2818
362
                hint_id,
2819
362
                hint_offset,
2820
362
                hint_length,
2821
362
                pass);
2822
362
            write("startxref\n0\n%%EOF\n");
2823
362
        }
2824
2825
        // Parts 4 through 9
2826
2827
117k
        for (auto const& cur_object: m->object_queue) {
2828
117k
            if (cur_object.getObjectID() == part6_end_marker) {
2829
15.7k
                first_half_max_obj_offset = m->pipeline->getCount();
2830
15.7k
            }
2831
117k
            writeObject(cur_object);
2832
117k
            if (cur_object.getObjectID() == part4_end_marker) {
2833
15.8k
                if (m->encryption) {
2834
0
                    writeEncryptionDictionary();
2835
0
                }
2836
15.8k
                if (pass == 1) {
2837
8.05k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
8.05k
                } else {
2839
                    // Part 5: hint stream
2840
7.79k
                    write(hint_buffer);
2841
7.79k
                }
2842
15.8k
            }
2843
117k
            if (cur_object.getObjectID() == part6_end_marker) {
2844
15.6k
                part6_end_offset = m->pipeline->getCount();
2845
15.6k
            }
2846
117k
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
16.0k
        second_xref_offset = m->pipeline->getCount();
2853
16.0k
        if (need_xref_stream) {
2854
15.2k
            pos = m->pipeline->getCount();
2855
15.2k
            space_before_zero = writeXRefStream(
2856
15.2k
                second_half_xref,
2857
15.2k
                second_half_end,
2858
15.2k
                second_xref_offset,
2859
15.2k
                t_lin_second,
2860
15.2k
                0,
2861
15.2k
                second_half_end,
2862
15.2k
                second_trailer_size,
2863
15.2k
                0,
2864
15.2k
                0,
2865
15.2k
                0,
2866
15.2k
                0,
2867
15.2k
                (pass == 1),
2868
15.2k
                pass);
2869
15.2k
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
15.2k
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
7.62k
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2875
7.62k
                second_xref_end = m->pipeline->getCount();
2876
7.62k
            } else {
2877
                // Make the file size the same.
2878
7.58k
                auto padding =
2879
7.58k
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2880
7.58k
                write(padding, ' ').write("\n");
2881
2882
                // If this assertion fails, maybe we didn't have enough padding above.
2883
7.58k
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2884
0
                    throw std::logic_error(
2885
0
                        "count mismatch after xref stream; possible insufficient padding?");
2886
0
                }
2887
7.58k
            }
2888
15.2k
        } else {
2889
814
            space_before_zero = writeXRefTable(
2890
814
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2891
814
        }
2892
16.0k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2893
2894
16.0k
        if (pass == 1) {
2895
7.80k
            if (m->deterministic_id) {
2896
7.80k
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2897
7.80k
                computeDeterministicIDData();
2898
7.80k
                pp_md5.pop();
2899
7.80k
            }
2900
2901
            // Close first pass pipeline
2902
7.80k
            file_size = m->pipeline->getCount();
2903
7.80k
            pp_pass1.pop();
2904
2905
            // Save hint offset since it will be set to zero by calling openObject.
2906
7.80k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2907
2908
            // Write hint stream to a buffer
2909
7.80k
            {
2910
7.80k
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2911
7.80k
                writeHintStream(hint_id);
2912
7.80k
            }
2913
7.80k
            hint_length = QIntC::to_offset(hint_buffer.size());
2914
2915
            // Restore hint offset
2916
7.80k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2917
7.80k
            if (lin_pass1_file) {
2918
                // Write some debugging information
2919
0
                fprintf(
2920
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2921
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2922
0
                fprintf(
2923
0
                    lin_pass1_file,
2924
0
                    "%% second_xref_offset=%s\n",
2925
0
                    std::to_string(second_xref_offset).c_str());
2926
0
                fprintf(
2927
0
                    lin_pass1_file,
2928
0
                    "%% second_xref_end=%s\n",
2929
0
                    std::to_string(second_xref_end).c_str());
2930
0
                fclose(lin_pass1_file);
2931
0
                lin_pass1_file = nullptr;
2932
0
            }
2933
7.80k
        }
2934
16.0k
    }
2935
9.09k
}
2936
2937
void
2938
QPDFWriter::enqueueObjectsStandard()
2939
0
{
2940
0
    if (m->preserve_unreferenced_objects) {
2941
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2942
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2943
0
            enqueueObject(oh);
2944
0
        }
2945
0
    }
2946
2947
    // Put root first on queue.
2948
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2949
0
    enqueueObject(trailer.getKey("/Root"));
2950
2951
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2952
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2953
0
    for (auto& item: trailer.as_dictionary()) {
2954
0
        if (!item.second.null()) {
2955
0
            enqueueObject(item.second);
2956
0
        }
2957
0
    }
2958
0
}
2959
2960
void
2961
QPDFWriter::enqueueObjectsPCLm()
2962
0
{
2963
    // Image transform stream content for page strip images. Each of this new stream has to come
2964
    // after every page image strip written in the pclm file.
2965
0
    std::string image_transform_content = "q /image Do Q\n";
2966
2967
    // enqueue all pages first
2968
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2969
0
    for (auto& page: all) {
2970
        // enqueue page
2971
0
        enqueueObject(page);
2972
2973
        // enqueue page contents stream
2974
0
        enqueueObject(page.getKey("/Contents"));
2975
2976
        // enqueue all the strips for each page
2977
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2978
0
        for (auto& image: strips.as_dictionary()) {
2979
0
            if (!image.second.null()) {
2980
0
                enqueueObject(image.second);
2981
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2982
0
            }
2983
0
        }
2984
0
    }
2985
2986
    // Put root in queue.
2987
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2988
0
    enqueueObject(trailer.getKey("/Root"));
2989
0
}
2990
2991
void
2992
QPDFWriter::indicateProgress(bool decrement, bool finished)
2993
405k
{
2994
405k
    if (decrement) {
2995
148k
        --m->events_seen;
2996
148k
        return;
2997
148k
    }
2998
2999
257k
    ++m->events_seen;
3000
3001
257k
    if (!m->progress_reporter.get()) {
3002
257k
        return;
3003
257k
    }
3004
3005
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3006
0
        int percentage =
3007
0
            (finished ? 100
3008
0
                 : m->next_progress_report == 0
3009
0
                 ? 0
3010
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3011
0
        m->progress_reporter->reportProgress(percentage);
3012
0
    }
3013
0
    int increment = std::max(1, (m->events_expected / 100));
3014
0
    while (m->events_seen >= m->next_progress_report) {
3015
0
        m->next_progress_report += increment;
3016
0
    }
3017
0
}
3018
3019
void
3020
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3021
0
{
3022
0
    m->progress_reporter = pr;
3023
0
}
3024
3025
void
3026
QPDFWriter::writeStandard()
3027
0
{
3028
0
    auto pp_md5 = m->pipeline_stack.popper();
3029
0
    if (m->deterministic_id) {
3030
0
        m->pipeline_stack.activate_md5(pp_md5);
3031
0
    }
3032
3033
    // Start writing
3034
3035
0
    writeHeader();
3036
0
    write(m->extra_header_text);
3037
3038
0
    if (m->pclm) {
3039
0
        enqueueObjectsPCLm();
3040
0
    } else {
3041
0
        enqueueObjectsStandard();
3042
0
    }
3043
3044
    // Now start walking queue, outputting each object.
3045
0
    while (m->object_queue_front < m->object_queue.size()) {
3046
0
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3047
0
        ++m->object_queue_front;
3048
0
        writeObject(cur_object);
3049
0
    }
3050
3051
    // Write out the encryption dictionary, if any
3052
0
    if (m->encryption) {
3053
0
        writeEncryptionDictionary();
3054
0
    }
3055
3056
    // Now write out xref.  next_objid is now the number of objects.
3057
0
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3058
0
    if (m->object_stream_to_objects.empty()) {
3059
        // Write regular cross-reference table
3060
0
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3061
0
    } else {
3062
        // Write cross-reference stream.
3063
0
        int xref_id = m->next_objid++;
3064
0
        writeXRefStream(
3065
0
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3066
0
    }
3067
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3068
3069
0
    if (m->deterministic_id) {
3070
0
        QTC::TC(
3071
0
            "qpdf",
3072
0
            "QPDFWriter standard deterministic ID",
3073
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3074
0
    }
3075
0
}