Coverage Report

Created: 2025-08-29 06:57

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
32
0
{
33
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
34
0
}
35
36
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
37
0
    handler(handler)
38
0
{
39
0
}
40
41
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
42
                                                                  // (modernize-use-equals-default)
43
0
{
44
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
45
0
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
117k
                stack(&stack)
102
117k
            {
103
117k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
8.71k
            top(top)
112
8.71k
        {
113
8.71k
        }
114
115
        Popper
116
        popper()
117
15.3k
        {
118
15.3k
            return {*this};
119
15.3k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
8.71k
        {
124
8.71k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
8.71k
            top = c.get();
126
8.71k
            stack.emplace_back(std::move(c));
127
8.71k
        }
128
129
        Popper
130
        activate(std::string& str)
131
65.4k
        {
132
65.4k
            Popper pp{*this};
133
65.4k
            activate(pp, str);
134
65.4k
            return pp;
135
65.4k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
65.4k
        {
140
65.4k
            activate(pp, false, &str, nullptr);
141
65.4k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
36.2k
        {
156
36.2k
            Popper pp{*this};
157
36.2k
            activate(pp, discard, str, std::move(next));
158
36.2k
            return pp;
159
36.2k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
109k
        {
168
109k
            std::unique_ptr<pl::Count> c;
169
109k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
109k
            } else if (discard) {
172
43.9k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
65.4k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
65.4k
            } else {
176
65.4k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
65.4k
            }
178
109k
            pp.stack_id = last_id;
179
109k
            top = c.get();
180
109k
            stack.emplace_back(std::move(c));
181
109k
        }
182
        void
183
        activate_md5(Popper& pp)
184
0
        {
185
0
            qpdf_assert_debug(!md5_pipeline);
186
0
            qpdf_assert_debug(md5_id == 0);
187
0
            qpdf_assert_debug(top->getCount() == 0);
188
0
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
0
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
0
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
0
            pp.stack_id = last_id;
193
0
            md5_id = last_id;
194
0
            top = c.get();
195
0
            stack.emplace_back(std::move(c));
196
0
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
0
        {
202
0
            qpdf_assert_debug(md5_pipeline);
203
0
            auto digest = md5_pipeline->getHexDigest();
204
0
            md5_pipeline->enable(false);
205
0
            return digest;
206
0
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
117k
        {
218
117k
            if (!stack_id) {
219
7.67k
                return;
220
7.67k
            }
221
109k
            qpdf_assert_debug(stack.size() >= 2);
222
109k
            top->finish();
223
109k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
109k
            qpdf_assert_debug(top->id() == stack_id);
229
109k
            if (stack_id == md5_id) {
230
0
                md5_pipeline = nullptr;
231
0
                md5_id = 0;
232
0
            }
233
109k
            stack.pop_back();
234
109k
            top = stack.back().get();
235
109k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
Pl_stack::Popper::~Popper()
247
117k
{
248
117k
    if (stack) {
249
109k
        stack->pop(stack_id);
250
109k
    }
251
117k
}
252
253
void
254
Pl_stack::Popper::pop()
255
7.29k
{
256
7.29k
    if (stack) {
257
7.29k
        stack->pop(stack_id);
258
7.29k
    }
259
7.29k
    stack_id = 0;
260
7.29k
    stack = nullptr;
261
7.29k
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
QPDFWriter::Members::Members(QPDF& pdf) :
345
8.95k
    pdf(pdf),
346
8.95k
    root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)),
347
8.95k
    pipeline_stack(pipeline)
348
8.95k
{
349
8.95k
}
350
351
QPDFWriter::Members::~Members()
352
8.71k
{
353
8.71k
    if (file && close_file) {
354
0
        fclose(file);
355
0
    }
356
8.71k
    delete output_buffer;
357
8.71k
}
358
359
QPDFWriter::QPDFWriter(QPDF& pdf) :
360
8.95k
    m(new Members(pdf))
361
8.95k
{
362
8.95k
}
363
364
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
365
0
    m(new Members(pdf))
366
0
{
367
0
    setOutputFilename(filename);
368
0
}
369
370
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
371
0
    m(new Members(pdf))
372
0
{
373
0
    setOutputFile(description, file, close_file);
374
0
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
Buffer*
414
QPDFWriter::getBuffer()
415
0
{
416
0
    Buffer* result = m->output_buffer;
417
0
    m->output_buffer = nullptr;
418
0
    return result;
419
0
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
8.71k
{
430
8.71k
    m->filename = "custom pipeline";
431
8.71k
    m->pipeline_stack.initialize(p);
432
8.71k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
0
{
437
0
    m->object_stream_mode = mode;
438
0
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
8.71k
{
473
8.71k
    m->stream_decode_level = val;
474
8.71k
    m->stream_decode_level_set = true;
475
8.71k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
0
{
493
0
    m->qdf_mode = val;
494
0
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
18.3k
{
511
18.3k
    bool set_version = false;
512
18.3k
    bool set_extension_level = false;
513
18.3k
    if (m->min_pdf_version.empty()) {
514
8.70k
        set_version = true;
515
8.70k
        set_extension_level = true;
516
9.60k
    } else {
517
9.60k
        int old_major = 0;
518
9.60k
        int old_minor = 0;
519
9.60k
        int min_major = 0;
520
9.60k
        int min_minor = 0;
521
9.60k
        parseVersion(version, old_major, old_minor);
522
9.60k
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
9.60k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
9.60k
        if (compare > 0) {
525
220
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
220
            set_version = true;
527
220
            set_extension_level = true;
528
9.38k
        } else if (compare == 0) {
529
1.12k
            if (extension_level > m->min_extension_level) {
530
3
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
3
                set_extension_level = true;
532
3
            }
533
1.12k
        }
534
9.60k
    }
535
536
18.3k
    if (set_version) {
537
8.92k
        m->min_pdf_version = version;
538
8.92k
    }
539
18.3k
    if (set_extension_level) {
540
8.92k
        m->min_extension_level = extension_level;
541
8.92k
    }
542
18.3k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
8.71k
{
575
8.71k
    m->static_id = val;
576
8.71k
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
0
{
581
0
    m->deterministic_id = val;
582
0
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
8.71k
{
607
8.71k
    m->linearized = val;
608
8.71k
    if (val) {
609
8.71k
        m->pclm = false;
610
8.71k
    }
611
8.71k
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
0
{
665
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
0
    interpretR3EncryptionParameters(
667
0
        allow_accessibility,
668
0
        allow_extract,
669
0
        allow_assemble,
670
0
        allow_annotate_and_form,
671
0
        allow_form_filling,
672
0
        allow_modify_other,
673
0
        print,
674
0
        qpdf_r3m_all);
675
0
    setEncryptionParameters(user_password, owner_password);
676
0
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
8.71k
{
746
8.71k
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
8.71k
    interpretR3EncryptionParameters(
748
8.71k
        allow_accessibility,
749
8.71k
        allow_extract,
750
8.71k
        allow_assemble,
751
8.71k
        allow_annotate_and_form,
752
8.71k
        allow_form_filling,
753
8.71k
        allow_modify_other,
754
8.71k
        print,
755
8.71k
        qpdf_r3m_all);
756
8.71k
    m->encrypt_use_aes = true;
757
8.71k
    setEncryptionParameters(user_password, owner_password);
758
8.71k
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
8.71k
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
8.71k
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
8.71k
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
8.71k
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
8.71k
    case qpdf_r3p_full:
817
8.71k
        break;
818
        // no default so gcc warns for missing cases
819
8.71k
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
8.71k
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
8.71k
    case qpdf_r3m_all:
840
8.71k
        break;
841
        // no default so gcc warns for missing cases
842
8.71k
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
8.71k
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
8.71k
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
8.71k
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
8.71k
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
8.71k
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
8.71k
{
862
8.71k
    generateID(true);
863
8.71k
    m->encryption->setId1(m->id1);
864
8.71k
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
8.71k
    setEncryptionMinimumVersion();
866
8.71k
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
0
{
871
0
    m->preserve_encryption = false;
872
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
0
    if (trailer.hasKey("/Encrypt")) {
874
0
        generateID(true);
875
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
0
        int key_len = 5;
879
0
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
0
        const bool encrypt_metadata =
883
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
0
            : true;
886
0
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
0
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
0
            V,
898
0
            encrypt.getKey("/R").getIntValueAsInt(),
899
0
            key_len,
900
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
0
            encrypt.getKey("/O").getStringValue(),
902
0
            encrypt.getKey("/U").getStringValue(),
903
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
0
            m->id1, // m->id1 == the other file's id1
907
0
            encrypt_metadata);
908
0
        m->encryption_key = V >= 5
909
0
            ? qpdf.getEncryptionKey()
910
0
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
0
        setEncryptionMinimumVersion();
912
0
    }
913
0
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
19.1k
{
955
19.1k
    major = QUtil::string_to_int(version.c_str());
956
19.1k
    minor = 0;
957
19.1k
    size_t p = version.find('.');
958
19.1k
    if ((p != std::string::npos) && (version.length() > p)) {
959
19.1k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
19.1k
    }
961
19.1k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
19.1k
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
15
    }
967
19.1k
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
9.59k
{
972
9.59k
    if (major1 < major2) {
973
75
        return -1;
974
9.51k
    } else if (major1 > major2) {
975
126
        return 1;
976
9.39k
    } else if (minor1 < minor2) {
977
8.16k
        return -1;
978
8.16k
    } else if (minor1 > minor2) {
979
94
        return 1;
980
1.12k
    } else {
981
1.12k
        return 0;
982
1.12k
    }
983
9.59k
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
8.70k
{
988
8.70k
    auto const R = m->encryption->getR();
989
8.70k
    if (R >= 6) {
990
8.70k
        setMinimumPDFVersion("1.7", 8);
991
8.70k
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
0
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
0
    } else if (R == 3) {
996
0
        setMinimumPDFVersion("1.4");
997
0
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
8.70k
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
221k
{
1005
221k
    if (m->encryption) {
1006
221k
        m->cur_data_key = QPDF::compute_data_key(
1007
221k
            m->encryption_key,
1008
221k
            objid,
1009
221k
            0,
1010
221k
            m->encrypt_use_aes,
1011
221k
            m->encryption->getV(),
1012
221k
            m->encryption->getR());
1013
221k
    }
1014
221k
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
7.58k
{
1019
7.58k
    unsigned int bytes = 0;
1020
18.2k
    while (n) {
1021
10.6k
        ++bytes;
1022
10.6k
        n >>= 8;
1023
10.6k
    }
1024
7.58k
    return bytes;
1025
7.58k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
331k
{
1030
331k
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
331k
    unsigned char data[sizeof(unsigned long long)];
1034
817k
    for (unsigned int i = 0; i < bytes; ++i) {
1035
486k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
486k
        val >>= 8;
1037
486k
    }
1038
331k
    m->pipeline->write(data, bytes);
1039
331k
}
1040
1041
// Send str to the current pipeline as-is. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(std::string_view str)
{
    m->pipeline->write(str);
    return *this;
}
1047
1048
// Send the decimal text form of the integer val (as produced by std::to_string) to the current
// pipeline. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(std::integral auto val)
{
    m->pipeline->write(std::to_string(val));
    return *this;
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
777k
{
1051
777k
    m->pipeline->write(std::to_string(val));
1052
777k
    return *this;
1053
777k
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
163k
{
1051
163k
    m->pipeline->write(std::to_string(val));
1052
163k
    return *this;
1053
163k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
71.2k
{
1051
71.2k
    m->pipeline->write(std::to_string(val));
1052
71.2k
    return *this;
1053
71.2k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
7.58k
{
1051
7.58k
    m->pipeline->write(std::to_string(val));
1052
7.58k
    return *this;
1053
7.58k
}
1054
1055
// Forward a (count, c) pair to the current pipeline's write; the callers in this file use it to
// emit a run of a single character. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(size_t count, char c)
{
    m->pipeline->write(count, c);
    return *this;
}
1061
1062
// Send the result of Name::normalize(str) to the current pipeline. Returns *this so that writes
// can be chained.
QPDFWriter&
QPDFWriter::write_name(std::string const& str)
{
    m->pipeline->write(Name::normalize(str));
    return *this;
}
1068
1069
// Send str to the current pipeline in the form produced by QPDF_String::unparse, passing
// force_binary through to it. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write_string(std::string const& str, bool force_binary)
{
    m->pipeline->write(QPDF_String(str).unparse(force_binary));
    return *this;
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
546k
{
1080
546k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
546k
    return *this;
1084
546k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
442k
{
1080
442k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
442k
    return *this;
1084
442k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
59.9k
{
1080
59.9k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
59.9k
    return *this;
1084
59.9k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
29.5k
{
1080
29.5k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
29.5k
    return *this;
1084
29.5k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
14.9k
{
1080
14.9k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
14.9k
    return *this;
1084
14.9k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
159k
{
1090
159k
    if (!m->qdf_mode) {
1091
159k
        m->pipeline->write(std::forward<Args>(args)...);
1092
159k
    }
1093
159k
    return *this;
1094
159k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
129k
{
1090
129k
    if (!m->qdf_mode) {
1091
129k
        m->pipeline->write(std::forward<Args>(args)...);
1092
129k
    }
1093
129k
    return *this;
1094
129k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
29.5k
{
1090
29.5k
    if (!m->qdf_mode) {
1091
29.5k
        m->pipeline->write(std::forward<Args>(args)...);
1092
29.5k
    }
1093
29.5k
    return *this;
1094
29.5k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
57.4k
{
1099
57.4k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
57.4k
        length += 32 - (length & 0xf);
1103
57.4k
    }
1104
57.4k
}
1105
1106
// Write str to the current pipeline, encrypting it with the current data key. If there is no
// encryption or no data key is set, str is written unchanged. Otherwise it is run through
// Pl_AES_PDF when AES is in use and through Pl_RC4 when it is not. Returns *this so that writes
// can be chained.
QPDFWriter&
QPDFWriter::write_encrypted(std::string_view str)
{
    if (!m->encryption || m->cur_data_key.empty()) {
        return write(str);
    }
    if (m->encrypt_use_aes) {
        return write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
    }
    return write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
}
1119
1120
void
1121
QPDFWriter::computeDeterministicIDData()
1122
0
{
1123
0
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
0
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
0
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
0
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
253k
{
1135
253k
    if (objid == 0) {
1136
0
        objid = m->next_objid++;
1137
0
    }
1138
253k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
253k
    write(objid).write(" 0 obj\n");
1140
253k
    return objid;
1141
253k
}
1142
1143
void
1144
QPDFWriter::closeObject(int objid)
1145
253k
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
253k
    write("\nendobj\n").write_qdf("\n");
1148
253k
    auto& new_obj = m->new_obj[objid];
1149
253k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
253k
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
124k
{
1155
124k
    int objid = og.getObj();
1156
124k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
121k
        return;
1159
121k
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
31.6k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
31.6k
        m->obj[iter].renumber = m->next_objid++;
1164
31.6k
    }
1165
2.95k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
124k
{
1170
124k
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
124k
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
124k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
0
            return;
1190
0
        }
1191
1192
124k
        QPDFObjGen og = object.getObjGen();
1193
124k
        auto& obj = m->obj[og];
1194
1195
124k
        if (obj.renumber == 0) {
1196
123k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
23
                obj.renumber = -1;
1201
23
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
123k
            } else {
1203
123k
                m->object_queue.push_back(object);
1204
123k
                obj.renumber = m->next_objid++;
1205
1206
123k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
2.84k
                    if (!m->linearized) {
1210
0
                        assignCompressedObjectNumbers(og);
1211
0
                    }
1212
120k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
0
                    ++m->next_objid;
1215
0
                }
1216
123k
            }
1217
123k
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
1
        }
1221
124k
        return;
1222
124k
    } else if (!m->linearized) {
1223
0
        if (object.isArray()) {
1224
0
            for (auto& item: object.as_array()) {
1225
0
                enqueueObject(item);
1226
0
            }
1227
0
        } else if (auto d = object.as_dictionary()) {
1228
0
            for (auto const& item: d) {
1229
0
                if (!item.second.null()) {
1230
0
                    enqueueObject(item.second);
1231
0
                }
1232
0
            }
1233
0
        }
1234
197
    } else {
1235
        // ignore
1236
197
    }
1237
124k
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1241
1.78M
{
1242
1.78M
    if (!m->linearized) {
1243
0
        enqueueObject(child);
1244
0
    }
1245
1.78M
    if (child.isIndirect()) {
1246
358k
        write(m->obj[child].renumber).write(" 0 R");
1247
1.42M
    } else {
1248
1.42M
        unparseObject(child, level, flags);
1249
1.42M
    }
1250
1.78M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
29.5k
{
1256
29.5k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
29.5k
    if (xref_stream) {
1258
2.52k
        m->cur_data_key.clear();
1259
27.0k
    } else {
1260
27.0k
        write("trailer <<");
1261
27.0k
    }
1262
29.5k
    write_qdf("\n");
1263
29.5k
    if (which == t_lin_second) {
1264
14.5k
        write(" /Size ").write(size);
1265
14.9k
    } else {
1266
39.9k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
39.9k
            if (value.null()) {
1268
9.25k
                continue;
1269
9.25k
            }
1270
30.6k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
30.6k
            if (key == "/Size") {
1272
4.69k
                write(size);
1273
4.69k
                if (which == t_lin_first) {
1274
4.69k
                    write(" /Prev ");
1275
4.69k
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
4.69k
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
4.69k
                }
1278
25.9k
            } else {
1279
25.9k
                unparseChild(value, 1, 0);
1280
25.9k
            }
1281
30.6k
            write_qdf("\n");
1282
30.6k
        }
1283
14.9k
    }
1284
1285
    // Write ID
1286
29.5k
    write_qdf(" ").write(" /ID [");
1287
29.5k
    if (linearization_pass == 1) {
1288
14.9k
        std::string original_id1 = getOriginalID1();
1289
14.9k
        if (original_id1.empty()) {
1290
12.3k
            write("<00000000000000000000000000000000>");
1291
12.3k
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
2.63k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
2.63k
            write("<").write(len, '0').write(">");
1298
2.63k
        }
1299
14.9k
        write("<00000000000000000000000000000000>");
1300
14.9k
    } else {
1301
14.5k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
0
            computeDeterministicIDData();
1303
0
        }
1304
14.5k
        generateID(m->encryption.get());
1305
14.5k
        write_string(m->id1, true).write_string(m->id2, true);
1306
14.5k
    }
1307
29.5k
    write("]");
1308
1309
29.5k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
14.9k
        if (m->encryption) {
1312
14.9k
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
14.9k
        }
1314
14.9k
    }
1315
1316
29.5k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
29.5k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
71.0k
{
1326
71.0k
    compress_stream = false;
1327
71.0k
    is_root_metadata = false;
1328
1329
71.0k
    QPDFObjGen old_og = stream.getObjGen();
1330
71.0k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
71.0k
    if (stream.isRootMetadata()) {
1333
453
        is_root_metadata = true;
1334
453
    }
1335
71.0k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
71.0k
    bool filter_on_write = stream.getFilterOnWrite();
1337
71.0k
    if (!filter_on_write) {
1338
13.5k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
13.5k
        filter = false;
1340
13.5k
    }
1341
71.0k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
57.5k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
57.5k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
57.5k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
13.1k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
13.1k
            filter = false;
1351
13.1k
        }
1352
57.5k
    }
1353
71.0k
    bool normalize = false;
1354
71.0k
    bool uncompress = false;
1355
71.0k
    if (filter_on_write && is_root_metadata &&
1356
71.0k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
0
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
0
        filter = true;
1359
0
        compress_stream = false;
1360
0
        uncompress = true;
1361
71.0k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
0
        normalize = true;
1363
0
        filter = true;
1364
71.0k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
44.3k
        compress_stream = true;
1366
44.3k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
44.3k
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
71.0k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
71.0k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
2.13k
        filter = true;
1373
2.13k
        compress_stream = false;
1374
2.13k
    }
1375
1376
71.0k
    bool filtered = false;
1377
79.5k
    for (bool first_attempt: {true, false}) {
1378
79.5k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
79.5k
                                          : m->pipeline_stack.activate(true);
1380
1381
79.5k
        try {
1382
79.5k
            filtered = stream.pipeStreamData(
1383
79.5k
                m->pipeline,
1384
79.5k
                !filter ? 0
1385
79.5k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
45.6k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
79.5k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
79.5k
                false,
1389
79.5k
                first_attempt);
1390
79.5k
            if (filter && !filtered) {
1391
                // Try again
1392
8.48k
                filter = false;
1393
8.48k
                stream.setFilterOnWrite(false);
1394
71.0k
            } else {
1395
71.0k
                break;
1396
71.0k
            }
1397
79.5k
        } catch (std::runtime_error& e) {
1398
70
            if (filter && first_attempt) {
1399
53
                stream.warn("error while getting stream data: "s + e.what());
1400
53
                stream.warn("qpdf will attempt to write the damaged stream unchanged");
1401
53
                filter = false;
1402
53
                stream.setFilterOnWrite(false);
1403
53
                continue;
1404
53
            }
1405
17
            throw std::runtime_error(
1406
17
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
70
        }
1408
8.48k
        if (stream_data) {
1409
1.30k
            stream_data->clear();
1410
1.30k
        }
1411
8.48k
    }
1412
71.0k
    if (!filtered) {
1413
33.6k
        compress_stream = false;
1414
33.6k
    }
1415
71.0k
    return filtered;
1416
71.0k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1421
1.73M
{
1422
1.73M
    QPDFObjGen old_og = object.getObjGen();
1423
1.73M
    int child_flags = flags & ~f_stream;
1424
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1425
    // include the preceding newline.
1426
1.73M
    std::string indent_large = " ";
1427
1.73M
    if (m->qdf_mode) {
1428
0
        indent_large.append(2 * (level + 1), ' ');
1429
0
        indent_large[0] = '\n';
1430
0
    }
1431
1.73M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1432
1433
1.73M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1434
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1435
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1436
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1437
108k
        write("[");
1438
970k
        for (auto const& item: object.as_array()) {
1439
970k
            write(indent_large);
1440
970k
            unparseChild(item, level + 1, child_flags);
1441
970k
        }
1442
108k
        write(indent).write("]");
1443
1.62M
    } else if (tc == ::ot_dictionary) {
1444
        // Handle special cases for specific dictionaries.
1445
1446
274k
        if (old_og == m->root_og) {
1447
            // Extensions dictionaries.
1448
1449
            // We have one of several cases:
1450
            //
1451
            // * We need ADBE
1452
            //    - We already have Extensions
1453
            //       - If it has the right ADBE, preserve it
1454
            //       - Otherwise, replace ADBE
1455
            //    - We don't have Extensions: create one from scratch
1456
            // * We don't want ADBE
1457
            //    - We already have Extensions
1458
            //       - If it only has ADBE, remove it
1459
            //       - If it has other things, keep those and remove ADBE
1460
            //    - We have no extensions: no action required
1461
            //
1462
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1463
            // dictionary, so we can modify in place.
1464
1465
14.9k
            auto extensions = object.getKey("/Extensions");
1466
14.9k
            const bool has_extensions = extensions.isDictionary();
1467
14.9k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1468
1469
14.9k
            if (has_extensions || need_extensions_adbe) {
1470
                // Make a shallow copy of this object so we can modify it safely without affecting
1471
                // the original. This code has logic to skip certain keys in agreement with
1472
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1473
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1474
                // here because all we are doing is removing or replacing top-level keys.
1475
14.5k
                object = object.unsafeShallowCopy();
1476
14.5k
                if (!has_extensions) {
1477
14.0k
                    extensions = QPDFObjectHandle();
1478
14.0k
                }
1479
1480
14.5k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1481
14.5k
                const bool have_extensions_other =
1482
14.5k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1483
1484
14.5k
                if (need_extensions_adbe) {
1485
14.5k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1486
                        // We need Extensions and don't have it.  Create it here.
1487
14.0k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1488
14.0k
                        extensions = object.replaceKeyAndGetNew(
1489
14.0k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1490
14.0k
                    }
1491
14.5k
                } else if (!have_extensions_other) {
1492
                    // We have Extensions dictionary and don't want one.
1493
14
                    if (have_extensions_adbe) {
1494
7
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1495
7
                        object.removeKey("/Extensions");
1496
7
                        extensions = QPDFObjectHandle(); // uninitialized
1497
7
                    }
1498
14
                }
1499
1500
14.5k
                if (extensions) {
1501
14.5k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1502
14.5k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1503
14.5k
                    if (adbe.isDictionary() &&
1504
14.5k
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1505
14.5k
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1506
14.5k
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1507
263
                         m->final_extension_level)) {
1508
260
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1509
14.3k
                    } else {
1510
14.3k
                        if (need_extensions_adbe) {
1511
14.2k
                            extensions.replaceKey(
1512
14.2k
                                "/ADBE",
1513
14.2k
                                QPDFObjectHandle::parse(
1514
14.2k
                                    "<< /BaseVersion /" + m->final_pdf_version +
1515
14.2k
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1516
14.2k
                                    " >>"));
1517
14.2k
                        } else {
1518
20
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1519
20
                            extensions.removeKey("/ADBE");
1520
20
                        }
1521
14.3k
                    }
1522
14.5k
                }
1523
14.5k
            }
1524
14.9k
        }
1525
1526
        // Stream dictionaries.
1527
1528
274k
        if (flags & f_stream) {
1529
            // Suppress /Length since we will write it manually
1530
1531
            // Make a shallow copy of this object so we can modify it safely without affecting the
1532
            // original. This code has logic to skip certain keys in agreement with
1533
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1534
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1535
            // all we are doing is removing or replacing top-level keys.
1536
46.0k
            object = object.unsafeShallowCopy();
1537
1538
46.0k
            object.removeKey("/Length");
1539
1540
            // If /DecodeParms is an empty list, remove it.
1541
46.0k
            if (object.getKey("/DecodeParms").empty()) {
1542
43.7k
                object.removeKey("/DecodeParms");
1543
43.7k
            }
1544
1545
46.0k
            if (flags & f_filtered) {
1546
                // We will supply our own filter and decode parameters.
1547
24.4k
                object.removeKey("/Filter");
1548
24.4k
                object.removeKey("/DecodeParms");
1549
24.4k
            } else {
1550
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1551
                // filters.
1552
21.6k
                QPDFObjectHandle filter = object.getKey("/Filter");
1553
21.6k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1554
21.6k
                if (filter.isOrHasName("/Crypt")) {
1555
344
                    if (filter.isName()) {
1556
32
                        object.removeKey("/Filter");
1557
32
                        object.removeKey("/DecodeParms");
1558
312
                    } else {
1559
312
                        int idx = 0;
1560
21.7k
                        for (auto const& item: filter.as_array()) {
1561
21.7k
                            if (item.isNameAndEquals("/Crypt")) {
1562
                                // If filter is an array, then the code in QPDF_Stream has already
1563
                                // verified that DecodeParms and Filters are arrays of the same
1564
                                // length, but if they weren't for some reason, eraseItem does type
1565
                                // and bounds checking. Fuzzing tells us that this can actually
1566
                                // happen.
1567
312
                                filter.eraseItem(idx);
1568
312
                                decode_parms.eraseItem(idx);
1569
312
                                break;
1570
312
                            }
1571
21.4k
                            ++idx;
1572
21.4k
                        }
1573
312
                    }
1574
344
                }
1575
21.6k
            }
1576
46.0k
        }
1577
1578
274k
        write("<<");
1579
1580
951k
        for (auto const& [key, value]: object.as_dictionary()) {
1581
951k
            if (!value.null()) {
1582
788k
                write(indent_large).write_name(key).write(" ");
1583
788k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1584
788k
                    object.hasKey("/ByteRange")) {
1585
12
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1586
12
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1587
788k
                } else {
1588
788k
                    unparseChild(value, level + 1, child_flags);
1589
788k
                }
1590
788k
            }
1591
951k
        }
1592
1593
274k
        if (flags & f_stream) {
1594
45.8k
            write(indent_large).write("/Length ");
1595
1596
45.8k
            if (m->direct_stream_lengths) {
1597
45.8k
                write(stream_length);
1598
45.8k
            } else {
1599
0
                write(m->cur_stream_length_id).write(" 0 R");
1600
0
            }
1601
45.8k
            if (compress && (flags & f_filtered)) {
1602
24.2k
                write(indent_large).write("/Filter /FlateDecode");
1603
24.2k
            }
1604
45.8k
        }
1605
1606
274k
        write(indent).write(">>");
1607
1.35M
    } else if (tc == ::ot_stream) {
1608
        // Write stream data to a buffer.
1609
46.0k
        if (!m->direct_stream_lengths) {
1610
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1611
0
        }
1612
1613
46.0k
        flags |= f_stream;
1614
46.0k
        bool compress_stream = false;
1615
46.0k
        bool is_metadata = false;
1616
46.0k
        std::string stream_data;
1617
46.0k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1618
24.4k
            flags |= f_filtered;
1619
24.4k
        }
1620
46.0k
        QPDFObjectHandle stream_dict = object.getDict();
1621
1622
46.0k
        m->cur_stream_length = stream_data.size();
1623
46.0k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1624
            // Don't encrypt stream data for the metadata stream
1625
0
            m->cur_data_key.clear();
1626
0
        }
1627
46.0k
        adjustAESStreamLength(m->cur_stream_length);
1628
46.0k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1629
46.0k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1630
46.0k
        write("\nstream\n").write_encrypted(stream_data);
1631
46.0k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1632
46.0k
        write(m->added_newline ? "\nendstream" : "endstream");
1633
1.30M
    } else if (tc == ::ot_string) {
1634
43.6k
        std::string val;
1635
43.6k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1636
43.6k
            !m->cur_data_key.empty()) {
1637
27.6k
            val = object.getStringValue();
1638
27.6k
            if (m->encrypt_use_aes) {
1639
27.6k
                Pl_Buffer bufpl("encrypted string");
1640
27.6k
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1641
27.6k
                pl.writeString(val);
1642
27.6k
                pl.finish();
1643
27.6k
                val = QPDF_String(bufpl.getString()).unparse(true);
1644
27.6k
            } else {
1645
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1646
0
                char* tmp = tmp_ph.get();
1647
0
                size_t vlen = val.length();
1648
0
                RC4 rc4(
1649
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1650
0
                    QIntC::to_int(m->cur_data_key.length()));
1651
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1652
0
                rc4.process(data, vlen, data);
1653
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1654
0
            }
1655
27.6k
        } else if (flags & f_hex_string) {
1656
12
            val = QPDF_String(object.getStringValue()).unparse(true);
1657
16.0k
        } else {
1658
16.0k
            val = object.unparseResolved();
1659
16.0k
        }
1660
43.6k
        write(val);
1661
1.26M
    } else {
1662
1.26M
        write(object.unparseResolved());
1663
1.26M
    }
1664
1.73M
}
1665
1666
void
1667
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1668
8.26k
{
1669
8.26k
    qpdf_assert_debug(first_obj > 0);
1670
8.26k
    bool is_first = true;
1671
8.26k
    auto id = std::to_string(first_obj) + ' ';
1672
103k
    for (auto& offset: offsets) {
1673
103k
        if (is_first) {
1674
8.26k
            is_first = false;
1675
95.1k
        } else {
1676
95.1k
            write_qdf("\n").write_no_qdf(" ");
1677
95.1k
        }
1678
103k
        write(id);
1679
103k
        util::increment(id, 1);
1680
103k
        write(offset);
1681
103k
    }
1682
8.26k
    write("\n");
1683
8.26k
}
1684
1685
void
1686
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1687
4.13k
{
1688
    // Note: object might be null if this is a place-holder for an object stream that we are
1689
    // generating from scratch.
1690
1691
4.13k
    QPDFObjGen old_og = object.getObjGen();
1692
4.13k
    qpdf_assert_debug(old_og.getGen() == 0);
1693
4.13k
    int old_id = old_og.getObj();
1694
4.13k
    int new_stream_id = m->obj[old_og].renumber;
1695
1696
4.13k
    std::vector<qpdf_offset_t> offsets;
1697
4.13k
    qpdf_offset_t first = 0;
1698
1699
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1700
    // first pass.
1701
4.13k
    std::string stream_buffer_pass1;
1702
4.13k
    std::string stream_buffer_pass2;
1703
4.13k
    int first_obj = -1;
1704
4.13k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1705
4.13k
    {
1706
        // Pass 1
1707
4.13k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1708
1709
4.13k
        int count = -1;
1710
51.7k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1711
51.7k
            ++count;
1712
51.7k
            int new_obj = m->obj[obj].renumber;
1713
51.7k
            if (first_obj == -1) {
1714
4.13k
                first_obj = new_obj;
1715
4.13k
            }
1716
51.7k
            if (m->qdf_mode) {
1717
0
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1718
0
                if (!m->suppress_original_object_ids) {
1719
0
                    write("; original object ID: ").write(obj.getObj());
1720
                    // For compatibility, only write the generation if non-zero.  While object
1721
                    // streams only allow objects with generation 0, if we are generating object
1722
                    // streams, the old object could have a non-zero generation.
1723
0
                    if (obj.getGen() != 0) {
1724
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1725
0
                        write(" ").write(obj.getGen());
1726
0
                    }
1727
0
                }
1728
0
                write("\n");
1729
0
            }
1730
1731
51.7k
            offsets.push_back(m->pipeline->getCount());
1732
            // To avoid double-counting objects being written in object streams for progress
1733
            // reporting, decrement in pass 1.
1734
51.7k
            indicateProgress(true, false);
1735
1736
51.7k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1737
51.7k
            if (obj_to_write.isStream()) {
1738
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1739
                // time, but it's not clear to me how to construct a case for this.
1740
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1741
0
                obj_to_write = QPDFObjectHandle::newNull();
1742
0
            }
1743
51.7k
            writeObject(obj_to_write, count);
1744
1745
51.7k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1746
51.7k
        }
1747
4.13k
    }
1748
4.13k
    {
1749
        // Adjust offsets to skip over comment before first object
1750
4.13k
        first = offsets.at(0);
1751
51.6k
        for (auto& iter: offsets) {
1752
51.6k
            iter -= first;
1753
51.6k
        }
1754
1755
        // Take one pass at writing pairs of numbers so we can get their size information
1756
4.13k
        {
1757
4.13k
            auto pp_discard = m->pipeline_stack.activate(true);
1758
4.13k
            writeObjectStreamOffsets(offsets, first_obj);
1759
4.13k
            first += m->pipeline->getCount();
1760
4.13k
        }
1761
1762
        // Set up a stream to write the stream data into a buffer.
1763
4.13k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1764
1765
4.13k
        writeObjectStreamOffsets(offsets, first_obj);
1766
4.13k
        write(stream_buffer_pass1);
1767
4.13k
        stream_buffer_pass1.clear();
1768
4.13k
        stream_buffer_pass1.shrink_to_fit();
1769
4.13k
        if (compressed) {
1770
4.13k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1771
4.13k
        }
1772
4.13k
    }
1773
1774
    // Write the object
1775
4.13k
    openObject(new_stream_id);
1776
4.13k
    setDataKey(new_stream_id);
1777
4.13k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1778
4.13k
    size_t length = stream_buffer_pass2.size();
1779
4.13k
    adjustAESStreamLength(length);
1780
4.13k
    write(" /Length ").write(length).write_qdf("\n ");
1781
4.13k
    if (compressed) {
1782
4.13k
        write(" /Filter /FlateDecode");
1783
4.13k
    }
1784
4.13k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1785
4.13k
    if (!object.isNull()) {
1786
        // If the original object has an /Extends key, preserve it.
1787
1.52k
        QPDFObjectHandle dict = object.getDict();
1788
1.52k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1789
1.52k
        if (extends.isIndirect()) {
1790
135
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1791
135
            write_qdf("\n ").write(" /Extends ");
1792
135
            unparseChild(extends, 1, f_in_ostream);
1793
135
        }
1794
1.52k
    }
1795
4.13k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1796
4.13k
    if (m->encryption) {
1797
3.99k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1798
3.99k
    }
1799
4.13k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1800
4.13k
    m->cur_data_key.clear();
1801
4.13k
    closeObject(new_stream_id);
1802
4.13k
}
1803
1804
void
1805
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1806
265k
{
1807
265k
    QPDFObjGen old_og = object.getObjGen();
1808
1809
265k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1810
265k
        m->object_stream_to_objects.contains(old_og.getObj())) {
1811
4.13k
        writeObjectStream(object);
1812
4.13k
        return;
1813
4.13k
    }
1814
1815
261k
    indicateProgress(false, false);
1816
261k
    auto new_id = m->obj[old_og].renumber;
1817
261k
    if (m->qdf_mode) {
1818
0
        if (m->page_object_to_seq.contains(old_og)) {
1819
0
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1820
0
        }
1821
0
        if (m->contents_to_page_seq.contains(old_og)) {
1822
0
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1823
0
        }
1824
0
    }
1825
261k
    if (object_stream_index == -1) {
1826
209k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1827
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1828
0
        }
1829
209k
        openObject(new_id);
1830
209k
        setDataKey(new_id);
1831
209k
        unparseObject(object, 0, 0);
1832
209k
        m->cur_data_key.clear();
1833
209k
        closeObject(new_id);
1834
209k
    } else {
1835
51.7k
        unparseObject(object, 0, f_in_ostream);
1836
51.7k
        write("\n");
1837
51.7k
    }
1838
1839
261k
    if (!m->direct_stream_lengths && object.isStream()) {
1840
0
        if (m->qdf_mode) {
1841
0
            if (m->added_newline) {
1842
0
                write("%QDF: ignore_newline\n");
1843
0
            }
1844
0
        }
1845
0
        openObject(new_id + 1);
1846
0
        write(m->cur_stream_length);
1847
0
        closeObject(new_id + 1);
1848
0
    }
1849
261k
}
1850
1851
std::string
1852
QPDFWriter::getOriginalID1()
1853
23.6k
{
1854
23.6k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1855
23.6k
    if (trailer.hasKey("/ID")) {
1856
4.27k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1857
19.4k
    } else {
1858
19.4k
        return "";
1859
19.4k
    }
1860
23.6k
}
1861
1862
void
1863
QPDFWriter::generateID(bool encrypted)
1864
23.2k
{
1865
    // Generate the ID lazily so that we can handle the user's preference to use static or
1866
    // deterministic ID generation.
1867
1868
23.2k
    if (!m->id2.empty()) {
1869
14.5k
        return;
1870
14.5k
    }
1871
1872
8.71k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1873
1874
8.71k
    std::string result;
1875
1876
8.71k
    if (m->static_id) {
1877
        // For test suite use only...
1878
8.71k
        static unsigned char tmp[] = {
1879
8.71k
            0x31,
1880
8.71k
            0x41,
1881
8.71k
            0x59,
1882
8.71k
            0x26,
1883
8.71k
            0x53,
1884
8.71k
            0x58,
1885
8.71k
            0x97,
1886
8.71k
            0x93,
1887
8.71k
            0x23,
1888
8.71k
            0x84,
1889
8.71k
            0x62,
1890
8.71k
            0x64,
1891
8.71k
            0x33,
1892
8.71k
            0x83,
1893
8.71k
            0x27,
1894
8.71k
            0x95,
1895
8.71k
            0x00};
1896
8.71k
        result = reinterpret_cast<char*>(tmp);
1897
8.71k
    } else {
1898
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1899
        // only thing that's really important is that it is very likely to be unique.  We can't
1900
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1901
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1902
        // sufficient portion of the file's contents such no two non-matching files would match in
1903
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1904
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1905
        // that case, would have the same ID regardless of the output file's name.
1906
1907
0
        std::string seed;
1908
0
        if (m->deterministic_id) {
1909
0
            if (encrypted) {
1910
0
                throw std::runtime_error(
1911
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
1912
0
                    "written is encrypted (even though the file may not require a password)");
1913
0
            }
1914
0
            if (m->deterministic_id_data.empty()) {
1915
0
                throw std::logic_error(
1916
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
1917
0
            }
1918
0
            seed += m->deterministic_id_data;
1919
0
        } else {
1920
0
            seed += std::to_string(QUtil::get_current_time());
1921
0
            seed += m->filename;
1922
0
            seed += " ";
1923
0
        }
1924
0
        seed += " QPDF ";
1925
0
        if (trailer.hasKey("/Info")) {
1926
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1927
0
                if (item.second.isString()) {
1928
0
                    seed += " ";
1929
0
                    seed += item.second.getStringValue();
1930
0
                }
1931
0
            }
1932
0
        }
1933
1934
0
        MD5 m;
1935
0
        m.encodeString(seed.c_str());
1936
0
        MD5::Digest digest;
1937
0
        m.digest(digest);
1938
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1939
0
    }
1940
1941
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1942
    // word.  Otherwise, we'll use the generated ID for both.
1943
1944
8.71k
    m->id2 = result;
1945
    // Note: keep /ID from old file even if --static-id was given.
1946
8.71k
    m->id1 = getOriginalID1();
1947
8.71k
    if (m->id1.empty()) {
1948
7.17k
        m->id1 = m->id2;
1949
7.17k
    }
1950
8.71k
}
1951
1952
void
1953
QPDFWriter::initializeSpecialStreams()
1954
0
{
1955
    // Mark all page content streams in case we are filtering or normalizing.
1956
0
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1957
0
    int num = 0;
1958
0
    for (auto& page: pages) {
1959
0
        m->page_object_to_seq[page.getObjGen()] = ++num;
1960
0
        QPDFObjectHandle contents = page.getKey("/Contents");
1961
0
        std::vector<QPDFObjGen> contents_objects;
1962
0
        if (contents.isArray()) {
1963
0
            int n = static_cast<int>(contents.size());
1964
0
            for (int i = 0; i < n; ++i) {
1965
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1966
0
            }
1967
0
        } else if (contents.isStream()) {
1968
0
            contents_objects.push_back(contents.getObjGen());
1969
0
        }
1970
1971
0
        for (auto const& c: contents_objects) {
1972
0
            m->contents_to_page_seq[c] = num;
1973
0
            m->normalized_streams.insert(c);
1974
0
        }
1975
0
    }
1976
0
}
1977
1978
void
1979
QPDFWriter::preserveObjectStreams()
1980
8.70k
{
1981
8.70k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1982
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1983
    // streams out of old objects that have generation numbers greater than zero. However in an
1984
    // existing PDF, all object stream objects and all objects in them must have generation 0
1985
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1986
    // that are not allowed to be in object streams. In addition to removing objects that were
1987
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1988
    // objects from being included.
1989
8.70k
    auto end = xref.cend();
1990
8.70k
    m->obj.streams_empty = true;
1991
8.70k
    if (m->preserve_unreferenced_objects) {
1992
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1993
0
            if (iter->second.getType() == 2) {
1994
                // Pdf contains object streams.
1995
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1996
0
                m->obj.streams_empty = false;
1997
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1998
0
            }
1999
0
        }
2000
8.70k
    } else {
2001
        // Start by scanning for first compressed object in case we don't have any object streams to
2002
        // process.
2003
99.2k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2004
91.8k
            if (iter->second.getType() == 2) {
2005
                // Pdf contains object streams.
2006
1.31k
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2007
1.31k
                m->obj.streams_empty = false;
2008
1.31k
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2009
                // The object pointed to by iter may be a previous generation, in which case it is
2010
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2011
                // table may contain multiple generations of an object).
2012
213k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2013
212k
                    if (iter->second.getType() == 2) {
2014
190k
                        auto id = static_cast<size_t>(iter->first.getObj());
2015
190k
                        if (id < eligible.size() && eligible[id]) {
2016
34.7k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2017
156k
                        } else {
2018
156k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2019
156k
                        }
2020
190k
                    }
2021
212k
                }
2022
1.31k
                return;
2023
1.31k
            }
2024
91.8k
        }
2025
8.70k
    }
2026
8.70k
}
2027
2028
void
2029
QPDFWriter::generateObjectStreams()
2030
0
{
2031
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2032
    // how many object streams are needed so that we can distribute objects approximately evenly
2033
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2034
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2035
    // here later.
2036
2037
    // This code doesn't do anything with /Extends.
2038
2039
0
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2040
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2041
2042
0
    initializeTables(2U * n_object_streams);
2043
0
    if (n_object_streams == 0) {
2044
0
        m->obj.streams_empty = true;
2045
0
        return;
2046
0
    }
2047
0
    size_t n_per = eligible.size() / n_object_streams;
2048
0
    if (n_per * n_object_streams < eligible.size()) {
2049
0
        ++n_per;
2050
0
    }
2051
0
    unsigned int n = 0;
2052
0
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2053
0
    for (auto const& item: eligible) {
2054
0
        if (n == n_per) {
2055
0
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2056
0
            n = 0;
2057
            // Construct a new null object as the "original" object stream.  The rest of the code
2058
            // knows that this means we're creating the object stream from scratch.
2059
0
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2060
0
        }
2061
0
        auto& obj = m->obj[item];
2062
0
        obj.object_stream = cur_ostream;
2063
0
        obj.gen = item.getGen();
2064
0
        ++n;
2065
0
    }
2066
0
}
2067
2068
// Return a shallow copy of the input trailer with the keys removed that necessarily have to be
// replaced when writing the file.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    QPDFObjectHandle trimmed = m->pdf.getTrailer().unsafeShallowCopy();

    // In order: the /ID and /Encrypt keys, modification information (/Prev), and all trailer
    // keys that potentially come from a cross-reference stream.
    for (char const* key:
         {"/ID",
          "/Encrypt",
          "/Prev",
          "/Index",
          "/W",
          "/Length",
          "/Filter",
          "/DecodeParms",
          "/Type",
          "/XRefStm"}) {
        trimmed.removeKey(key);
    }

    return trimmed;
}
2093
2094
// Make document extension level information direct as required by the spec.
2095
void
2096
QPDFWriter::prepareFileForWrite()
2097
8.63k
{
2098
8.63k
    m->pdf.fixDanglingReferences();
2099
8.63k
    auto root = m->pdf.getRoot();
2100
8.63k
    auto oh = root.getKey("/Extensions");
2101
8.63k
    if (oh.isDictionary()) {
2102
348
        const bool extensions_indirect = oh.isIndirect();
2103
348
        if (extensions_indirect) {
2104
105
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2105
105
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2106
105
        }
2107
348
        if (oh.hasKey("/ADBE")) {
2108
214
            auto adbe = oh.getKey("/ADBE");
2109
214
            if (adbe.isIndirect()) {
2110
138
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2111
138
                adbe.makeDirect();
2112
138
                oh.replaceKey("/ADBE", adbe);
2113
138
            }
2114
214
        }
2115
348
    }
2116
8.63k
}
2117
2118
void
2119
QPDFWriter::initializeTables(size_t extra)
2120
8.70k
{
2121
8.70k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2122
8.70k
    m->obj.resize(size);
2123
8.70k
    m->new_obj.resize(size);
2124
8.70k
}
2125
2126
void
2127
QPDFWriter::doWriteSetup()
2128
8.70k
{
2129
8.70k
    if (m->did_write_setup) {
2130
0
        return;
2131
0
    }
2132
8.70k
    m->did_write_setup = true;
2133
2134
    // Do preliminary setup
2135
2136
8.70k
    if (m->linearized) {
2137
8.70k
        m->qdf_mode = false;
2138
8.70k
    }
2139
2140
8.70k
    if (m->pclm) {
2141
0
        m->stream_decode_level = qpdf_dl_none;
2142
0
        m->compress_streams = false;
2143
0
        m->encryption = nullptr;
2144
0
    }
2145
2146
8.70k
    if (m->qdf_mode) {
2147
0
        if (!m->normalize_content_set) {
2148
0
            m->normalize_content = true;
2149
0
        }
2150
0
        if (!m->compress_streams_set) {
2151
0
            m->compress_streams = false;
2152
0
        }
2153
0
        if (!m->stream_decode_level_set) {
2154
0
            m->stream_decode_level = qpdf_dl_generalized;
2155
0
        }
2156
0
    }
2157
2158
8.70k
    if (m->encryption) {
2159
        // Encryption has been explicitly set
2160
8.70k
        m->preserve_encryption = false;
2161
8.70k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2162
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2163
        // though, we still obey that.
2164
0
        m->preserve_encryption = false;
2165
0
    }
2166
2167
8.70k
    if (m->preserve_encryption) {
2168
0
        copyEncryptionParameters(m->pdf);
2169
0
    }
2170
2171
8.70k
    if (!m->forced_pdf_version.empty()) {
2172
0
        int major = 0;
2173
0
        int minor = 0;
2174
0
        parseVersion(m->forced_pdf_version, major, minor);
2175
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2176
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2177
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2178
0
            m->object_stream_mode = qpdf_o_disable;
2179
0
        }
2180
0
    }
2181
2182
8.70k
    if (m->qdf_mode || m->normalize_content) {
2183
0
        initializeSpecialStreams();
2184
0
    }
2185
2186
8.70k
    if (m->qdf_mode) {
2187
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2188
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2189
        // hint streams always get direct stream lengths.
2190
0
        m->direct_stream_lengths = false;
2191
0
    }
2192
2193
8.70k
    switch (m->object_stream_mode) {
2194
0
    case qpdf_o_disable:
2195
0
        initializeTables();
2196
0
        m->obj.streams_empty = true;
2197
0
        break;
2198
2199
8.70k
    case qpdf_o_preserve:
2200
8.70k
        initializeTables();
2201
8.70k
        preserveObjectStreams();
2202
8.70k
        break;
2203
2204
0
    case qpdf_o_generate:
2205
0
        generateObjectStreams();
2206
0
        break;
2207
2208
        // no default so gcc will warn for missing case tag
2209
8.70k
    }
2210
2211
8.68k
    if (!m->obj.streams_empty) {
2212
1.29k
        if (m->linearized) {
2213
            // Page dictionaries are not allowed to be compressed objects.
2214
2.07k
            for (auto& page: m->pdf.getAllPages()) {
2215
2.07k
                if (m->obj[page].object_stream > 0) {
2216
239
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2217
239
                    m->obj[page].object_stream = 0;
2218
239
                }
2219
2.07k
            }
2220
1.29k
        }
2221
2222
1.29k
        if (m->linearized || m->encryption) {
2223
            // The document catalog is not allowed to be compressed in linearized files either.  It
2224
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2225
            // handle encrypted files with compressed document catalogs, so we disable them in that
2226
            // case as well.
2227
1.29k
            if (m->obj[m->root_og].object_stream > 0) {
2228
28
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2229
28
                m->obj[m->root_og].object_stream = 0;
2230
28
            }
2231
1.29k
        }
2232
2233
        // Generate reverse mapping from object stream to objects
2234
1.37M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2235
1.37M
            if (item.object_stream > 0) {
2236
34.2k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2237
34.2k
                vec.emplace_back(id, item.gen);
2238
34.2k
                if (m->max_ostream_index < vec.size()) {
2239
18.3k
                    ++m->max_ostream_index;
2240
18.3k
                }
2241
34.2k
            }
2242
1.37M
        });
2243
1.29k
        --m->max_ostream_index;
2244
2245
1.29k
        if (m->object_stream_to_objects.empty()) {
2246
375
            m->obj.streams_empty = true;
2247
924
        } else {
2248
924
            setMinimumPDFVersion("1.5");
2249
924
        }
2250
1.29k
    }
2251
2252
8.68k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2253
8.68k
    m->final_pdf_version = m->min_pdf_version;
2254
8.68k
    m->final_extension_level = m->min_extension_level;
2255
8.68k
    if (!m->forced_pdf_version.empty()) {
2256
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2257
0
        m->final_pdf_version = m->forced_pdf_version;
2258
0
        m->final_extension_level = m->forced_extension_level;
2259
0
    }
2260
8.68k
}
2261
2262
void
2263
QPDFWriter::write()
2264
8.70k
{
2265
8.70k
    doWriteSetup();
2266
2267
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2268
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2269
8.70k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2270
2271
8.70k
    prepareFileForWrite();
2272
2273
8.70k
    if (m->linearized) {
2274
8.61k
        writeLinearized();
2275
8.61k
    } else {
2276
89
        writeStandard();
2277
89
    }
2278
2279
8.70k
    m->pipeline->finish();
2280
8.70k
    if (m->close_file) {
2281
0
        fclose(m->file);
2282
0
    }
2283
8.70k
    m->file = nullptr;
2284
8.70k
    if (m->buffer_pipeline) {
2285
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2286
0
        m->buffer_pipeline = nullptr;
2287
0
    }
2288
8.70k
    indicateProgress(false, true);
2289
8.70k
}
2290
2291
// Map an object ID from the input file to the ID recorded for it in the output file. The
// returned generation is always 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    return QPDFObjGen(m->obj[og].renumber, 0);
}
2296
2297
std::map<QPDFObjGen, QPDFXRefEntry>
2298
QPDFWriter::getWrittenXRefTable()
2299
0
{
2300
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2301
2302
0
    auto it = result.begin();
2303
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2304
0
        if (item.xref.getType() != 0) {
2305
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2306
0
        }
2307
0
    });
2308
0
    return result;
2309
0
}
2310
2311
void
2312
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2313
40.6k
{
2314
124k
    for (auto const& oh: part) {
2315
124k
        enqueueObject(oh);
2316
124k
    }
2317
40.6k
}
2318
2319
void
2320
QPDFWriter::writeEncryptionDictionary()
2321
14.9k
{
2322
14.9k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2323
14.9k
    auto& enc = *m->encryption;
2324
14.9k
    auto const V = enc.getV();
2325
2326
14.9k
    write("<<");
2327
14.9k
    if (V >= 4) {
2328
14.9k
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2329
14.9k
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2330
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2331
        // MacOS won't open encrypted files without it.
2332
14.9k
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2333
14.9k
        if (!m->encryption->getEncryptMetadata()) {
2334
0
            write(" /EncryptMetadata false");
2335
0
        }
2336
14.9k
    }
2337
14.9k
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2338
14.9k
    write(" /O ").write_string(enc.getO(), true);
2339
14.9k
    if (V >= 4) {
2340
14.9k
        write(" /OE ").write_string(enc.getOE(), true);
2341
14.9k
    }
2342
14.9k
    write(" /P ").write(enc.getP());
2343
14.9k
    if (V >= 5) {
2344
14.9k
        write(" /Perms ").write_string(enc.getPerms(), true);
2345
14.9k
    }
2346
14.9k
    write(" /R ").write(enc.getR());
2347
2348
14.9k
    if (V >= 4) {
2349
14.9k
        write(" /StmF /StdCF /StrF /StdCF");
2350
14.9k
    }
2351
14.9k
    write(" /U ").write_string(enc.getU(), true);
2352
14.9k
    if (V >= 4) {
2353
14.9k
        write(" /UE ").write_string(enc.getUE(), true);
2354
14.9k
    }
2355
14.9k
    write(" /V ").write(enc.getV()).write(" >>");
2356
14.9k
    closeObject(m->encryption_dict_objid);
2357
14.9k
}
2358
2359
std::string
2360
QPDFWriter::getFinalVersion()
2361
0
{
2362
0
    doWriteSetup();
2363
0
    return m->final_pdf_version;
2364
0
}
2365
2366
void
2367
QPDFWriter::writeHeader()
2368
14.9k
{
2369
14.9k
    write("%PDF-").write(m->final_pdf_version);
2370
14.9k
    if (m->pclm) {
2371
        // PCLm version
2372
0
        write("\n%PCLm 1.0\n");
2373
14.9k
    } else {
2374
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2375
        // as binary.
2376
14.9k
        write("\n%\xbf\xf7\xa2\xfe\n");
2377
14.9k
    }
2378
14.9k
    write_qdf("%QDF-1.0\n\n");
2379
2380
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2381
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2382
    // linearized files, we have to write extra header text after the linearization parameter
2383
    // dictionary.
2384
14.9k
}
2385
2386
void
2387
QPDFWriter::writeHintStream(int hint_id)
2388
7.29k
{
2389
7.29k
    std::string hint_buffer;
2390
7.29k
    int S = 0;
2391
7.29k
    int O = 0;
2392
7.29k
    bool compressed = m->compress_streams && !m->qdf_mode;
2393
7.29k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2394
2395
7.29k
    openObject(hint_id);
2396
7.29k
    setDataKey(hint_id);
2397
2398
7.29k
    size_t hlen = hint_buffer.size();
2399
2400
7.29k
    write("<< ");
2401
7.29k
    if (compressed) {
2402
7.29k
        write("/Filter /FlateDecode ");
2403
7.29k
    }
2404
7.29k
    write("/S ").write(S);
2405
7.29k
    if (O) {
2406
291
        write(" /O ").write(O);
2407
291
    }
2408
7.29k
    adjustAESStreamLength(hlen);
2409
7.29k
    write(" /Length ").write(hlen);
2410
7.29k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2411
2412
7.29k
    if (m->encryption) {
2413
7.29k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2414
7.29k
    }
2415
2416
7.29k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2417
7.29k
    closeObject(hint_id);
2418
7.29k
}
2419
2420
// Convenience overload: forwards to the full writeXRefTable() with no previous-xref offset,
// offsets not suppressed, no hint stream adjustment, and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // This stays a separate overload rather than default arguments in the header: with this many
    // trailing parameters it would be too easy to leave one off by accident.
    return writeXRefTable(
        which,
        first,
        last,
        size,
        /* prev */ 0,
        /* suppress_offsets */ false,
        /* hint_id */ 0,
        /* hint_offset */ 0,
        /* hint_length */ 0,
        /* linearization_pass */ 0);
}
2427
2428
// Write a classic cross-reference table section covering objects first..last, followed by the
// trailer (via writeTrailer) and a newline.
//
// When suppress_offsets is true, every in-use entry is written with offset 0. Otherwise, if
// hint_id is non-zero, the offset of every object other than hint_id that lies at or beyond
// hint_offset is increased by hint_length.
//
// Returns the pipeline byte count taken immediately after the "first count" subsection header,
// before the newline that ends it.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    int const entry_count = last - first + 1;
    write("xref\n").write(first).write(" ").write(entry_count);
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    write("\n");

    int objid = first;
    if (objid == 0) {
        // Object 0 is always written as the fixed free entry.
        write("0000000000 65535 f \n");
        objid = 1;
    }

    for (; objid <= last; ++objid) {
        qpdf_offset_t entry_offset = 0;
        if (!suppress_offsets) {
            entry_offset = m->new_obj[objid].xref.getOffset();
            bool const shift_for_hint =
                hint_id != 0 && objid != hint_id && entry_offset >= hint_offset;
            if (shift_for_hint) {
                entry_offset += hint_length;
            }
        }
        write(QUtil::int_to_string(entry_offset, 10));
        write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2462
2463
// Convenience overload: forwards to the full writeXRefStream() with no previous-xref offset, no
// hint stream adjustment, compression not skipped, and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // This stays a separate overload rather than default arguments in the header: with this many
    // trailing parameters it would be too easy to leave one off by accident.
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        /* prev */ 0,
        /* hint_id */ 0,
        /* hint_offset */ 0,
        /* hint_length */ 0,
        /* skip_compression */ false,
        /* linearization_pass */ 0);
}
2472
2473
// Write a cross-reference stream as object xref_id, describing objects first..last.
//
// Each entry is a one-byte type followed by two fields whose widths are chosen here: field 1 is
// wide enough for both max_offset + hint_length and max_id; field 2 is wide enough for
// m->max_ostream_index. If hint_id is non-zero, the offset of every type 1 entry other than
// hint_id that lies at or beyond hint_offset is increased by hint_length.
//
// When streams are compressed (and QDF mode is off) the data is always run through the PNG
// predictor filter and the dictionary always advertises /FlateDecode; skip_compression only
// controls whether the data is really deflated.
//
// Returns the pipeline byte count at entry minus one. Throws std::logic_error if an entry has a
// type other than 0, 1, or 2.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();
    qpdf_offset_t const space_before_zero = xref_offset - 1;

    // Field 1 holds file offsets and object stream identifiers.
    unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 holds indices within object streams.
    unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // Bytes per entry: the type byte plus both fields.
    unsigned int esize = 1 + f1_size + f2_size;

    // Record this object's own offset now instead of leaving it to openObject: the entries
    // generated below must already see it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    const bool compressed = m->compress_streams && !m->qdf_mode;
    {
        // While pp_xref is alive, writeBinary output is collected in xref_data.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        for (int objid = first; objid <= last; ++objid) {
            QPDFXRefEntry& entry = m->new_obj[objid].xref;
            switch (entry.getType()) {
            case 0:
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
                break;

            case 1:
                {
                    qpdf_offset_t entry_offset = entry.getOffset();
                    bool const shift_for_hint =
                        hint_id != 0 && objid != hint_id && entry_offset >= hint_offset;
                    if (shift_for_hint) {
                        entry_offset += hint_length;
                    }
                    writeBinary(1, 1);
                    writeBinary(QIntC::to_ulonglong(entry_offset), f1_size);
                    writeBinary(0, f2_size);
                }
                break;

            case 2:
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression set, the stream dictionary below still claims /FlateDecode but
        // the data is left undeflated. This helps with computing the padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");

    // /Index is only written when the stream does not cover exactly objects 0..size-1.
    bool const covers_everything = (first == 0) && (last == (size - 1));
    if (!covers_everything) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }

    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2568
2569
size_t
2570
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2571
1.33k
{
2572
    // This routine is called right after a linearization first pass xref stream has been written
2573
    // without compression.  Calculate the amount of padding that would be required in the worst
2574
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2575
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2576
    // 10 extra bytes for number length increases.
2577
2578
1.33k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2579
1.33k
}
2580
2581
void
2582
QPDFWriter::writeLinearized()
2583
8.61k
{
2584
    // Optimize file and enqueue objects in order
2585
2586
8.61k
    std::map<int, int> stream_cache;
2587
2588
48.7k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2589
48.7k
        auto& result = stream_cache[stream.getObjectID()];
2590
48.7k
        if (result == 0) {
2591
24.9k
            bool compress_stream;
2592
24.9k
            bool is_metadata;
2593
24.9k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2594
12.9k
                result = 2;
2595
12.9k
            } else {
2596
12.0k
                result = 1;
2597
12.0k
            }
2598
24.9k
        }
2599
48.7k
        return result;
2600
48.7k
    };
2601
2602
8.61k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2603
2604
8.61k
    std::vector<QPDFObjectHandle> part4;
2605
8.61k
    std::vector<QPDFObjectHandle> part6;
2606
8.61k
    std::vector<QPDFObjectHandle> part7;
2607
8.61k
    std::vector<QPDFObjectHandle> part8;
2608
8.61k
    std::vector<QPDFObjectHandle> part9;
2609
8.61k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2610
2611
    // Object number sequence:
2612
    //
2613
    //  second half
2614
    //    second half uncompressed objects
2615
    //    second half xref stream, if any
2616
    //    second half compressed objects
2617
    //  first half
2618
    //    linearization dictionary
2619
    //    first half xref stream, if any
2620
    //    part 4 uncompresesd objects
2621
    //    encryption dictionary, if any
2622
    //    hint stream
2623
    //    part 6 uncompressed objects
2624
    //    first half compressed objects
2625
    //
2626
2627
    // Second half objects
2628
8.61k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2629
8.61k
    int second_half_first_obj = 1;
2630
8.61k
    int after_second_half = 1 + second_half_uncompressed;
2631
8.61k
    m->next_objid = after_second_half;
2632
8.61k
    int second_half_xref = 0;
2633
8.61k
    bool need_xref_stream = !m->obj.streams_empty;
2634
8.61k
    if (need_xref_stream) {
2635
856
        second_half_xref = m->next_objid++;
2636
856
    }
2637
    // Assign numbers to all compressed objects in the second half.
2638
8.61k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2639
33.2k
    for (int i = 0; i < 3; ++i) {
2640
51.1k
        for (auto const& oh: *vecs2[i]) {
2641
51.1k
            assignCompressedObjectNumbers(oh.getObjGen());
2642
51.1k
        }
2643
24.6k
    }
2644
8.61k
    int second_half_end = m->next_objid - 1;
2645
8.61k
    int second_trailer_size = m->next_objid;
2646
2647
    // First half objects
2648
8.61k
    int first_half_start = m->next_objid;
2649
8.61k
    int lindict_id = m->next_objid++;
2650
8.61k
    int first_half_xref = 0;
2651
8.61k
    if (need_xref_stream) {
2652
856
        first_half_xref = m->next_objid++;
2653
856
    }
2654
8.61k
    int part4_first_obj = m->next_objid;
2655
8.61k
    m->next_objid += QIntC::to_int(part4.size());
2656
8.61k
    int after_part4 = m->next_objid;
2657
8.61k
    if (m->encryption) {
2658
8.21k
        m->encryption_dict_objid = m->next_objid++;
2659
8.21k
    }
2660
8.61k
    int hint_id = m->next_objid++;
2661
8.61k
    int part6_first_obj = m->next_objid;
2662
8.61k
    m->next_objid += QIntC::to_int(part6.size());
2663
8.61k
    int after_part6 = m->next_objid;
2664
    // Assign numbers to all compressed objects in the first half
2665
8.61k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2666
25.0k
    for (int i = 0; i < 2; ++i) {
2667
73.6k
        for (auto const& oh: *vecs1[i]) {
2668
73.6k
            assignCompressedObjectNumbers(oh.getObjGen());
2669
73.6k
        }
2670
16.4k
    }
2671
8.61k
    int first_half_end = m->next_objid - 1;
2672
8.61k
    int first_trailer_size = m->next_objid;
2673
2674
8.61k
    int part4_end_marker = part4.back().getObjectID();
2675
8.61k
    int part6_end_marker = part6.back().getObjectID();
2676
8.61k
    qpdf_offset_t space_before_zero = 0;
2677
8.61k
    qpdf_offset_t file_size = 0;
2678
8.61k
    qpdf_offset_t part6_end_offset = 0;
2679
8.61k
    qpdf_offset_t first_half_max_obj_offset = 0;
2680
8.61k
    qpdf_offset_t second_xref_offset = 0;
2681
8.61k
    qpdf_offset_t first_xref_end = 0;
2682
8.61k
    qpdf_offset_t second_xref_end = 0;
2683
2684
8.61k
    m->next_objid = part4_first_obj;
2685
8.61k
    enqueuePart(part4);
2686
8.61k
    if (m->next_objid != after_part4) {
2687
        // This can happen with very botched files as in the fuzzer test. There are likely some
2688
        // faulty assumptions in calculateLinearizationData
2689
7
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2690
7
    }
2691
8.60k
    m->next_objid = part6_first_obj;
2692
8.60k
    enqueuePart(part6);
2693
8.60k
    if (m->next_objid != after_part6) {
2694
92
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2695
92
    }
2696
8.51k
    m->next_objid = second_half_first_obj;
2697
8.51k
    enqueuePart(part7);
2698
8.51k
    enqueuePart(part8);
2699
8.51k
    enqueuePart(part9);
2700
8.51k
    if (m->next_objid != after_second_half) {
2701
397
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2702
397
    }
2703
2704
8.11k
    qpdf_offset_t hint_length = 0;
2705
8.11k
    std::string hint_buffer;
2706
2707
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2708
2709
8.11k
    FILE* lin_pass1_file = nullptr;
2710
8.11k
    auto pp_pass1 = m->pipeline_stack.popper();
2711
8.11k
    auto pp_md5 = m->pipeline_stack.popper();
2712
14.9k
    for (int pass: {1, 2}) {
2713
14.9k
        if (pass == 1) {
2714
7.67k
            if (!m->lin_pass1_filename.empty()) {
2715
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2716
0
                m->pipeline_stack.activate(
2717
0
                    pp_pass1,
2718
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2719
7.67k
            } else {
2720
7.67k
                m->pipeline_stack.activate(pp_pass1, true);
2721
7.67k
            }
2722
7.67k
            if (m->deterministic_id) {
2723
0
                m->pipeline_stack.activate_md5(pp_md5);
2724
0
            }
2725
7.67k
        }
2726
2727
        // Part 1: header
2728
2729
14.9k
        writeHeader();
2730
2731
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2732
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2733
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2734
        // linearization parameter dictionary must appear within the first 1024 characters of the
2735
        // file.
2736
2737
14.9k
        qpdf_offset_t pos = m->pipeline->getCount();
2738
14.9k
        openObject(lindict_id);
2739
14.9k
        write("<<");
2740
14.9k
        if (pass == 2) {
2741
7.29k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2742
7.29k
            int first_page_object = m->obj[pages.at(0)].renumber;
2743
2744
7.29k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2745
            // Implementation note 121 states that a space is mandatory after this open bracket.
2746
7.29k
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2747
7.29k
            write(hint_length);
2748
7.29k
            write(" ] /O ").write(first_page_object);
2749
7.29k
            write(" /E ").write(part6_end_offset + hint_length);
2750
7.29k
            write(" /N ").write(pages.size());
2751
7.29k
            write(" /T ").write(space_before_zero + hint_length);
2752
7.29k
        }
2753
14.9k
        write(" >>");
2754
14.9k
        closeObject(lindict_id);
2755
14.9k
        static int const pad = 200;
2756
14.9k
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2757
2758
        // If the user supplied any additional header text, write it here after the linearization
2759
        // parameter dictionary.
2760
14.9k
        write(m->extra_header_text);
2761
2762
        // Part 3: first page cross reference table and trailer.
2763
2764
14.9k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2765
14.9k
        qpdf_offset_t hint_offset = 0;
2766
14.9k
        if (pass == 2) {
2767
7.29k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2768
7.29k
        }
2769
14.9k
        if (need_xref_stream) {
2770
            // Must pad here too.
2771
1.33k
            if (pass == 1) {
2772
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2773
                // reserved for each file offset.  This would provide adequate space for the xref
2774
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2775
                // file, which is extremely likely.  In the second pass, we will know the actual
2776
                // value for this, but it's okay if it's smaller.
2777
743
                first_half_max_obj_offset = 1 << 25;
2778
743
            }
2779
1.33k
            pos = m->pipeline->getCount();
2780
1.33k
            writeXRefStream(
2781
1.33k
                first_half_xref,
2782
1.33k
                first_half_end,
2783
1.33k
                first_half_max_obj_offset,
2784
1.33k
                t_lin_first,
2785
1.33k
                first_half_start,
2786
1.33k
                first_half_end,
2787
1.33k
                first_trailer_size,
2788
1.33k
                hint_length + second_xref_offset,
2789
1.33k
                hint_id,
2790
1.33k
                hint_offset,
2791
1.33k
                hint_length,
2792
1.33k
                (pass == 1),
2793
1.33k
                pass);
2794
1.33k
            qpdf_offset_t endpos = m->pipeline->getCount();
2795
1.33k
            if (pass == 1) {
2796
                // Pad so we have enough room for the real xref stream.
2797
742
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2798
742
                first_xref_end = m->pipeline->getCount();
2799
742
            } else {
2800
                // Pad so that the next object starts at the same place as in pass 1.
2801
596
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2802
2803
596
                if (m->pipeline->getCount() != first_xref_end) {
2804
0
                    throw std::logic_error(
2805
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
596
            }
2809
1.33k
            write("\n");
2810
13.6k
        } else {
2811
13.6k
            writeXRefTable(
2812
13.6k
                t_lin_first,
2813
13.6k
                first_half_start,
2814
13.6k
                first_half_end,
2815
13.6k
                first_trailer_size,
2816
13.6k
                hint_length + second_xref_offset,
2817
13.6k
                (pass == 1),
2818
13.6k
                hint_id,
2819
13.6k
                hint_offset,
2820
13.6k
                hint_length,
2821
13.6k
                pass);
2822
13.6k
            write("startxref\n0\n%%EOF\n");
2823
13.6k
        }
2824
2825
        // Parts 4 through 9
2826
2827
213k
        for (auto const& cur_object: m->object_queue) {
2828
213k
            if (cur_object.getObjectID() == part6_end_marker) {
2829
14.8k
                first_half_max_obj_offset = m->pipeline->getCount();
2830
14.8k
            }
2831
213k
            writeObject(cur_object);
2832
213k
            if (cur_object.getObjectID() == part4_end_marker) {
2833
14.9k
                if (m->encryption) {
2834
14.9k
                    writeEncryptionDictionary();
2835
14.9k
                }
2836
14.9k
                if (pass == 1) {
2837
7.62k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
7.62k
                } else {
2839
                    // Part 5: hint stream
2840
7.29k
                    write(hint_buffer);
2841
7.29k
                }
2842
14.9k
            }
2843
213k
            if (cur_object.getObjectID() == part6_end_marker) {
2844
14.7k
                part6_end_offset = m->pipeline->getCount();
2845
14.7k
            }
2846
213k
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
14.9k
        second_xref_offset = m->pipeline->getCount();
2853
14.9k
        if (need_xref_stream) {
2854
1.18k
            pos = m->pipeline->getCount();
2855
1.18k
            space_before_zero = writeXRefStream(
2856
1.18k
                second_half_xref,
2857
1.18k
                second_half_end,
2858
1.18k
                second_xref_offset,
2859
1.18k
                t_lin_second,
2860
1.18k
                0,
2861
1.18k
                second_half_end,
2862
1.18k
                second_trailer_size,
2863
1.18k
                0,
2864
1.18k
                0,
2865
1.18k
                0,
2866
1.18k
                0,
2867
1.18k
                (pass == 1),
2868
1.18k
                pass);
2869
1.18k
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
1.18k
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
595
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2875
595
                second_xref_end = m->pipeline->getCount();
2876
595
            } else {
2877
                // Make the file size the same.
2878
594
                auto padding =
2879
594
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2880
594
                write(padding, ' ').write("\n");
2881
2882
                // If this assertion fails, maybe we didn't have enough padding above.
2883
594
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2884
0
                    throw std::logic_error(
2885
0
                        "count mismatch after xref stream; possible insufficient padding?");
2886
0
                }
2887
594
            }
2888
13.7k
        } else {
2889
13.7k
            space_before_zero = writeXRefTable(
2890
13.7k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2891
13.7k
        }
2892
14.9k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2893
2894
14.9k
        if (pass == 1) {
2895
7.29k
            if (m->deterministic_id) {
2896
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2897
0
                computeDeterministicIDData();
2898
0
                pp_md5.pop();
2899
0
            }
2900
2901
            // Close first pass pipeline
2902
7.29k
            file_size = m->pipeline->getCount();
2903
7.29k
            pp_pass1.pop();
2904
2905
            // Save hint offset since it will be set to zero by calling openObject.
2906
7.29k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2907
2908
            // Write hint stream to a buffer
2909
7.29k
            {
2910
7.29k
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2911
7.29k
                writeHintStream(hint_id);
2912
7.29k
            }
2913
7.29k
            hint_length = QIntC::to_offset(hint_buffer.size());
2914
2915
            // Restore hint offset
2916
7.29k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2917
7.29k
            if (lin_pass1_file) {
2918
                // Write some debugging information
2919
0
                fprintf(
2920
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2921
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2922
0
                fprintf(
2923
0
                    lin_pass1_file,
2924
0
                    "%% second_xref_offset=%s\n",
2925
0
                    std::to_string(second_xref_offset).c_str());
2926
0
                fprintf(
2927
0
                    lin_pass1_file,
2928
0
                    "%% second_xref_end=%s\n",
2929
0
                    std::to_string(second_xref_end).c_str());
2930
0
                fclose(lin_pass1_file);
2931
0
                lin_pass1_file = nullptr;
2932
0
            }
2933
7.29k
        }
2934
14.9k
    }
2935
8.11k
}
2936
2937
void
2938
QPDFWriter::enqueueObjectsStandard()
2939
0
{
2940
0
    if (m->preserve_unreferenced_objects) {
2941
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2942
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2943
0
            enqueueObject(oh);
2944
0
        }
2945
0
    }
2946
2947
    // Put root first on queue.
2948
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2949
0
    enqueueObject(trailer.getKey("/Root"));
2950
2951
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2952
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2953
0
    for (auto& item: trailer.as_dictionary()) {
2954
0
        if (!item.second.null()) {
2955
0
            enqueueObject(item.second);
2956
0
        }
2957
0
    }
2958
0
}
2959
2960
void
2961
QPDFWriter::enqueueObjectsPCLm()
2962
0
{
2963
    // Image transform stream content for page strip images. Each of this new stream has to come
2964
    // after every page image strip written in the pclm file.
2965
0
    std::string image_transform_content = "q /image Do Q\n";
2966
2967
    // enqueue all pages first
2968
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2969
0
    for (auto& page: all) {
2970
        // enqueue page
2971
0
        enqueueObject(page);
2972
2973
        // enqueue page contents stream
2974
0
        enqueueObject(page.getKey("/Contents"));
2975
2976
        // enqueue all the strips for each page
2977
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2978
0
        for (auto& image: strips.as_dictionary()) {
2979
0
            if (!image.second.null()) {
2980
0
                enqueueObject(image.second);
2981
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2982
0
            }
2983
0
        }
2984
0
    }
2985
2986
    // Put root in queue.
2987
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2988
0
    enqueueObject(trailer.getKey("/Root"));
2989
0
}
2990
2991
void
2992
QPDFWriter::indicateProgress(bool decrement, bool finished)
2993
320k
{
2994
320k
    if (decrement) {
2995
51.7k
        --m->events_seen;
2996
51.7k
        return;
2997
51.7k
    }
2998
2999
268k
    ++m->events_seen;
3000
3001
268k
    if (!m->progress_reporter.get()) {
3002
268k
        return;
3003
268k
    }
3004
3005
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3006
0
        int percentage =
3007
0
            (finished ? 100
3008
0
                 : m->next_progress_report == 0
3009
0
                 ? 0
3010
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3011
0
        m->progress_reporter->reportProgress(percentage);
3012
0
    }
3013
0
    int increment = std::max(1, (m->events_expected / 100));
3014
0
    while (m->events_seen >= m->next_progress_report) {
3015
0
        m->next_progress_report += increment;
3016
0
    }
3017
0
}
3018
3019
void
3020
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3021
0
{
3022
0
    m->progress_reporter = pr;
3023
0
}
3024
3025
void
3026
QPDFWriter::writeStandard()
3027
0
{
3028
0
    auto pp_md5 = m->pipeline_stack.popper();
3029
0
    if (m->deterministic_id) {
3030
0
        m->pipeline_stack.activate_md5(pp_md5);
3031
0
    }
3032
3033
    // Start writing
3034
3035
0
    writeHeader();
3036
0
    write(m->extra_header_text);
3037
3038
0
    if (m->pclm) {
3039
0
        enqueueObjectsPCLm();
3040
0
    } else {
3041
0
        enqueueObjectsStandard();
3042
0
    }
3043
3044
    // Now start walking queue, outputting each object.
3045
0
    while (m->object_queue_front < m->object_queue.size()) {
3046
0
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3047
0
        ++m->object_queue_front;
3048
0
        writeObject(cur_object);
3049
0
    }
3050
3051
    // Write out the encryption dictionary, if any
3052
0
    if (m->encryption) {
3053
0
        writeEncryptionDictionary();
3054
0
    }
3055
3056
    // Now write out xref.  next_objid is now the number of objects.
3057
0
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3058
0
    if (m->object_stream_to_objects.empty()) {
3059
        // Write regular cross-reference table
3060
0
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3061
0
    } else {
3062
        // Write cross-reference stream.
3063
0
        int xref_id = m->next_objid++;
3064
0
        writeXRefStream(
3065
0
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3066
0
    }
3067
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3068
3069
0
    if (m->deterministic_id) {
3070
0
        QTC::TC(
3071
0
            "qpdf",
3072
0
            "QPDFWriter standard deterministic ID",
3073
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3074
0
    }
3075
0
}