Coverage Report

Created: 2025-08-03 06:19

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Deliberately defined out of line with an empty body: the destructor must be explicit and not
// inline -- see QPDF_DLL_CLASS in README-maintainer.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
}
35
36
// Store the callback that reportProgress() will invoke. The parameter is taken by value, so it is
// moved into the member rather than copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
40
41
// Deliberately defined out of line with an empty body: the destructor must be explicit and not
// inline -- see QPDF_DLL_CLASS in README-maintainer.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
761k
                stack(&stack)
102
761k
            {
103
761k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
70.4k
            top(top)
112
70.4k
        {
113
70.4k
        }
114
115
        Popper
116
        popper()
117
94.6k
        {
118
94.6k
            return {*this};
119
94.6k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
70.4k
        {
124
70.4k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
70.4k
            top = c.get();
126
70.4k
            stack.emplace_back(std::move(c));
127
70.4k
        }
128
129
        Popper
130
        activate(std::string& str)
131
495k
        {
132
495k
            Popper pp{*this};
133
495k
            activate(pp, str);
134
495k
            return pp;
135
495k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
495k
        {
140
495k
            activate(pp, false, &str, nullptr);
141
495k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
171k
        {
156
171k
            Popper pp{*this};
157
171k
            activate(pp, discard, str, std::move(next));
158
171k
            return pp;
159
171k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
697k
        {
168
697k
            std::unique_ptr<pl::Count> c;
169
697k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
697k
            } else if (discard) {
172
202k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
495k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
495k
            } else {
176
495k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
495k
            }
178
697k
            pp.stack_id = last_id;
179
697k
            top = c.get();
180
697k
            stack.emplace_back(std::move(c));
181
697k
        }
182
        void
183
        activate_md5(Popper& pp)
184
32.6k
        {
185
32.6k
            qpdf_assert_debug(!md5_pipeline);
186
32.6k
            qpdf_assert_debug(md5_id == 0);
187
32.6k
            qpdf_assert_debug(top->getCount() == 0);
188
32.6k
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
32.6k
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
32.6k
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
32.6k
            pp.stack_id = last_id;
193
32.6k
            md5_id = last_id;
194
32.6k
            top = c.get();
195
32.6k
            stack.emplace_back(std::move(c));
196
32.6k
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
31.5k
        {
202
31.5k
            qpdf_assert_debug(md5_pipeline);
203
31.5k
            auto digest = md5_pipeline->getHexDigest();
204
31.5k
            md5_pipeline->enable(false);
205
31.5k
            return digest;
206
31.5k
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
761k
        {
218
761k
            if (!stack_id) {
219
31.2k
                return;
220
31.2k
            }
221
730k
            qpdf_assert_debug(stack.size() >= 2);
222
730k
            top->finish();
223
730k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
730k
            qpdf_assert_debug(top->id() == stack_id);
229
730k
            if (stack_id == md5_id) {
230
32.6k
                md5_pipeline = nullptr;
231
32.6k
                md5_id = 0;
232
32.6k
            }
233
730k
            stack.pop_back();
234
730k
            top = stack.back().get();
235
730k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
// Pop the recorded entry when this Popper is still attached to a stack.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
252
253
void
254
Pl_stack::Popper::pop()
255
43.2k
{
256
43.2k
    if (stack) {
257
43.2k
        stack->pop(stack_id);
258
43.2k
    }
259
43.2k
    stack_id = 0;
260
43.2k
    stack = nullptr;
261
43.2k
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
// Bind to the source QPDF and attach the pipeline stack to the `pipeline` pointer. root_og starts
// from its in-class default of (-1, 0) and is replaced only when the document root is an indirect
// object.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    pipeline_stack(pipeline)
{
    if (pdf.getRoot().getObjGen().isIndirect()) {
        root_og = pdf.getRoot().getObjGen();
    }
}
350
351
// Close the output file if this object was told to, and free any output buffer that was never
// handed out through getBuffer().
QPDFWriter::Members::~Members()
{
    if (close_file && file != nullptr) {
        fclose(file);
    }
    delete output_buffer;
}
358
359
// Create a writer for pdf with no output destination configured yet.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
363
364
// Create a writer for pdf and direct its output to filename via setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    this->setOutputFilename(filename);
}
369
370
// Create a writer for pdf and direct its output to an already-open FILE via setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    this->setOutputFile(description, file, close_file);
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
// Hand the stored output buffer pointer to the caller and forget it, so the Members destructor no
// longer deletes it. The returned pointer is null if no buffer is stored.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
70.4k
{
430
70.4k
    m->filename = "custom pipeline";
431
70.4k
    m->pipeline_stack.initialize(p);
432
70.4k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
35.3k
{
437
35.3k
    m->object_stream_mode = mode;
438
35.3k
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
70.4k
{
473
70.4k
    m->stream_decode_level = val;
474
70.4k
    m->stream_decode_level_set = true;
475
70.4k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
17.6k
{
493
17.6k
    m->qdf_mode = val;
494
17.6k
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
123k
{
511
123k
    bool set_version = false;
512
123k
    bool set_extension_level = false;
513
123k
    if (m->min_pdf_version.empty()) {
514
69.9k
        set_version = true;
515
69.9k
        set_extension_level = true;
516
69.9k
    } else {
517
53.4k
        int old_major = 0;
518
53.4k
        int old_minor = 0;
519
53.4k
        int min_major = 0;
520
53.4k
        int min_minor = 0;
521
53.4k
        parseVersion(version, old_major, old_minor);
522
53.4k
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
53.4k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
53.4k
        if (compare > 0) {
525
4.00k
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
4.00k
            set_version = true;
527
4.00k
            set_extension_level = true;
528
49.4k
        } else if (compare == 0) {
529
3.69k
            if (extension_level > m->min_extension_level) {
530
12
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
12
                set_extension_level = true;
532
12
            }
533
3.69k
        }
534
53.4k
    }
535
536
123k
    if (set_version) {
537
73.9k
        m->min_pdf_version = version;
538
73.9k
    }
539
123k
    if (set_extension_level) {
540
73.9k
        m->min_extension_level = extension_level;
541
73.9k
    }
542
123k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
33.4k
{
575
33.4k
    m->static_id = val;
576
33.4k
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
36.9k
{
581
36.9k
    m->deterministic_id = val;
582
36.9k
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
36.6k
{
607
36.6k
    m->linearized = val;
608
36.6k
    if (val) {
609
36.6k
        m->pclm = false;
610
36.6k
    }
611
36.6k
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
16.1k
{
665
16.1k
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
16.1k
    interpretR3EncryptionParameters(
667
16.1k
        allow_accessibility,
668
16.1k
        allow_extract,
669
16.1k
        allow_assemble,
670
16.1k
        allow_annotate_and_form,
671
16.1k
        allow_form_filling,
672
16.1k
        allow_modify_other,
673
16.1k
        print,
674
16.1k
        qpdf_r3m_all);
675
16.1k
    setEncryptionParameters(user_password, owner_password);
676
16.1k
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
17.3k
{
746
17.3k
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
17.3k
    interpretR3EncryptionParameters(
748
17.3k
        allow_accessibility,
749
17.3k
        allow_extract,
750
17.3k
        allow_assemble,
751
17.3k
        allow_annotate_and_form,
752
17.3k
        allow_form_filling,
753
17.3k
        allow_modify_other,
754
17.3k
        print,
755
17.3k
        qpdf_r3m_all);
756
17.3k
    m->encrypt_use_aes = true;
757
17.3k
    setEncryptionParameters(user_password, owner_password);
758
17.3k
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
33.4k
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
33.4k
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
33.4k
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
33.4k
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
33.4k
    case qpdf_r3p_full:
817
33.4k
        break;
818
        // no default so gcc warns for missing cases
819
33.4k
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
33.4k
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
33.4k
    case qpdf_r3m_all:
840
33.4k
        break;
841
        // no default so gcc warns for missing cases
842
33.4k
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
33.4k
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
33.4k
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
33.4k
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
33.4k
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
33.4k
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
33.4k
{
862
33.4k
    generateID();
863
33.4k
    m->encryption->setId1(m->id1);
864
33.4k
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
33.4k
    setEncryptionMinimumVersion();
866
33.4k
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
19.2k
{
871
19.2k
    m->preserve_encryption = false;
872
19.2k
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
19.2k
    if (trailer.hasKey("/Encrypt")) {
874
109
        generateID();
875
109
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
109
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
109
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
109
        int key_len = 5;
879
109
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
109
        const bool encrypt_metadata =
883
109
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
109
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
109
            : true;
886
109
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
109
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
109
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
109
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
109
            V,
898
109
            encrypt.getKey("/R").getIntValueAsInt(),
899
109
            key_len,
900
109
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
109
            encrypt.getKey("/O").getStringValue(),
902
109
            encrypt.getKey("/U").getStringValue(),
903
109
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
109
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
109
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
109
            m->id1, // m->id1 == the other file's id1
907
109
            encrypt_metadata);
908
109
        m->encryption_key = V >= 5
909
109
            ? qpdf.getEncryptionKey()
910
109
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
109
        setEncryptionMinimumVersion();
912
109
    }
913
19.2k
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
106k
{
955
106k
    major = QUtil::string_to_int(version.c_str());
956
106k
    minor = 0;
957
106k
    size_t p = version.find('.');
958
106k
    if ((p != std::string::npos) && (version.length() > p)) {
959
106k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
106k
    }
961
106k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
106k
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
663
    }
967
106k
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
53.4k
{
972
53.4k
    if (major1 < major2) {
973
617
        return -1;
974
52.7k
    } else if (major1 > major2) {
975
798
        return 1;
976
51.9k
    } else if (minor1 < minor2) {
977
45.0k
        return -1;
978
45.0k
    } else if (minor1 > minor2) {
979
3.20k
        return 1;
980
3.69k
    } else {
981
3.69k
        return 0;
982
3.69k
    }
983
53.4k
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
33.4k
{
988
33.4k
    auto const R = m->encryption->getR();
989
33.4k
    if (R >= 6) {
990
17.3k
        setMinimumPDFVersion("1.7", 8);
991
17.3k
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
16.0k
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
16.0k
    } else if (R == 3) {
996
16.0k
        setMinimumPDFVersion("1.4");
997
16.0k
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
33.4k
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
1.05M
{
1005
1.05M
    if (m->encryption) {
1006
617k
        m->cur_data_key = QPDF::compute_data_key(
1007
617k
            m->encryption_key,
1008
617k
            objid,
1009
617k
            0,
1010
617k
            m->encrypt_use_aes,
1011
617k
            m->encryption->getV(),
1012
617k
            m->encryption->getR());
1013
617k
    }
1014
1.05M
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
185k
{
1019
185k
    unsigned int bytes = 0;
1020
428k
    while (n) {
1021
243k
        ++bytes;
1022
243k
        n >>= 8;
1023
243k
    }
1024
185k
    return bytes;
1025
185k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
3.39M
{
1030
3.39M
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
3.39M
    unsigned char data[sizeof(unsigned long long)];
1034
8.33M
    for (unsigned int i = 0; i < bytes; ++i) {
1035
4.93M
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
4.93M
        val >>= 8;
1037
4.93M
    }
1038
3.39M
    m->pipeline->write(data, bytes);
1039
3.39M
}
1040
1041
QPDFWriter&
1042
QPDFWriter::write(std::string_view str)
1043
62.6M
{
1044
62.6M
    m->pipeline->write(str);
1045
62.6M
    return *this;
1046
62.6M
}
1047
1048
QPDFWriter&
1049
QPDFWriter::write(std::integral auto val)
1050
6.42M
{
1051
6.42M
    m->pipeline->write(std::to_string(val));
1052
6.42M
    return *this;
1053
6.42M
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
4.34M
{
1051
4.34M
    m->pipeline->write(std::to_string(val));
1052
4.34M
    return *this;
1053
4.34M
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
1.41M
{
1051
1.41M
    m->pipeline->write(std::to_string(val));
1052
1.41M
    return *this;
1053
1.41M
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
476k
{
1051
476k
    m->pipeline->write(std::to_string(val));
1052
476k
    return *this;
1053
476k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
184k
{
1051
184k
    m->pipeline->write(std::to_string(val));
1052
184k
    return *this;
1053
184k
}
1054
1055
QPDFWriter&
1056
QPDFWriter::write(size_t count, char c)
1057
139k
{
1058
139k
    m->pipeline->write(count, c);
1059
139k
    return *this;
1060
139k
}
1061
1062
QPDFWriter&
1063
QPDFWriter::write_name(std::string const& str)
1064
4.49M
{
1065
4.49M
    m->pipeline->write(Name::normalize(str));
1066
4.49M
    return *this;
1067
4.49M
}
1068
1069
QPDFWriter&
1070
QPDFWriter::write_string(std::string const& str, bool force_binary)
1071
361k
{
1072
361k
    m->pipeline->write(QPDF_String(str).unparse(force_binary));
1073
361k
    return *this;
1074
361k
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
3.72M
{
1080
3.72M
    if (m->qdf_mode) {
1081
455k
        m->pipeline->write(std::forward<Args>(args)...);
1082
455k
    }
1083
3.72M
    return *this;
1084
3.72M
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
2.83M
{
1080
2.83M
    if (m->qdf_mode) {
1081
380k
        m->pipeline->write(std::forward<Args>(args)...);
1082
380k
    }
1083
2.83M
    return *this;
1084
2.83M
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
645k
{
1080
645k
    if (m->qdf_mode) {
1081
41.0k
        m->pipeline->write(std::forward<Args>(args)...);
1082
41.0k
    }
1083
645k
    return *this;
1084
645k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
149k
{
1080
149k
    if (m->qdf_mode) {
1081
16.8k
        m->pipeline->write(std::forward<Args>(args)...);
1082
16.8k
    }
1083
149k
    return *this;
1084
149k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
92.9k
{
1080
92.9k
    if (m->qdf_mode) {
1081
17.4k
        m->pipeline->write(std::forward<Args>(args)...);
1082
17.4k
    }
1083
92.9k
    return *this;
1084
92.9k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
1.41M
{
1090
1.41M
    if (!m->qdf_mode) {
1091
1.26M
        m->pipeline->write(std::forward<Args>(args)...);
1092
1.26M
    }
1093
1.41M
    return *this;
1094
1.41M
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
1.26M
{
1090
1.26M
    if (!m->qdf_mode) {
1091
1.13M
        m->pipeline->write(std::forward<Args>(args)...);
1092
1.13M
    }
1093
1.26M
    return *this;
1094
1.26M
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
149k
{
1090
149k
    if (!m->qdf_mode) {
1091
132k
        m->pipeline->write(std::forward<Args>(args)...);
1092
132k
    }
1093
149k
    return *this;
1094
149k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
346k
{
1099
346k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
104k
        length += 32 - (length & 0xf);
1103
104k
    }
1104
346k
}
1105
1106
QPDFWriter&
1107
QPDFWriter::write_encrypted(std::string_view str)
1108
345k
{
1109
345k
    if (!(m->encryption && !m->cur_data_key.empty())) {
1110
199k
        write(str);
1111
199k
    } else if (m->encrypt_use_aes) {
1112
103k
        write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
1113
103k
    } else {
1114
42.3k
        write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
1115
42.3k
    }
1116
1117
345k
    return *this;
1118
345k
}
1119
1120
void
1121
QPDFWriter::computeDeterministicIDData()
1122
31.5k
{
1123
31.5k
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
31.5k
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
31.5k
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
31.5k
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
1.27M
{
1135
1.27M
    if (objid == 0) {
1136
15.5k
        objid = m->next_objid++;
1137
15.5k
    }
1138
1.27M
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
1.27M
    write(objid).write(" 0 obj\n");
1140
1.27M
    return objid;
1141
1.27M
}
1142
1143
void
1144
QPDFWriter::closeObject(int objid)
1145
1.27M
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
1.27M
    write("\nendobj\n").write_qdf("\n");
1148
1.27M
    auto& new_obj = m->new_obj[objid];
1149
1.27M
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
1.27M
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
426k
{
1155
426k
    int objid = og.getObj();
1156
426k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
402k
        return;
1159
402k
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
339k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
339k
        m->obj[iter].renumber = m->next_objid++;
1164
339k
    }
1165
24.1k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
27.0M
{
1170
27.0M
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
2.55M
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
2.55M
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
1.97k
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
1.97k
            return;
1190
1.97k
        }
1191
1192
2.55M
        QPDFObjGen og = object.getObjGen();
1193
2.55M
        auto& obj = m->obj[og];
1194
1195
2.55M
        if (obj.renumber == 0) {
1196
736k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
3.49k
                obj.renumber = -1;
1201
3.49k
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
733k
            } else {
1203
733k
                m->object_queue.push_back(object);
1204
733k
                obj.renumber = m->next_objid++;
1205
1206
733k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
23.8k
                    if (!m->linearized) {
1210
2.40k
                        assignCompressedObjectNumbers(og);
1211
2.40k
                    }
1212
709k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
53.0k
                    ++m->next_objid;
1215
53.0k
                }
1216
733k
            }
1217
1.81M
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
481
        }
1221
2.55M
        return;
1222
24.4M
    } else if (!m->linearized) {
1223
24.4M
        if (object.isArray()) {
1224
12.4M
            for (auto& item: object.as_array()) {
1225
12.4M
                enqueueObject(item);
1226
12.4M
            }
1227
22.2M
        } else if (auto d = object.as_dictionary()) {
1228
22.2M
            for (auto const& item: d) {
1229
3.57M
                if (!item.second.null()) {
1230
3.24M
                    enqueueObject(item.second);
1231
3.24M
                }
1232
3.57M
            }
1233
22.2M
        }
1234
24.4M
    } else {
1235
        // ignore
1236
155
    }
1237
27.0M
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1241
20.0M
{
1242
20.0M
    if (!m->linearized) {
1243
10.8M
        enqueueObject(child);
1244
10.8M
    }
1245
20.0M
    if (child.isIndirect()) {
1246
2.22M
        write(m->obj[child].renumber).write(" 0 R");
1247
17.8M
    } else {
1248
17.8M
        unparseObject(child, level, flags);
1249
17.8M
    }
1250
20.0M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
150k
{
1256
150k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
150k
    if (xref_stream) {
1258
61.8k
        m->cur_data_key.clear();
1259
88.3k
    } else {
1260
88.3k
        write("trailer <<");
1261
88.3k
    }
1262
150k
    write_qdf("\n");
1263
150k
    if (which == t_lin_second) {
1264
57.8k
        write(" /Size ").write(size);
1265
92.3k
    } else {
1266
222k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
222k
            if (value.null()) {
1268
47.2k
                continue;
1269
47.2k
            }
1270
174k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
174k
            if (key == "/Size") {
1272
20.9k
                write(size);
1273
20.9k
                if (which == t_lin_first) {
1274
13.6k
                    write(" /Prev ");
1275
13.6k
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
13.6k
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
13.6k
                }
1278
154k
            } else {
1279
154k
                unparseChild(value, 1, 0);
1280
154k
            }
1281
174k
            write_qdf("\n");
1282
174k
        }
1283
92.3k
    }
1284
1285
    // Write ID
1286
150k
    write_qdf(" ").write(" /ID [");
1287
150k
    if (linearization_pass == 1) {
1288
59.6k
        std::string original_id1 = getOriginalID1();
1289
59.6k
        if (original_id1.empty()) {
1290
53.5k
            write("<00000000000000000000000000000000>");
1291
53.5k
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
6.06k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
6.06k
            write("<").write(len, '0').write(">");
1298
6.06k
        }
1299
59.6k
        write("<00000000000000000000000000000000>");
1300
90.5k
    } else {
1301
90.5k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
17.1k
            computeDeterministicIDData();
1303
17.1k
        }
1304
90.5k
        generateID();
1305
90.5k
        write_string(m->id1, true).write_string(m->id2, true);
1306
90.5k
    }
1307
150k
    write("]");
1308
1309
150k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
91.7k
        if (m->encryption) {
1312
45.6k
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
45.6k
        }
1314
91.7k
    }
1315
1316
150k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
150k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
378k
{
1326
378k
    compress_stream = false;
1327
378k
    is_root_metadata = false;
1328
1329
378k
    QPDFObjGen old_og = stream.getObjGen();
1330
378k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
378k
    if (stream.isRootMetadata()) {
1333
2.03k
        is_root_metadata = true;
1334
2.03k
    }
1335
378k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
378k
    bool filter_on_write = stream.getFilterOnWrite();
1337
378k
    if (!filter_on_write) {
1338
55.0k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
55.0k
        filter = false;
1340
55.0k
    }
1341
378k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
270k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
270k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
270k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
56.1k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
56.1k
            filter = false;
1351
56.1k
        }
1352
270k
    }
1353
378k
    bool normalize = false;
1354
378k
    bool uncompress = false;
1355
378k
    if (filter_on_write && is_root_metadata &&
1356
378k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
851
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
851
        filter = true;
1359
851
        compress_stream = false;
1360
851
        uncompress = true;
1361
377k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
6.77k
        normalize = true;
1363
6.77k
        filter = true;
1364
370k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
213k
        compress_stream = true;
1366
213k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
213k
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
378k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
378k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
10.5k
        filter = true;
1373
10.5k
        compress_stream = false;
1374
10.5k
    }
1375
1376
378k
    bool filtered = false;
1377
454k
    for (bool first_attempt: {true, false}) {
1378
454k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
454k
                                          : m->pipeline_stack.activate(true);
1380
1381
454k
        try {
1382
454k
            filtered = stream.pipeStreamData(
1383
454k
                m->pipeline,
1384
454k
                !filter ? 0
1385
454k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
273k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
454k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
454k
                false,
1389
454k
                first_attempt);
1390
454k
            if (filter && !filtered) {
1391
                // Try again
1392
76.4k
                filter = false;
1393
76.4k
                stream.setFilterOnWrite(false);
1394
378k
            } else {
1395
378k
                break;
1396
378k
            }
1397
454k
        } catch (std::runtime_error& e) {
1398
533
            if (filter && first_attempt) {
1399
437
                stream.warnIfPossible("error while getting stream data: "s + e.what());
1400
437
                stream.warnIfPossible("qpdf will attempt to write the damaged stream unchanged");
1401
437
                filter = false;
1402
437
                stream.setFilterOnWrite(false);
1403
437
                continue;
1404
437
            }
1405
96
            throw std::runtime_error(
1406
96
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
533
        }
1408
76.4k
        if (stream_data) {
1409
45.7k
            stream_data->clear();
1410
45.7k
        }
1411
76.4k
    }
1412
378k
    if (!filtered) {
1413
180k
        compress_stream = false;
1414
180k
    }
1415
378k
    return filtered;
1416
378k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress)
1421
19.6M
{
1422
19.6M
    QPDFObjGen old_og = object.getObjGen();
1423
19.6M
    int child_flags = flags & ~f_stream;
1424
19.6M
    if (level < 0) {
1425
0
        throw std::logic_error("invalid level in QPDFWriter::unparseObject");
1426
0
    }
1427
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1428
    // include the preceding newline.
1429
19.6M
    std::string indent_large = " ";
1430
19.6M
    if (m->qdf_mode) {
1431
9.66M
        indent_large.append(static_cast<size_t>(2 * (level + 1)), ' ');
1432
9.66M
        indent_large[0] = '\n';
1433
9.66M
    }
1434
19.6M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1435
1436
19.6M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1437
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1438
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1439
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1440
691k
        write("[");
1441
15.5M
        for (auto const& item: object.as_array()) {
1442
15.5M
            write(indent_large);
1443
15.5M
            unparseChild(item, level + 1, child_flags);
1444
15.5M
        }
1445
691k
        write(indent).write("]");
1446
18.9M
    } else if (tc == ::ot_dictionary) {
1447
        // Handle special cases for specific dictionaries.
1448
1449
1.42M
        if (old_og == m->root_og) {
1450
            // Extensions dictionaries.
1451
1452
            // We have one of several cases:
1453
            //
1454
            // * We need ADBE
1455
            //    - We already have Extensions
1456
            //       - If it has the right ADBE, preserve it
1457
            //       - Otherwise, replace ADBE
1458
            //    - We don't have Extensions: create one from scratch
1459
            // * We don't want ADBE
1460
            //    - We already have Extensions
1461
            //       - If it only has ADBE, remove it
1462
            //       - If it has other things, keep those and remove ADBE
1463
            //    - We have no extensions: no action required
1464
            //
1465
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1466
            // dictionary, so we can modify in place.
1467
1468
90.6k
            auto extensions = object.getKey("/Extensions");
1469
90.6k
            const bool has_extensions = extensions.isDictionary();
1470
90.6k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1471
1472
90.6k
            if (has_extensions || need_extensions_adbe) {
1473
                // Make a shallow copy of this object so we can modify it safely without affecting
1474
                // the original. This code has logic to skip certain keys in agreement with
1475
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1476
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1477
                // here because all we are doing is removing or replacing top-level keys.
1478
32.4k
                object = object.unsafeShallowCopy();
1479
32.4k
                if (!has_extensions) {
1480
28.1k
                    extensions = QPDFObjectHandle();
1481
28.1k
                }
1482
1483
32.4k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1484
32.4k
                const bool have_extensions_other =
1485
32.4k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1486
1487
32.4k
                if (need_extensions_adbe) {
1488
29.2k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1489
                        // We need Extensions and don't have it.  Create it here.
1490
28.2k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1491
28.2k
                        extensions = object.replaceKeyAndGetNew(
1492
28.2k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1493
28.2k
                    }
1494
29.2k
                } else if (!have_extensions_other) {
1495
                    // We have Extensions dictionary and don't want one.
1496
1.86k
                    if (have_extensions_adbe) {
1497
1.75k
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1498
1.75k
                        object.removeKey("/Extensions");
1499
1.75k
                        extensions = QPDFObjectHandle(); // uninitialized
1500
1.75k
                    }
1501
1.86k
                }
1502
1503
32.4k
                if (extensions) {
1504
30.7k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1505
30.7k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1506
30.7k
                    if (adbe.isDictionary() &&
1507
30.7k
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1508
30.7k
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1509
30.7k
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1510
518
                         m->final_extension_level)) {
1511
513
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1512
30.1k
                    } else {
1513
30.1k
                        if (need_extensions_adbe) {
1514
28.7k
                            extensions.replaceKey(
1515
28.7k
                                "/ADBE",
1516
28.7k
                                QPDFObjectHandle::parse(
1517
28.7k
                                    "<< /BaseVersion /" + m->final_pdf_version +
1518
28.7k
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1519
28.7k
                                    " >>"));
1520
28.7k
                        } else {
1521
1.43k
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1522
1.43k
                            extensions.removeKey("/ADBE");
1523
1.43k
                        }
1524
30.1k
                    }
1525
30.7k
                }
1526
32.4k
            }
1527
90.6k
        }
1528
1529
        // Stream dictionaries.
1530
1531
1.42M
        if (flags & f_stream) {
1532
            // Suppress /Length since we will write it manually
1533
1534
            // Make a shallow copy of this object so we can modify it safely without affecting the
1535
            // original. This code has logic to skip certain keys in agreement with
1536
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1537
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1538
            // all we are doing is removing or replacing top-level keys.
1539
277k
            object = object.unsafeShallowCopy();
1540
1541
277k
            object.removeKey("/Length");
1542
1543
            // If /DecodeParms is an empty list, remove it.
1544
277k
            if (object.getKey("/DecodeParms").isArray() &&
1545
277k
                (0 == object.getKey("/DecodeParms").getArrayNItems())) {
1546
109
                QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1547
109
                object.removeKey("/DecodeParms");
1548
109
            }
1549
1550
277k
            if (flags & f_filtered) {
1551
                // We will supply our own filter and decode parameters.
1552
145k
                object.removeKey("/Filter");
1553
145k
                object.removeKey("/DecodeParms");
1554
145k
            } else {
1555
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1556
                // filters.
1557
131k
                QPDFObjectHandle filter = object.getKey("/Filter");
1558
131k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1559
131k
                if (filter.isOrHasName("/Crypt")) {
1560
2.00k
                    if (filter.isName()) {
1561
152
                        object.removeKey("/Filter");
1562
152
                        object.removeKey("/DecodeParms");
1563
1.85k
                    } else {
1564
1.85k
                        int idx = -1;
1565
100k
                        for (int i = 0; i < filter.getArrayNItems(); ++i) {
1566
100k
                            QPDFObjectHandle item = filter.getArrayItem(i);
1567
100k
                            if (item.isNameAndEquals("/Crypt")) {
1568
1.85k
                                idx = i;
1569
1.85k
                                break;
1570
1.85k
                            }
1571
100k
                        }
1572
1.85k
                        if (idx >= 0) {
1573
                            // If filter is an array, then the code in QPDF_Stream has already
1574
                            // verified that DecodeParms and Filters are arrays of the same length,
1575
                            // but if they weren't for some reason, eraseItem does type and bounds
1576
                            // checking.
1577
1.85k
                            QTC::TC("qpdf", "QPDFWriter remove Crypt");
1578
1.85k
                            filter.eraseItem(idx);
1579
1.85k
                            decode_parms.eraseItem(idx);
1580
1.85k
                        }
1581
1.85k
                    }
1582
2.00k
                }
1583
131k
            }
1584
277k
        }
1585
1586
1.42M
        write("<<");
1587
1588
5.20M
        for (auto const& [key, value]: object.as_dictionary()) {
1589
5.20M
            if (!value.null()) {
1590
4.32M
                write(indent_large).write_name(key).write(" ");
1591
4.32M
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1592
4.32M
                    object.hasKey("/ByteRange")) {
1593
126
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1594
126
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1595
4.32M
                } else {
1596
4.32M
                    unparseChild(value, level + 1, child_flags);
1597
4.32M
                }
1598
4.32M
            }
1599
5.20M
        }
1600
1601
1.42M
        if (flags & f_stream) {
1602
276k
            write(indent_large).write("/Length ");
1603
1604
276k
            if (m->direct_stream_lengths) {
1605
223k
                write(stream_length);
1606
223k
            } else {
1607
52.5k
                write(m->cur_stream_length_id).write(" 0 R");
1608
52.5k
            }
1609
276k
            if (compress && (flags & f_filtered)) {
1610
112k
                write(indent_large).write("/Filter /FlateDecode");
1611
112k
            }
1612
276k
        }
1613
1614
1.42M
        write(indent).write(">>");
1615
17.5M
    } else if (tc == ::ot_stream) {
1616
        // Write stream data to a buffer.
1617
277k
        if (!m->direct_stream_lengths) {
1618
52.7k
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1619
52.7k
        }
1620
1621
277k
        flags |= f_stream;
1622
277k
        bool compress_stream = false;
1623
277k
        bool is_metadata = false;
1624
277k
        std::string stream_data;
1625
277k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1626
145k
            flags |= f_filtered;
1627
145k
        }
1628
277k
        QPDFObjectHandle stream_dict = object.getDict();
1629
1630
277k
        m->cur_stream_length = stream_data.size();
1631
277k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1632
            // Don't encrypt stream data for the metadata stream
1633
0
            m->cur_data_key.clear();
1634
0
        }
1635
277k
        adjustAESStreamLength(m->cur_stream_length);
1636
277k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1637
277k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1638
277k
        write("\nstream\n").write_encrypted(stream_data);
1639
277k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1640
277k
        write(m->added_newline ? "\nendstream" : "endstream");
1641
17.2M
    } else if (tc == ::ot_string) {
1642
252k
        std::string val;
1643
252k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1644
252k
            !m->cur_data_key.empty()) {
1645
95.2k
            val = object.getStringValue();
1646
95.2k
            if (m->encrypt_use_aes) {
1647
65.7k
                Pl_Buffer bufpl("encrypted string");
1648
65.7k
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1649
65.7k
                pl.writeString(val);
1650
65.7k
                pl.finish();
1651
65.7k
                val = QPDF_String(bufpl.getString()).unparse(true);
1652
65.7k
            } else {
1653
29.4k
                auto tmp_ph = QUtil::make_unique_cstr(val);
1654
29.4k
                char* tmp = tmp_ph.get();
1655
29.4k
                size_t vlen = val.length();
1656
29.4k
                RC4 rc4(
1657
29.4k
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1658
29.4k
                    QIntC::to_int(m->cur_data_key.length()));
1659
29.4k
                auto data = QUtil::unsigned_char_pointer(tmp);
1660
29.4k
                rc4.process(data, vlen, data);
1661
29.4k
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1662
29.4k
            }
1663
157k
        } else if (flags & f_hex_string) {
1664
122
            val = QPDF_String(object.getStringValue()).unparse(true);
1665
157k
        } else {
1666
157k
            val = object.unparseResolved();
1667
157k
        }
1668
252k
        write(val);
1669
16.9M
    } else {
1670
16.9M
        write(object.unparseResolved());
1671
16.9M
    }
1672
19.6M
}
1673
1674
void
1675
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1676
80.9k
{
1677
80.9k
    qpdf_assert_debug(first_obj > 0);
1678
80.9k
    bool is_first = true;
1679
80.9k
    auto id = std::to_string(first_obj) + ' ';
1680
1.12M
    for (auto& offset: offsets) {
1681
1.12M
        if (is_first) {
1682
80.9k
            is_first = false;
1683
1.04M
        } else {
1684
1.04M
            write_qdf("\n").write_no_qdf(" ");
1685
1.04M
        }
1686
1.12M
        write(id);
1687
1.12M
        util::increment(id, 1);
1688
1.12M
        write(offset);
1689
1.12M
    }
1690
80.9k
    write("\n");
1691
80.9k
}
1692
1693
void
1694
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1695
40.4k
{
1696
    // Note: object might be null if this is a place-holder for an object stream that we are
1697
    // generating from scratch.
1698
1699
40.4k
    QPDFObjGen old_og = object.getObjGen();
1700
40.4k
    qpdf_assert_debug(old_og.getGen() == 0);
1701
40.4k
    int old_id = old_og.getObj();
1702
40.4k
    int new_stream_id = m->obj[old_og].renumber;
1703
1704
40.4k
    std::vector<qpdf_offset_t> offsets;
1705
40.4k
    qpdf_offset_t first = 0;
1706
1707
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1708
    // first pass.
1709
40.4k
    std::string stream_buffer_pass1;
1710
40.4k
    std::string stream_buffer_pass2;
1711
40.4k
    int first_obj = -1;
1712
40.4k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1713
40.4k
    {
1714
        // Pass 1
1715
40.4k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1716
1717
40.4k
        int count = -1;
1718
564k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1719
564k
            ++count;
1720
564k
            int new_obj = m->obj[obj].renumber;
1721
564k
            if (first_obj == -1) {
1722
40.4k
                first_obj = new_obj;
1723
40.4k
            }
1724
564k
            if (m->qdf_mode) {
1725
51.3k
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1726
51.3k
                if (!m->suppress_original_object_ids) {
1727
51.3k
                    write("; original object ID: ").write(obj.getObj());
1728
                    // For compatibility, only write the generation if non-zero.  While object
1729
                    // streams only allow objects with generation 0, if we are generating object
1730
                    // streams, the old object could have a non-zero generation.
1731
51.3k
                    if (obj.getGen() != 0) {
1732
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1733
0
                        write(" ").write(obj.getGen());
1734
0
                    }
1735
51.3k
                }
1736
51.3k
                write("\n");
1737
51.3k
            }
1738
1739
564k
            offsets.push_back(m->pipeline->getCount());
1740
            // To avoid double-counting objects being written in object streams for progress
1741
            // reporting, decrement in pass 1.
1742
564k
            indicateProgress(true, false);
1743
1744
564k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1745
564k
            if (obj_to_write.isStream()) {
1746
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1747
                // time, but it's not clear to me how to construct a case for this.
1748
0
                obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1749
0
                obj_to_write = QPDFObjectHandle::newNull();
1750
0
            }
1751
564k
            writeObject(obj_to_write, count);
1752
1753
564k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1754
564k
        }
1755
40.4k
    }
1756
40.4k
    {
1757
        // Adjust offsets to skip over comment before first object
1758
40.4k
        first = offsets.at(0);
1759
564k
        for (auto& iter: offsets) {
1760
564k
            iter -= first;
1761
564k
        }
1762
1763
        // Take one pass at writing pairs of numbers so we can get their size information
1764
40.4k
        {
1765
40.4k
            auto pp_discard = m->pipeline_stack.activate(true);
1766
40.4k
            writeObjectStreamOffsets(offsets, first_obj);
1767
40.4k
            first += m->pipeline->getCount();
1768
40.4k
        }
1769
1770
        // Set up a stream to write the stream data into a buffer.
1771
40.4k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1772
1773
40.4k
        writeObjectStreamOffsets(offsets, first_obj);
1774
40.4k
        write(stream_buffer_pass1);
1775
40.4k
        stream_buffer_pass1.clear();
1776
40.4k
        stream_buffer_pass1.shrink_to_fit();
1777
40.4k
        if (compressed) {
1778
38.0k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1779
38.0k
        }
1780
40.4k
    }
1781
1782
    // Write the object
1783
40.4k
    openObject(new_stream_id);
1784
40.4k
    setDataKey(new_stream_id);
1785
40.4k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1786
40.4k
    size_t length = stream_buffer_pass2.size();
1787
40.4k
    adjustAESStreamLength(length);
1788
40.4k
    write(" /Length ").write(length).write_qdf("\n ");
1789
40.4k
    if (compressed) {
1790
38.0k
        write(" /Filter /FlateDecode");
1791
38.0k
    }
1792
40.4k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1793
40.4k
    if (!object.isNull()) {
1794
        // If the original object has an /Extends key, preserve it.
1795
4.47k
        QPDFObjectHandle dict = object.getDict();
1796
4.47k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1797
4.47k
        if (extends.isIndirect()) {
1798
1.02k
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1799
1.02k
            write_qdf("\n ").write(" /Extends ");
1800
1.02k
            unparseChild(extends, 1, f_in_ostream);
1801
1.02k
        }
1802
4.47k
    }
1803
40.4k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1804
40.4k
    if (m->encryption) {
1805
7.18k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1806
7.18k
    }
1807
40.4k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1808
40.4k
    m->cur_data_key.clear();
1809
40.4k
    closeObject(new_stream_id);
1810
40.4k
}
1811
1812
void
1813
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1814
1.58M
{
1815
1.58M
    QPDFObjGen old_og = object.getObjGen();
1816
1817
1.58M
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1818
1.58M
        m->object_stream_to_objects.contains(old_og.getObj())) {
1819
40.4k
        writeObjectStream(object);
1820
40.4k
        return;
1821
40.4k
    }
1822
1823
1.54M
    indicateProgress(false, false);
1824
1.54M
    auto new_id = m->obj[old_og].renumber;
1825
1.54M
    if (m->qdf_mode) {
1826
212k
        if (m->page_object_to_seq.contains(old_og)) {
1827
21.1k
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1828
21.1k
        }
1829
212k
        if (m->contents_to_page_seq.contains(old_og)) {
1830
13.4k
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1831
13.4k
        }
1832
212k
    }
1833
1.54M
    if (object_stream_index == -1) {
1834
984k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1835
161k
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1836
161k
        }
1837
984k
        openObject(new_id);
1838
984k
        setDataKey(new_id);
1839
984k
        unparseObject(object, 0, 0);
1840
984k
        m->cur_data_key.clear();
1841
984k
        closeObject(new_id);
1842
984k
    } else {
1843
564k
        unparseObject(object, 0, f_in_ostream);
1844
564k
        write("\n");
1845
564k
    }
1846
1847
1.54M
    if (!m->direct_stream_lengths && object.isStream()) {
1848
52.5k
        if (m->qdf_mode) {
1849
52.5k
            if (m->added_newline) {
1850
24.8k
                write("%QDF: ignore_newline\n");
1851
24.8k
            }
1852
52.5k
        }
1853
52.5k
        openObject(new_id + 1);
1854
52.5k
        write(m->cur_stream_length);
1855
52.5k
        closeObject(new_id + 1);
1856
52.5k
    }
1857
1.54M
}
1858
1859
std::string
1860
QPDFWriter::getOriginalID1()
1861
124k
{
1862
124k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1863
124k
    if (trailer.hasKey("/ID")) {
1864
13.2k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1865
111k
    } else {
1866
111k
        return "";
1867
111k
    }
1868
124k
}
1869
1870
void
1871
QPDFWriter::generateID()
1872
124k
{
1873
    // Generate the ID lazily so that we can handle the user's preference to use static or
1874
    // deterministic ID generation.
1875
1876
124k
    if (!m->id2.empty()) {
1877
59.0k
        return;
1878
59.0k
    }
1879
1880
65.0k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1881
1882
65.0k
    std::string result;
1883
1884
65.0k
    if (m->static_id) {
1885
        // For test suite use only...
1886
33.4k
        static unsigned char tmp[] = {
1887
33.4k
            0x31,
1888
33.4k
            0x41,
1889
33.4k
            0x59,
1890
33.4k
            0x26,
1891
33.4k
            0x53,
1892
33.4k
            0x58,
1893
33.4k
            0x97,
1894
33.4k
            0x93,
1895
33.4k
            0x23,
1896
33.4k
            0x84,
1897
33.4k
            0x62,
1898
33.4k
            0x64,
1899
33.4k
            0x33,
1900
33.4k
            0x83,
1901
33.4k
            0x27,
1902
33.4k
            0x95,
1903
33.4k
            0x00};
1904
33.4k
        result = reinterpret_cast<char*>(tmp);
1905
33.4k
    } else {
1906
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1907
        // only thing that's really important is that it is very likely to be unique.  We can't
1908
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1909
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1910
        // sufficient portion of the file's contents such no two non-matching files would match in
1911
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1912
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1913
        // that case, would have the same ID regardless of the output file's name.
1914
1915
31.6k
        std::string seed;
1916
31.6k
        if (m->deterministic_id) {
1917
31.6k
            if (m->deterministic_id_data.empty()) {
1918
109
                QTC::TC("qpdf", "QPDFWriter deterministic with no data");
1919
109
                throw std::runtime_error(
1920
109
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for "
1921
109
                    "deterministic ID.  This may happen if deterministic ID "
1922
109
                    "and file encryption are requested together.");
1923
109
            }
1924
31.5k
            seed += m->deterministic_id_data;
1925
31.5k
        } else {
1926
0
            seed += std::to_string(QUtil::get_current_time());
1927
0
            seed += m->filename;
1928
0
            seed += " ";
1929
0
        }
1930
31.5k
        seed += " QPDF ";
1931
31.5k
        if (trailer.hasKey("/Info")) {
1932
12.4k
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1933
12.4k
                if (item.second.isString()) {
1934
3.57k
                    seed += " ";
1935
3.57k
                    seed += item.second.getStringValue();
1936
3.57k
                }
1937
12.4k
            }
1938
1.05k
        }
1939
1940
31.5k
        MD5 m;
1941
31.5k
        m.encodeString(seed.c_str());
1942
31.5k
        MD5::Digest digest;
1943
31.5k
        m.digest(digest);
1944
31.5k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1945
31.5k
    }
1946
1947
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1948
    // word.  Otherwise, we'll use the generated ID for both.
1949
1950
64.9k
    m->id2 = result;
1951
    // Note: keep /ID from old file even if --static-id was given.
1952
64.9k
    m->id1 = getOriginalID1();
1953
64.9k
    if (m->id1.empty()) {
1954
58.3k
        m->id1 = m->id2;
1955
58.3k
    }
1956
64.9k
}
1957
1958
void
1959
QPDFWriter::initializeSpecialStreams()
1960
70.2k
{
1961
    // Mark all page content streams in case we are filtering or normalizing.
1962
70.2k
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1963
70.2k
    int num = 0;
1964
104k
    for (auto& page: pages) {
1965
104k
        m->page_object_to_seq[page.getObjGen()] = ++num;
1966
104k
        QPDFObjectHandle contents = page.getKey("/Contents");
1967
104k
        std::vector<QPDFObjGen> contents_objects;
1968
104k
        if (contents.isArray()) {
1969
4.71k
            int n = contents.getArrayNItems();
1970
130k
            for (int i = 0; i < n; ++i) {
1971
125k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1972
125k
            }
1973
100k
        } else if (contents.isStream()) {
1974
20.5k
            contents_objects.push_back(contents.getObjGen());
1975
20.5k
        }
1976
1977
146k
        for (auto const& c: contents_objects) {
1978
146k
            m->contents_to_page_seq[c] = num;
1979
146k
            m->normalized_streams.insert(c);
1980
146k
        }
1981
104k
    }
1982
70.2k
}
1983
1984
void
1985
QPDFWriter::preserveObjectStreams()
1986
34.9k
{
1987
34.9k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1988
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1989
    // streams out of old objects that have generation numbers greater than zero. However in an
1990
    // existing PDF, all object stream objects and all objects in them must have generation 0
1991
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1992
    // that are not allowed to be in object streams. In addition to removing objects that were
1993
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1994
    // objects from being included.
1995
34.9k
    auto end = xref.cend();
1996
34.9k
    m->obj.streams_empty = true;
1997
34.9k
    if (m->preserve_unreferenced_objects) {
1998
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1999
0
            if (iter->second.getType() == 2) {
2000
                // Pdf contains object streams.
2001
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
2002
0
                m->obj.streams_empty = false;
2003
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2004
0
            }
2005
0
        }
2006
34.9k
    } else {
2007
        // Start by scanning for first compressed object in case we don't have any object streams to
2008
        // process.
2009
371k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2010
340k
            if (iter->second.getType() == 2) {
2011
                // Pdf contains object streams.
2012
3.85k
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2013
3.85k
                m->obj.streams_empty = false;
2014
3.85k
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2015
                // The object pointed to by iter may be a previous generation, in which case it is
2016
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2017
                // table may contain multiple generations of an object).
2018
1.03M
                for (iter = xref.cbegin(); iter != end; ++iter) {
2019
1.03M
                    if (iter->second.getType() == 2) {
2020
950k
                        auto id = static_cast<size_t>(iter->first.getObj());
2021
950k
                        if (id < eligible.size() && eligible[id]) {
2022
146k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2023
804k
                        } else {
2024
804k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2025
804k
                        }
2026
950k
                    }
2027
1.03M
                }
2028
3.85k
                return;
2029
3.85k
            }
2030
340k
        }
2031
34.9k
    }
2032
34.9k
}
2033
2034
void
2035
QPDFWriter::generateObjectStreams()
2036
19.0k
{
2037
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2038
    // how many object streams are needed so that we can distribute objects approximately evenly
2039
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2040
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2041
    // here later.
2042
2043
    // This code doesn't do anything with /Extends.
2044
2045
19.0k
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2046
19.0k
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2047
2048
19.0k
    initializeTables(2U * n_object_streams);
2049
19.0k
    if (n_object_streams == 0) {
2050
67
        m->obj.streams_empty = true;
2051
67
        return;
2052
67
    }
2053
19.0k
    size_t n_per = eligible.size() / n_object_streams;
2054
19.0k
    if (n_per * n_object_streams < eligible.size()) {
2055
283
        ++n_per;
2056
283
    }
2057
19.0k
    unsigned int n = 0;
2058
19.0k
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2059
259k
    for (auto const& item: eligible) {
2060
259k
        if (n == n_per) {
2061
1.36k
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2062
1.36k
            n = 0;
2063
            // Construct a new null object as the "original" object stream.  The rest of the code
2064
            // knows that this means we're creating the object stream from scratch.
2065
1.36k
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2066
1.36k
        }
2067
259k
        auto& obj = m->obj[item];
2068
259k
        obj.object_stream = cur_ostream;
2069
259k
        obj.gen = item.getGen();
2070
259k
        ++n;
2071
259k
    }
2072
19.0k
}
2073
2074
// Return a shallow copy of the trailer with all keys removed that necessarily have to be replaced
// when the file is written.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    static constexpr char const* keys_to_drop[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };

    QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy();
    for (auto const* key: keys_to_drop) {
        trailer.removeKey(key);
    }
    return trailer;
}
2099
2100
// Make document extension level information direct as required by the spec.
2101
void
2102
QPDFWriter::prepareFileForWrite()
2103
69.4k
{
2104
69.4k
    m->pdf.fixDanglingReferences();
2105
69.4k
    auto root = m->pdf.getRoot();
2106
69.4k
    auto oh = root.getKey("/Extensions");
2107
69.4k
    if (oh.isDictionary()) {
2108
3.48k
        const bool extensions_indirect = oh.isIndirect();
2109
3.48k
        if (extensions_indirect) {
2110
1.08k
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2111
1.08k
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2112
1.08k
        }
2113
3.48k
        if (oh.hasKey("/ADBE")) {
2114
2.36k
            auto adbe = oh.getKey("/ADBE");
2115
2.36k
            if (adbe.isIndirect()) {
2116
1.29k
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2117
1.29k
                adbe.makeDirect();
2118
1.29k
                oh.replaceKey("/ADBE", adbe);
2119
1.29k
            }
2120
2.36k
        }
2121
3.48k
    }
2122
69.4k
}
2123
2124
void
2125
QPDFWriter::initializeTables(size_t extra)
2126
70.0k
{
2127
70.0k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2128
70.0k
    m->obj.resize(size);
2129
70.0k
    m->new_obj.resize(size);
2130
70.0k
}
2131
2132
void
2133
QPDFWriter::doWriteSetup()
2134
70.3k
{
2135
70.3k
    if (m->did_write_setup) {
2136
0
        return;
2137
0
    }
2138
70.3k
    m->did_write_setup = true;
2139
2140
    // Do preliminary setup
2141
2142
70.3k
    if (m->linearized) {
2143
36.6k
        m->qdf_mode = false;
2144
36.6k
    }
2145
2146
70.3k
    if (m->pclm) {
2147
0
        m->stream_decode_level = qpdf_dl_none;
2148
0
        m->compress_streams = false;
2149
0
        m->encryption = nullptr;
2150
0
    }
2151
2152
70.3k
    if (m->qdf_mode) {
2153
17.6k
        if (!m->normalize_content_set) {
2154
17.6k
            m->normalize_content = true;
2155
17.6k
        }
2156
17.6k
        if (!m->compress_streams_set) {
2157
17.6k
            m->compress_streams = false;
2158
17.6k
        }
2159
17.6k
        if (!m->stream_decode_level_set) {
2160
0
            m->stream_decode_level = qpdf_dl_generalized;
2161
0
        }
2162
17.6k
    }
2163
2164
70.3k
    if (m->encryption) {
2165
        // Encryption has been explicitly set
2166
33.4k
        m->preserve_encryption = false;
2167
36.9k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2168
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2169
        // though, we still obey that.
2170
17.6k
        m->preserve_encryption = false;
2171
17.6k
    }
2172
2173
70.3k
    if (m->preserve_encryption) {
2174
19.2k
        copyEncryptionParameters(m->pdf);
2175
19.2k
    }
2176
2177
70.3k
    if (!m->forced_pdf_version.empty()) {
2178
0
        int major = 0;
2179
0
        int minor = 0;
2180
0
        parseVersion(m->forced_pdf_version, major, minor);
2181
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2182
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2183
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2184
0
            m->object_stream_mode = qpdf_o_disable;
2185
0
        }
2186
0
    }
2187
2188
70.3k
    if (m->qdf_mode || m->normalize_content || m->stream_decode_level) {
2189
70.2k
        initializeSpecialStreams();
2190
70.2k
    }
2191
2192
70.3k
    if (m->qdf_mode) {
2193
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2194
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2195
        // hint streams always get direct stream lengths.
2196
17.6k
        m->direct_stream_lengths = false;
2197
17.6k
    }
2198
2199
70.3k
    switch (m->object_stream_mode) {
2200
16.0k
    case qpdf_o_disable:
2201
16.0k
        initializeTables();
2202
16.0k
        m->obj.streams_empty = true;
2203
16.0k
        break;
2204
2205
34.9k
    case qpdf_o_preserve:
2206
34.9k
        initializeTables();
2207
34.9k
        preserveObjectStreams();
2208
34.9k
        break;
2209
2210
19.0k
    case qpdf_o_generate:
2211
19.0k
        generateObjectStreams();
2212
19.0k
        break;
2213
2214
        // no default so gcc will warn for missing case tag
2215
70.3k
    }
2216
2217
69.9k
    if (!m->obj.streams_empty) {
2218
22.7k
        if (m->linearized) {
2219
            // Page dictionaries are not allowed to be compressed objects.
2220
39.5k
            for (auto& page: m->pdf.getAllPages()) {
2221
39.5k
                if (m->obj[page].object_stream > 0) {
2222
33.6k
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2223
33.6k
                    m->obj[page].object_stream = 0;
2224
33.6k
                }
2225
39.5k
            }
2226
21.4k
        }
2227
2228
22.7k
        if (m->linearized || m->encryption) {
2229
            // The document catalog is not allowed to be compressed in linearized files either.  It
2230
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2231
            // handle encrypted files with compressed document catalogs, so we disable them in that
2232
            // case as well.
2233
21.4k
            if (m->obj[m->root_og].object_stream > 0) {
2234
17.0k
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2235
17.0k
                m->obj[m->root_og].object_stream = 0;
2236
17.0k
            }
2237
21.4k
        }
2238
2239
        // Generate reverse mapping from object stream to objects
2240
14.0M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2241
14.0M
            if (item.object_stream > 0) {
2242
354k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2243
354k
                vec.emplace_back(id, item.gen);
2244
354k
                if (m->max_ostream_index < vec.size()) {
2245
148k
                    ++m->max_ostream_index;
2246
148k
                }
2247
354k
            }
2248
14.0M
        });
2249
22.7k
        --m->max_ostream_index;
2250
2251
22.7k
        if (m->object_stream_to_objects.empty()) {
2252
2.48k
            m->obj.streams_empty = true;
2253
20.2k
        } else {
2254
20.2k
            setMinimumPDFVersion("1.5");
2255
20.2k
        }
2256
22.7k
    }
2257
2258
69.9k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2259
69.9k
    m->final_pdf_version = m->min_pdf_version;
2260
69.9k
    m->final_extension_level = m->min_extension_level;
2261
69.9k
    if (!m->forced_pdf_version.empty()) {
2262
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2263
0
        m->final_pdf_version = m->forced_pdf_version;
2264
0
        m->final_extension_level = m->forced_extension_level;
2265
0
    }
2266
69.9k
}
2267
2268
void
2269
QPDFWriter::write()
2270
70.3k
{
2271
70.3k
    doWriteSetup();
2272
2273
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2274
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2275
70.3k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2276
2277
70.3k
    prepareFileForWrite();
2278
2279
70.3k
    if (m->linearized) {
2280
35.9k
        writeLinearized();
2281
35.9k
    } else {
2282
34.4k
        writeStandard();
2283
34.4k
    }
2284
2285
70.3k
    m->pipeline->finish();
2286
70.3k
    if (m->close_file) {
2287
0
        fclose(m->file);
2288
0
    }
2289
70.3k
    m->file = nullptr;
2290
70.3k
    if (m->buffer_pipeline) {
2291
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2292
0
        m->buffer_pipeline = nullptr;
2293
0
    }
2294
70.3k
    indicateProgress(false, true);
2295
70.3k
}
2296
2297
// Map an object of the input file to its object ID in the output. The generation of the result
// is always 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return {new_id, 0};
}
2302
2303
std::map<QPDFObjGen, QPDFXRefEntry>
2304
QPDFWriter::getWrittenXRefTable()
2305
0
{
2306
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2307
2308
0
    auto it = result.begin();
2309
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2310
0
        if (item.xref.getType() != 0) {
2311
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2312
0
        }
2313
0
    });
2314
0
    return result;
2315
0
}
2316
2317
void
2318
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2319
159k
{
2320
423k
    for (auto const& oh: part) {
2321
423k
        enqueueObject(oh);
2322
423k
    }
2323
159k
}
2324
2325
void
2326
QPDFWriter::writeEncryptionDictionary()
2327
45.5k
{
2328
45.5k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2329
45.5k
    auto& enc = *m->encryption;
2330
45.5k
    auto const V = enc.getV();
2331
2332
45.5k
    write("<<");
2333
45.5k
    if (V >= 4) {
2334
29.9k
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2335
29.9k
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2336
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2337
        // MacOS won't open encrypted files without it.
2338
29.9k
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2339
29.9k
        if (!m->encryption->getEncryptMetadata()) {
2340
0
            write(" /EncryptMetadata false");
2341
0
        }
2342
29.9k
    }
2343
45.5k
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2344
45.5k
    write(" /O ").write_string(enc.getO(), true);
2345
45.5k
    if (V >= 4) {
2346
29.9k
        write(" /OE ").write_string(enc.getOE(), true);
2347
29.9k
    }
2348
45.5k
    write(" /P ").write(enc.getP());
2349
45.5k
    if (V >= 5) {
2350
29.9k
        write(" /Perms ").write_string(enc.getPerms(), true);
2351
29.9k
    }
2352
45.5k
    write(" /R ").write(enc.getR());
2353
2354
45.5k
    if (V >= 4) {
2355
29.9k
        write(" /StmF /StdCF /StrF /StdCF");
2356
29.9k
    }
2357
45.5k
    write(" /U ").write_string(enc.getU(), true);
2358
45.5k
    if (V >= 4) {
2359
29.9k
        write(" /UE ").write_string(enc.getUE(), true);
2360
29.9k
    }
2361
45.5k
    write(" /V ").write(enc.getV()).write(" >>");
2362
45.5k
    closeObject(m->encryption_dict_objid);
2363
45.5k
}
2364
2365
std::string
2366
QPDFWriter::getFinalVersion()
2367
0
{
2368
0
    doWriteSetup();
2369
0
    return m->final_pdf_version;
2370
0
}
2371
2372
void
2373
QPDFWriter::writeHeader()
2374
92.9k
{
2375
92.9k
    write("%PDF-").write(m->final_pdf_version);
2376
92.9k
    if (m->pclm) {
2377
        // PCLm version
2378
0
        write("\n%PCLm 1.0\n");
2379
92.9k
    } else {
2380
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2381
        // as binary.
2382
92.9k
        write("\n%\xbf\xf7\xa2\xfe\n");
2383
92.9k
    }
2384
92.9k
    write_qdf("%QDF-1.0\n\n");
2385
2386
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2387
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2388
    // linearized files, we have to write extra header text after the linearization parameter
2389
    // dictionary.
2390
92.9k
}
2391
2392
void
2393
QPDFWriter::writeHintStream(int hint_id)
2394
28.9k
{
2395
28.9k
    std::string hint_buffer;
2396
28.9k
    int S = 0;
2397
28.9k
    int O = 0;
2398
28.9k
    bool compressed = m->compress_streams && !m->qdf_mode;
2399
28.9k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2400
2401
28.9k
    openObject(hint_id);
2402
28.9k
    setDataKey(hint_id);
2403
2404
28.9k
    size_t hlen = hint_buffer.size();
2405
2406
28.9k
    write("<< ");
2407
28.9k
    if (compressed) {
2408
28.9k
        write("/Filter /FlateDecode ");
2409
28.9k
    }
2410
28.9k
    write("/S ").write(S);
2411
28.9k
    if (O) {
2412
980
        write(" /O ").write(O);
2413
980
    }
2414
28.9k
    adjustAESStreamLength(hlen);
2415
28.9k
    write(" /Length ").write(hlen);
2416
28.9k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2417
2418
28.9k
    if (m->encryption) {
2419
14.6k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2420
14.6k
    }
2421
2422
28.9k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2423
28.9k
    closeObject(hint_id);
2424
28.9k
}
2425
2426
// Convenience overload: forwards to the full writeXRefTable with zero/false for every extra
// argument. Kept as a separate overload rather than default arguments in the header because the
// number of extra arguments makes it too easy to leave one off.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    qpdf_offset_t const prev = 0;
    bool const suppress_offsets = false;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    int const linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2433
2434
// Write a classic cross-reference table covering objects first..last, followed by the trailer.
//
// When suppress_offsets is true every in-use entry is written with offset 0. Otherwise offsets
// come from m->new_obj; if hint_id is non-zero, every object other than hint_id whose offset is at
// or beyond hint_offset is shifted by hint_length.
//
// Returns the pipeline count taken just after the "xref" subsection header and before the newline
// that precedes the first entry.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    int const entry_count = last - first + 1;
    write("xref\n");
    write(first);
    write(" ");
    write(entry_count);
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    write("\n");

    // Object 0 is always the head of the free list.
    int next_id = first;
    if (next_id == 0) {
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    for (; next_id <= last; ++next_id) {
        qpdf_offset_t offset = 0;
        if (!suppress_offsets) {
            offset = m->new_obj[next_id].xref.getOffset();
            bool const shift_for_hint =
                hint_id != 0 && next_id != hint_id && offset >= hint_offset;
            if (shift_for_hint) {
                offset += hint_length;
            }
        }
        write(QUtil::int_to_string(offset, 10));
        write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2468
2469
// Convenience overload: forwards to the full writeXRefStream with zero/false for every extra
// argument. Kept as a separate overload rather than default arguments in the header because the
// number of extra arguments makes it too easy to leave one off.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    qpdf_offset_t const prev = 0;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    bool const skip_compression = false;
    int const linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2478
2479
// Write a cross-reference stream as object xref_id, covering objects first..last.
//
// Field widths (/W) are 1 byte for the entry type, enough bytes for the larger of
// (max_offset + hint_length) and max_id for field 1, and enough bytes for m->max_ostream_index for
// field 2. If hint_id is non-zero, every type 1 entry other than hint_id whose offset is at or
// beyond hint_offset is shifted by hint_length.
//
// When stream compression is on (and QDF mode is off) the data is PNG-predictor encoded and the
// dictionary advertises /FlateDecode; the data is actually deflated only when skip_compression is
// false.
//
// Returns one less than the pipeline count at entry. Throws std::logic_error if an xref entry has
// a type other than 0, 1, or 2.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();

    // Field 1 holds either a file offset or an object stream number, so it must be wide enough
    // for both.
    auto const offset_width = bytesNeeded(max_offset + hint_length);
    auto const id_width = bytesNeeded(max_id);
    unsigned int const f1_size = std::max(offset_width, id_width);

    // Field 2 holds the index of an object within its object stream.
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    unsigned int const esize = 1 + f1_size + f2_size;

    // The xref stream's own entry has to be recorded before the entries are serialized below; it
    // can't wait for openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        // While this popper is alive, output is captured in xref_data.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = m->new_obj[id].xref;
            int const type = entry.getType();
            if (type == 0) {
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
            } else if (type == 1) {
                qpdf_offset_t offset = entry.getOffset();
                bool const shift_for_hint =
                    hint_id != 0 && id != hint_id && offset >= hint_offset;
                if (shift_for_hint) {
                    offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(offset), f1_size);
                writeBinary(0, f2_size);
            } else if (type == 2) {
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression the stream dictionary is still written as if compressed, but the
        // data is not deflated. This helps with computation of padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<");
    write_qdf("\n ");
    write(" /Type /XRef");
    write_qdf("\n ");
    write(" /Length ");
    write(xref_data.size());
    if (compressed) {
        write_qdf("\n ");
        write(" /Filter /FlateDecode");
        write_qdf("\n ");
        write(" /DecodeParms << /Columns ");
        write(esize);
        write(" /Predictor 12 >>");
    }
    write_qdf("\n ");
    write(" /W [ 1 ");
    write(f1_size);
    write(" ");
    write(f2_size);
    write(" ]");

    // /Index is written only when the stream does not cover exactly objects 0..size-1.
    if (first != 0 || last != (size - 1)) {
        write(" /Index [ ");
        write(first);
        write(" ");
        write(last - first + 1);
        write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n");
    write(xref_data);
    write("\nendstream");
    closeObject(xref_id);
    return xref_offset - 1;
}
2574
2575
size_t
2576
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2577
30.9k
{
2578
    // This routine is called right after a linearization first pass xref stream has been written
2579
    // without compression.  Calculate the amount of padding that would be required in the worst
2580
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2581
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2582
    // 10 extra bytes for number length increases.
2583
2584
30.9k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2585
30.9k
}
2586
2587
void
2588
QPDFWriter::writeLinearized()
2589
35.9k
{
2590
    // Optimize file and enqueue objects in order
2591
2592
35.9k
    std::map<int, int> stream_cache;
2593
2594
216k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2595
216k
        auto& result = stream_cache[stream.getObjectID()];
2596
216k
        if (result == 0) {
2597
100k
            bool compress_stream;
2598
100k
            bool is_metadata;
2599
100k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2600
51.8k
                result = 2;
2601
51.8k
            } else {
2602
48.6k
                result = 1;
2603
48.6k
            }
2604
100k
        }
2605
216k
        return result;
2606
216k
    };
2607
2608
35.9k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2609
2610
35.9k
    std::vector<QPDFObjectHandle> part4;
2611
35.9k
    std::vector<QPDFObjectHandle> part6;
2612
35.9k
    std::vector<QPDFObjectHandle> part7;
2613
35.9k
    std::vector<QPDFObjectHandle> part8;
2614
35.9k
    std::vector<QPDFObjectHandle> part9;
2615
35.9k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2616
2617
    // Object number sequence:
2618
    //
2619
    //  second half
2620
    //    second half uncompressed objects
2621
    //    second half xref stream, if any
2622
    //    second half compressed objects
2623
    //  first half
2624
    //    linearization dictionary
2625
    //    first half xref stream, if any
2626
    //    part 4 uncompresesd objects
2627
    //    encryption dictionary, if any
2628
    //    hint stream
2629
    //    part 6 uncompressed objects
2630
    //    first half compressed objects
2631
    //
2632
2633
    // Second half objects
2634
35.9k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2635
35.9k
    int second_half_first_obj = 1;
2636
35.9k
    int after_second_half = 1 + second_half_uncompressed;
2637
35.9k
    m->next_objid = after_second_half;
2638
35.9k
    int second_half_xref = 0;
2639
35.9k
    bool need_xref_stream = !m->obj.streams_empty;
2640
35.9k
    if (need_xref_stream) {
2641
16.7k
        second_half_xref = m->next_objid++;
2642
16.7k
    }
2643
    // Assign numbers to all compressed objects in the second half.
2644
35.9k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2645
132k
    for (int i = 0; i < 3; ++i) {
2646
172k
        for (auto const& oh: *vecs2[i]) {
2647
172k
            assignCompressedObjectNumbers(oh.getObjGen());
2648
172k
        }
2649
96.7k
    }
2650
35.9k
    int second_half_end = m->next_objid - 1;
2651
35.9k
    int second_trailer_size = m->next_objid;
2652
2653
    // First half objects
2654
35.9k
    int first_half_start = m->next_objid;
2655
35.9k
    int lindict_id = m->next_objid++;
2656
35.9k
    int first_half_xref = 0;
2657
35.9k
    if (need_xref_stream) {
2658
16.7k
        first_half_xref = m->next_objid++;
2659
16.7k
    }
2660
35.9k
    int part4_first_obj = m->next_objid;
2661
35.9k
    m->next_objid += QIntC::to_int(part4.size());
2662
35.9k
    int after_part4 = m->next_objid;
2663
35.9k
    if (m->encryption) {
2664
16.1k
        m->encryption_dict_objid = m->next_objid++;
2665
16.1k
    }
2666
35.9k
    int hint_id = m->next_objid++;
2667
35.9k
    int part6_first_obj = m->next_objid;
2668
35.9k
    m->next_objid += QIntC::to_int(part6.size());
2669
35.9k
    int after_part6 = m->next_objid;
2670
    // Assign numbers to all compressed objects in the first half
2671
35.9k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2672
100k
    for (int i = 0; i < 2; ++i) {
2673
251k
        for (auto const& oh: *vecs1[i]) {
2674
251k
            assignCompressedObjectNumbers(oh.getObjGen());
2675
251k
        }
2676
64.5k
    }
2677
35.9k
    int first_half_end = m->next_objid - 1;
2678
35.9k
    int first_trailer_size = m->next_objid;
2679
2680
35.9k
    int part4_end_marker = part4.back().getObjectID();
2681
35.9k
    int part6_end_marker = part6.back().getObjectID();
2682
35.9k
    qpdf_offset_t space_before_zero = 0;
2683
35.9k
    qpdf_offset_t file_size = 0;
2684
35.9k
    qpdf_offset_t part6_end_offset = 0;
2685
35.9k
    qpdf_offset_t first_half_max_obj_offset = 0;
2686
35.9k
    qpdf_offset_t second_xref_offset = 0;
2687
35.9k
    qpdf_offset_t first_xref_end = 0;
2688
35.9k
    qpdf_offset_t second_xref_end = 0;
2689
2690
35.9k
    m->next_objid = part4_first_obj;
2691
35.9k
    enqueuePart(part4);
2692
35.9k
    if (m->next_objid != after_part4) {
2693
        // This can happen with very botched files as in the fuzzer test. There are likely some
2694
        // faulty assumptions in calculateLinearizationData
2695
26
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2696
26
    }
2697
35.8k
    m->next_objid = part6_first_obj;
2698
35.8k
    enqueuePart(part6);
2699
35.8k
    if (m->next_objid != after_part6) {
2700
540
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2701
540
    }
2702
35.3k
    m->next_objid = second_half_first_obj;
2703
35.3k
    enqueuePart(part7);
2704
35.3k
    enqueuePart(part8);
2705
35.3k
    enqueuePart(part9);
2706
35.3k
    if (m->next_objid != after_second_half) {
2707
960
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2708
960
    }
2709
2710
34.3k
    qpdf_offset_t hint_length = 0;
2711
34.3k
    std::string hint_buffer;
2712
2713
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2714
2715
34.3k
    FILE* lin_pass1_file = nullptr;
2716
34.3k
    auto pp_pass1 = m->pipeline_stack.popper();
2717
34.3k
    auto pp_md5 = m->pipeline_stack.popper();
2718
59.6k
    for (int pass: {1, 2}) {
2719
59.6k
        if (pass == 1) {
2720
30.6k
            if (!m->lin_pass1_filename.empty()) {
2721
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2722
0
                m->pipeline_stack.activate(
2723
0
                    pp_pass1,
2724
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2725
30.6k
            } else {
2726
30.6k
                m->pipeline_stack.activate(pp_pass1, true);
2727
30.6k
            }
2728
30.6k
            if (m->deterministic_id) {
2729
15.2k
                m->pipeline_stack.activate_md5(pp_md5);
2730
15.2k
            }
2731
30.6k
        }
2732
2733
        // Part 1: header
2734
2735
59.6k
        writeHeader();
2736
2737
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2738
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2739
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2740
        // linearization parameter dictionary must appear within the first 1024 characters of the
2741
        // file.
2742
2743
59.6k
        qpdf_offset_t pos = m->pipeline->getCount();
2744
59.6k
        openObject(lindict_id);
2745
59.6k
        write("<<");
2746
59.6k
        if (pass == 2) {
2747
28.9k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2748
28.9k
            int first_page_object = m->obj[pages.at(0)].renumber;
2749
2750
28.9k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2751
            // Implementation note 121 states that a space is mandatory after this open bracket.
2752
28.9k
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2753
28.9k
            write(hint_length);
2754
28.9k
            write(" ] /O ").write(first_page_object);
2755
28.9k
            write(" /E ").write(part6_end_offset + hint_length);
2756
28.9k
            write(" /N ").write(pages.size());
2757
28.9k
            write(" /T ").write(space_before_zero + hint_length);
2758
28.9k
        }
2759
59.6k
        write(" >>");
2760
59.6k
        closeObject(lindict_id);
2761
59.6k
        static int const pad = 200;
2762
59.6k
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2763
2764
        // If the user supplied any additional header text, write it here after the linearization
2765
        // parameter dictionary.
2766
59.6k
        write(m->extra_header_text);
2767
2768
        // Part 3: first page cross reference table and trailer.
2769
2770
59.6k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2771
59.6k
        qpdf_offset_t hint_offset = 0;
2772
59.6k
        if (pass == 2) {
2773
28.9k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2774
28.9k
        }
2775
59.6k
        if (need_xref_stream) {
2776
            // Must pad here too.
2777
31.2k
            if (pass == 1) {
2778
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2779
                // reserved for each file offset.  This would provide adequate space for the xref
2780
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2781
                // file, which is extremely likely.  In the second pass, we will know the actual
2782
                // value for this, but it's okay if it's smaller.
2783
16.1k
                first_half_max_obj_offset = 1 << 25;
2784
16.1k
            }
2785
31.2k
            pos = m->pipeline->getCount();
2786
31.2k
            writeXRefStream(
2787
31.2k
                first_half_xref,
2788
31.2k
                first_half_end,
2789
31.2k
                first_half_max_obj_offset,
2790
31.2k
                t_lin_first,
2791
31.2k
                first_half_start,
2792
31.2k
                first_half_end,
2793
31.2k
                first_trailer_size,
2794
31.2k
                hint_length + second_xref_offset,
2795
31.2k
                hint_id,
2796
31.2k
                hint_offset,
2797
31.2k
                hint_length,
2798
31.2k
                (pass == 1),
2799
31.2k
                pass);
2800
31.2k
            qpdf_offset_t endpos = m->pipeline->getCount();
2801
31.2k
            if (pass == 1) {
2802
                // Pad so we have enough room for the real xref stream.
2803
15.8k
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2804
15.8k
                first_xref_end = m->pipeline->getCount();
2805
15.8k
            } else {
2806
                // Pad so that the next object starts at the same place as in pass 1.
2807
15.3k
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2808
2809
15.3k
                if (m->pipeline->getCount() != first_xref_end) {
2810
0
                    throw std::logic_error(
2811
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2812
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2813
0
                }
2814
15.3k
            }
2815
31.2k
            write("\n");
2816
31.2k
        } else {
2817
28.4k
            writeXRefTable(
2818
28.4k
                t_lin_first,
2819
28.4k
                first_half_start,
2820
28.4k
                first_half_end,
2821
28.4k
                first_trailer_size,
2822
28.4k
                hint_length + second_xref_offset,
2823
28.4k
                (pass == 1),
2824
28.4k
                hint_id,
2825
28.4k
                hint_offset,
2826
28.4k
                hint_length,
2827
28.4k
                pass);
2828
28.4k
            write("startxref\n0\n%%EOF\n");
2829
28.4k
        }
2830
2831
        // Parts 4 through 9
2832
2833
713k
        for (auto const& cur_object: m->object_queue) {
2834
713k
            if (cur_object.getObjectID() == part6_end_marker) {
2835
58.9k
                first_half_max_obj_offset = m->pipeline->getCount();
2836
58.9k
            }
2837
713k
            writeObject(cur_object);
2838
713k
            if (cur_object.getObjectID() == part4_end_marker) {
2839
59.2k
                if (m->encryption) {
2840
29.9k
                    writeEncryptionDictionary();
2841
29.9k
                }
2842
59.2k
                if (pass == 1) {
2843
30.2k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2844
30.2k
                } else {
2845
                    // Part 5: hint stream
2846
28.9k
                    write(hint_buffer);
2847
28.9k
                }
2848
59.2k
            }
2849
713k
            if (cur_object.getObjectID() == part6_end_marker) {
2850
58.4k
                part6_end_offset = m->pipeline->getCount();
2851
58.4k
            }
2852
713k
        }
2853
2854
        // Part 10: overflow hint stream -- not used
2855
2856
        // Part 11: main cross reference table and trailer
2857
2858
59.6k
        second_xref_offset = m->pipeline->getCount();
2859
59.6k
        if (need_xref_stream) {
2860
29.9k
            pos = m->pipeline->getCount();
2861
29.9k
            space_before_zero = writeXRefStream(
2862
29.9k
                second_half_xref,
2863
29.9k
                second_half_end,
2864
29.9k
                second_xref_offset,
2865
29.9k
                t_lin_second,
2866
29.9k
                0,
2867
29.9k
                second_half_end,
2868
29.9k
                second_trailer_size,
2869
29.9k
                0,
2870
29.9k
                0,
2871
29.9k
                0,
2872
29.9k
                0,
2873
29.9k
                (pass == 1),
2874
29.9k
                pass);
2875
29.9k
            qpdf_offset_t endpos = m->pipeline->getCount();
2876
2877
29.9k
            if (pass == 1) {
2878
                // Pad so we have enough room for the real xref stream.  See comments for previous
2879
                // xref stream on how we calculate the padding.
2880
15.0k
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2881
15.0k
                second_xref_end = m->pipeline->getCount();
2882
15.0k
            } else {
2883
                // Make the file size the same.
2884
14.9k
                auto padding =
2885
14.9k
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2886
14.9k
                write(padding, ' ').write("\n");
2887
2888
                // If this assertion fails, maybe we didn't have enough padding above.
2889
14.9k
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2890
0
                    throw std::logic_error(
2891
0
                        "count mismatch after xref stream; possible insufficient padding?");
2892
0
                }
2893
14.9k
            }
2894
29.9k
        } else {
2895
29.6k
            space_before_zero = writeXRefTable(
2896
29.6k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2897
29.6k
        }
2898
59.6k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2899
2900
59.6k
        if (pass == 1) {
2901
28.9k
            if (m->deterministic_id) {
2902
14.3k
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2903
14.3k
                computeDeterministicIDData();
2904
14.3k
                pp_md5.pop();
2905
14.3k
            }
2906
2907
            // Close first pass pipeline
2908
28.9k
            file_size = m->pipeline->getCount();
2909
28.9k
            pp_pass1.pop();
2910
2911
            // Save hint offset since it will be set to zero by calling openObject.
2912
28.9k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2913
2914
            // Write hint stream to a buffer
2915
28.9k
            {
2916
28.9k
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2917
28.9k
                writeHintStream(hint_id);
2918
28.9k
            }
2919
28.9k
            hint_length = QIntC::to_offset(hint_buffer.size());
2920
2921
            // Restore hint offset
2922
28.9k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2923
28.9k
            if (lin_pass1_file) {
2924
                // Write some debugging information
2925
0
                fprintf(
2926
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2927
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2928
0
                fprintf(
2929
0
                    lin_pass1_file,
2930
0
                    "%% second_xref_offset=%s\n",
2931
0
                    std::to_string(second_xref_offset).c_str());
2932
0
                fprintf(
2933
0
                    lin_pass1_file,
2934
0
                    "%% second_xref_end=%s\n",
2935
0
                    std::to_string(second_xref_end).c_str());
2936
0
                fclose(lin_pass1_file);
2937
0
                lin_pass1_file = nullptr;
2938
0
            }
2939
28.9k
        }
2940
59.6k
    }
2941
34.3k
}
2942
2943
void
2944
QPDFWriter::enqueueObjectsStandard()
2945
33.2k
{
2946
33.2k
    if (m->preserve_unreferenced_objects) {
2947
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2948
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2949
0
            enqueueObject(oh);
2950
0
        }
2951
0
    }
2952
2953
    // Put root first on queue.
2954
33.2k
    QPDFObjectHandle trailer = getTrimmedTrailer();
2955
33.2k
    enqueueObject(trailer.getKey("/Root"));
2956
2957
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2958
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2959
72.5k
    for (auto& item: trailer.as_dictionary()) {
2960
72.5k
        if (!item.second.null()) {
2961
59.3k
            enqueueObject(item.second);
2962
59.3k
        }
2963
72.5k
    }
2964
33.2k
}
2965
2966
void
2967
QPDFWriter::enqueueObjectsPCLm()
2968
0
{
2969
    // Image transform stream content for page strip images. Each of this new stream has to come
2970
    // after every page image strip written in the pclm file.
2971
0
    std::string image_transform_content = "q /image Do Q\n";
2972
2973
    // enqueue all pages first
2974
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2975
0
    for (auto& page: all) {
2976
        // enqueue page
2977
0
        enqueueObject(page);
2978
2979
        // enqueue page contents stream
2980
0
        enqueueObject(page.getKey("/Contents"));
2981
2982
        // enqueue all the strips for each page
2983
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2984
0
        for (auto& image: strips.as_dictionary()) {
2985
0
            if (!image.second.null()) {
2986
0
                enqueueObject(image.second);
2987
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2988
0
            }
2989
0
        }
2990
0
    }
2991
2992
    // Put root in queue.
2993
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2994
0
    enqueueObject(trailer.getKey("/Root"));
2995
0
}
2996
2997
void
2998
QPDFWriter::indicateProgress(bool decrement, bool finished)
2999
2.17M
{
3000
2.17M
    if (decrement) {
3001
564k
        --m->events_seen;
3002
564k
        return;
3003
564k
    }
3004
3005
1.61M
    ++m->events_seen;
3006
3007
1.61M
    if (!m->progress_reporter.get()) {
3008
1.61M
        return;
3009
1.61M
    }
3010
3011
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3012
0
        int percentage =
3013
0
            (finished ? 100
3014
0
                 : m->next_progress_report == 0
3015
0
                 ? 0
3016
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3017
0
        m->progress_reporter->reportProgress(percentage);
3018
0
    }
3019
0
    int increment = std::max(1, (m->events_expected / 100));
3020
0
    while (m->events_seen >= m->next_progress_report) {
3021
0
        m->next_progress_report += increment;
3022
0
    }
3023
0
}
3024
3025
void
3026
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3027
0
{
3028
0
    m->progress_reporter = pr;
3029
0
}
3030
3031
void
3032
QPDFWriter::writeStandard()
3033
33.2k
{
3034
33.2k
    auto pp_md5 = m->pipeline_stack.popper();
3035
33.2k
    if (m->deterministic_id) {
3036
17.4k
        m->pipeline_stack.activate_md5(pp_md5);
3037
17.4k
    }
3038
3039
    // Start writing
3040
3041
33.2k
    writeHeader();
3042
33.2k
    write(m->extra_header_text);
3043
3044
33.2k
    if (m->pclm) {
3045
0
        enqueueObjectsPCLm();
3046
33.2k
    } else {
3047
33.2k
        enqueueObjectsStandard();
3048
33.2k
    }
3049
3050
    // Now start walking queue, outputting each object.
3051
345k
    while (m->object_queue_front < m->object_queue.size()) {
3052
311k
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3053
311k
        ++m->object_queue_front;
3054
311k
        writeObject(cur_object);
3055
311k
    }
3056
3057
    // Write out the encryption dictionary, if any
3058
33.2k
    if (m->encryption) {
3059
15.5k
        writeEncryptionDictionary();
3060
15.5k
    }
3061
3062
    // Now write out xref.  next_objid is now the number of objects.
3063
33.2k
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3064
33.2k
    if (m->object_stream_to_objects.empty()) {
3065
        // Write regular cross-reference table
3066
32.1k
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3067
32.1k
    } else {
3068
        // Write cross-reference stream.
3069
1.17k
        int xref_id = m->next_objid++;
3070
1.17k
        writeXRefStream(
3071
1.17k
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3072
1.17k
    }
3073
33.2k
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3074
3075
33.2k
    if (m->deterministic_id) {
3076
16.8k
        QTC::TC(
3077
16.8k
            "qpdf",
3078
16.8k
            "QPDFWriter standard deterministic ID",
3079
16.8k
            m->object_stream_to_objects.empty() ? 0 : 1);
3080
16.8k
    }
3081
33.2k
}