Coverage Report

Created: 2025-08-29 06:57

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Out-of-line, intentionally empty destructor.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // This has to stay an explicit, non-inline definition -- see QPDF_DLL_CLASS in
    // README-maintainer.
}
35
36
// Store the callback that reportProgress() forwards to. The handler is received by value, so it is
// moved into the member rather than copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
40
41
// Out-of-line, intentionally empty destructor.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
    // This has to stay an explicit, non-inline definition -- see QPDF_DLL_CLASS in
    // README-maintainer.
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
115k
                stack(&stack)
102
115k
            {
103
115k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
9.51k
            top(top)
112
9.51k
        {
113
9.51k
        }
114
115
        Popper
116
        popper()
117
16.6k
        {
118
16.6k
            return {*this};
119
16.6k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
9.51k
        {
124
9.51k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
9.51k
            top = c.get();
126
9.51k
            stack.emplace_back(std::move(c));
127
9.51k
        }
128
129
        Popper
130
        activate(std::string& str)
131
63.2k
        {
132
63.2k
            Popper pp{*this};
133
63.2k
            activate(pp, str);
134
63.2k
            return pp;
135
63.2k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
63.2k
        {
140
63.2k
            activate(pp, false, &str, nullptr);
141
63.2k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
35.8k
        {
156
35.8k
            Popper pp{*this};
157
35.8k
            activate(pp, discard, str, std::move(next));
158
35.8k
            return pp;
159
35.8k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
107k
        {
168
107k
            std::unique_ptr<pl::Count> c;
169
107k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
107k
            } else if (discard) {
172
44.1k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
63.2k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
63.2k
            } else {
176
63.2k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
63.2k
            }
178
107k
            pp.stack_id = last_id;
179
107k
            top = c.get();
180
107k
            stack.emplace_back(std::move(c));
181
107k
        }
182
        void
183
        activate_md5(Popper& pp)
184
0
        {
185
0
            qpdf_assert_debug(!md5_pipeline);
186
0
            qpdf_assert_debug(md5_id == 0);
187
0
            qpdf_assert_debug(top->getCount() == 0);
188
0
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
0
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
0
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
0
            pp.stack_id = last_id;
193
0
            md5_id = last_id;
194
0
            top = c.get();
195
0
            stack.emplace_back(std::move(c));
196
0
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
0
        {
202
0
            qpdf_assert_debug(md5_pipeline);
203
0
            auto digest = md5_pipeline->getHexDigest();
204
0
            md5_pipeline->enable(false);
205
0
            return digest;
206
0
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
115k
        {
218
115k
            if (!stack_id) {
219
8.32k
                return;
220
8.32k
            }
221
107k
            qpdf_assert_debug(stack.size() >= 2);
222
107k
            top->finish();
223
107k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
107k
            qpdf_assert_debug(top->id() == stack_id);
229
107k
            if (stack_id == md5_id) {
230
0
                md5_pipeline = nullptr;
231
0
                md5_id = 0;
232
0
            }
233
107k
            stack.pop_back();
234
107k
            top = stack.back().get();
235
107k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
// Pop the associated stack entry, if this Popper is still attached to a stack.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
252
253
void
254
Pl_stack::Popper::pop()
255
7.82k
{
256
7.82k
    if (stack) {
257
7.82k
        stack->pop(stack_id);
258
7.82k
    }
259
7.82k
    stack_id = 0;
260
7.82k
    stack = nullptr;
261
7.82k
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
// Bind to the document being written and attach the pipeline stack to the pipeline member.
// root_og keeps its default of (-1, 0) unless the document root is an indirect object, in which
// case it records the root's object/generation.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    pipeline_stack(pipeline)
{
    if (pdf.getRoot().getObjGen().isIndirect()) {
        root_og = pdf.getRoot().getObjGen();
    }
}
350
351
// Release what the writer still owns: the output file (only if it was asked to close it) and any
// output buffer that was never handed out through getBuffer().
QPDFWriter::Members::~Members()
{
    bool const owns_open_file = (file != nullptr) && close_file;
    if (owns_open_file) {
        fclose(file);
    }
    delete output_buffer;
}
358
359
// Create a writer for pdf without choosing an output destination; one of the setOutput* methods
// configures it.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
363
364
// Create a writer for pdf and immediately direct its output to filename via setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    this->setOutputFilename(filename);
}
369
370
// Create a writer for pdf and immediately direct its output to an already open FILE via
// setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    this->setOutputFile(description, file, close_file);
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
// Hand the stored output buffer pointer to the caller and clear the writer's copy, so the Members
// destructor no longer deletes it. Returns whatever is stored, which may be null.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
9.51k
{
430
9.51k
    m->filename = "custom pipeline";
431
9.51k
    m->pipeline_stack.initialize(p);
432
9.51k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
0
{
437
0
    m->object_stream_mode = mode;
438
0
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
9.51k
{
473
9.51k
    m->stream_decode_level = val;
474
9.51k
    m->stream_decode_level_set = true;
475
9.51k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
0
{
493
0
    m->qdf_mode = val;
494
0
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
19.9k
{
511
19.9k
    bool set_version = false;
512
19.9k
    bool set_extension_level = false;
513
19.9k
    if (m->min_pdf_version.empty()) {
514
9.50k
        set_version = true;
515
9.50k
        set_extension_level = true;
516
10.4k
    } else {
517
10.4k
        int old_major = 0;
518
10.4k
        int old_minor = 0;
519
10.4k
        int min_major = 0;
520
10.4k
        int min_minor = 0;
521
10.4k
        parseVersion(version, old_major, old_minor);
522
10.4k
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
10.4k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
10.4k
        if (compare > 0) {
525
327
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
327
            set_version = true;
527
327
            set_extension_level = true;
528
10.1k
        } else if (compare == 0) {
529
1.56k
            if (extension_level > m->min_extension_level) {
530
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
1
                set_extension_level = true;
532
1
            }
533
1.56k
        }
534
10.4k
    }
535
536
19.9k
    if (set_version) {
537
9.82k
        m->min_pdf_version = version;
538
9.82k
    }
539
19.9k
    if (set_extension_level) {
540
9.82k
        m->min_extension_level = extension_level;
541
9.82k
    }
542
19.9k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
9.51k
{
575
9.51k
    m->static_id = val;
576
9.51k
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
0
{
581
0
    m->deterministic_id = val;
582
0
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
9.51k
{
607
9.51k
    m->linearized = val;
608
9.51k
    if (val) {
609
9.51k
        m->pclm = false;
610
9.51k
    }
611
9.51k
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
0
{
665
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
0
    interpretR3EncryptionParameters(
667
0
        allow_accessibility,
668
0
        allow_extract,
669
0
        allow_assemble,
670
0
        allow_annotate_and_form,
671
0
        allow_form_filling,
672
0
        allow_modify_other,
673
0
        print,
674
0
        qpdf_r3m_all);
675
0
    setEncryptionParameters(user_password, owner_password);
676
0
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
9.51k
{
746
9.51k
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
9.51k
    interpretR3EncryptionParameters(
748
9.51k
        allow_accessibility,
749
9.51k
        allow_extract,
750
9.51k
        allow_assemble,
751
9.51k
        allow_annotate_and_form,
752
9.51k
        allow_form_filling,
753
9.51k
        allow_modify_other,
754
9.51k
        print,
755
9.51k
        qpdf_r3m_all);
756
9.51k
    m->encrypt_use_aes = true;
757
9.51k
    setEncryptionParameters(user_password, owner_password);
758
9.51k
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
9.51k
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
9.51k
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
9.51k
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
9.51k
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
9.51k
    case qpdf_r3p_full:
817
9.51k
        break;
818
        // no default so gcc warns for missing cases
819
9.51k
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
9.51k
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
9.51k
    case qpdf_r3m_all:
840
9.51k
        break;
841
        // no default so gcc warns for missing cases
842
9.51k
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
9.51k
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
9.51k
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
9.51k
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
9.51k
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
9.51k
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
9.51k
{
862
9.51k
    generateID(true);
863
9.51k
    m->encryption->setId1(m->id1);
864
9.51k
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
9.51k
    setEncryptionMinimumVersion();
866
9.51k
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
0
{
871
0
    m->preserve_encryption = false;
872
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
0
    if (trailer.hasKey("/Encrypt")) {
874
0
        generateID(true);
875
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
0
        int key_len = 5;
879
0
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
0
        const bool encrypt_metadata =
883
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
0
            : true;
886
0
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
0
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
0
            V,
898
0
            encrypt.getKey("/R").getIntValueAsInt(),
899
0
            key_len,
900
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
0
            encrypt.getKey("/O").getStringValue(),
902
0
            encrypt.getKey("/U").getStringValue(),
903
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
0
            m->id1, // m->id1 == the other file's id1
907
0
            encrypt_metadata);
908
0
        m->encryption_key = V >= 5
909
0
            ? qpdf.getEncryptionKey()
910
0
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
0
        setEncryptionMinimumVersion();
912
0
    }
913
0
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
20.9k
{
955
20.9k
    major = QUtil::string_to_int(version.c_str());
956
20.9k
    minor = 0;
957
20.9k
    size_t p = version.find('.');
958
20.9k
    if ((p != std::string::npos) && (version.length() > p)) {
959
20.9k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
20.9k
    }
961
20.9k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
20.9k
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
41
    }
967
20.9k
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
10.4k
{
972
10.4k
    if (major1 < major2) {
973
209
        return -1;
974
10.2k
    } else if (major1 > major2) {
975
170
        return 1;
976
10.0k
    } else if (minor1 < minor2) {
977
8.35k
        return -1;
978
8.35k
    } else if (minor1 > minor2) {
979
157
        return 1;
980
1.56k
    } else {
981
1.56k
        return 0;
982
1.56k
    }
983
10.4k
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
9.50k
{
988
9.50k
    auto const R = m->encryption->getR();
989
9.50k
    if (R >= 6) {
990
9.50k
        setMinimumPDFVersion("1.7", 8);
991
9.50k
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
0
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
0
    } else if (R == 3) {
996
0
        setMinimumPDFVersion("1.4");
997
0
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
9.50k
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
247k
{
1005
247k
    if (m->encryption) {
1006
247k
        m->cur_data_key = QPDF::compute_data_key(
1007
247k
            m->encryption_key,
1008
247k
            objid,
1009
247k
            0,
1010
247k
            m->encrypt_use_aes,
1011
247k
            m->encryption->getV(),
1012
247k
            m->encryption->getR());
1013
247k
    }
1014
247k
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
6.74k
{
1019
6.74k
    unsigned int bytes = 0;
1020
16.6k
    while (n) {
1021
9.88k
        ++bytes;
1022
9.88k
        n >>= 8;
1023
9.88k
    }
1024
6.74k
    return bytes;
1025
6.74k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
483k
{
1030
483k
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
483k
    unsigned char data[sizeof(unsigned long long)];
1034
1.21M
    for (unsigned int i = 0; i < bytes; ++i) {
1035
732k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
732k
        val >>= 8;
1037
732k
    }
1038
483k
    m->pipeline->write(data, bytes);
1039
483k
}
1040
1041
// Send str to the current pipeline unchanged. Returns *this so calls can be chained.
QPDFWriter&
QPDFWriter::write(std::string_view str)
{
    auto& sink = *m->pipeline;
    sink.write(str);
    return *this;
}
1047
1048
// Send the decimal text form of an integral value to the current pipeline. Returns *this so calls
// can be chained.
QPDFWriter&
QPDFWriter::write(std::integral auto val)
{
    auto& sink = *m->pipeline;
    sink.write(std::to_string(val));
    return *this;
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
928k
{
1051
928k
    m->pipeline->write(std::to_string(val));
1052
928k
    return *this;
1053
928k
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
240k
{
1051
240k
    m->pipeline->write(std::to_string(val));
1052
240k
    return *this;
1053
240k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
69.2k
{
1051
69.2k
    m->pipeline->write(std::to_string(val));
1052
69.2k
    return *this;
1053
69.2k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
6.74k
{
1051
6.74k
    m->pipeline->write(std::to_string(val));
1052
6.74k
    return *this;
1053
6.74k
}
1054
1055
// Forward a (count, character) pair to the current pipeline; callers in this file use it to emit
// runs of a single character such as padding. Returns *this so calls can be chained.
QPDFWriter&
QPDFWriter::write(size_t count, char c)
{
    auto& sink = *m->pipeline;
    sink.write(count, c);
    return *this;
}
1061
1062
// Send the result of Name::normalize(str) to the current pipeline. Returns *this so calls can be
// chained.
QPDFWriter&
QPDFWriter::write_name(std::string const& str)
{
    auto& sink = *m->pipeline;
    sink.write(Name::normalize(str));
    return *this;
}
1068
1069
// Send str to the current pipeline in its unparsed PDF string form; force_binary is passed
// through to QPDF_String::unparse. Returns *this so calls can be chained.
QPDFWriter&
QPDFWriter::write_string(std::string const& str, bool force_binary)
{
    auto& sink = *m->pipeline;
    sink.write(QPDF_String(str).unparse(force_binary));
    return *this;
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
663k
{
1080
663k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
663k
    return *this;
1084
663k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
551k
{
1080
551k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
551k
    return *this;
1084
551k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
63.5k
{
1080
63.5k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
63.5k
    return *this;
1084
63.5k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
31.7k
{
1080
31.7k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
31.7k
    return *this;
1084
31.7k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
16.1k
{
1080
16.1k
    if (m->qdf_mode) {
1081
0
        m->pipeline->write(std::forward<Args>(args)...);
1082
0
    }
1083
16.1k
    return *this;
1084
16.1k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
238k
{
1090
238k
    if (!m->qdf_mode) {
1091
238k
        m->pipeline->write(std::forward<Args>(args)...);
1092
238k
    }
1093
238k
    return *this;
1094
238k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
206k
{
1090
206k
    if (!m->qdf_mode) {
1091
206k
        m->pipeline->write(std::forward<Args>(args)...);
1092
206k
    }
1093
206k
    return *this;
1094
206k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
31.7k
{
1090
31.7k
    if (!m->qdf_mode) {
1091
31.7k
        m->pipeline->write(std::forward<Args>(args)...);
1092
31.7k
    }
1093
31.7k
    return *this;
1094
31.7k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
55.1k
{
1099
55.1k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
55.1k
        length += 32 - (length & 0xf);
1103
55.1k
    }
1104
55.1k
}
1105
1106
// Write str to the output, encrypting it with the current data key when one is in effect. Without
// encryption or without a data key the text is written as is; otherwise it is piped through
// Pl_AES_PDF or Pl_RC4 depending on m->encrypt_use_aes. Returns *this so calls can be chained.
QPDFWriter&
QPDFWriter::write_encrypted(std::string_view str)
{
    bool const encrypting = m->encryption && !m->cur_data_key.empty();
    if (!encrypting) {
        write(str);
        return *this;
    }

    if (m->encrypt_use_aes) {
        write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
    } else {
        write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
    }
    return *this;
}
1119
1120
void
1121
QPDFWriter::computeDeterministicIDData()
1122
0
{
1123
0
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
0
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
0
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
0
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
282k
{
1135
282k
    if (objid == 0) {
1136
0
        objid = m->next_objid++;
1137
0
    }
1138
282k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
282k
    write(objid).write(" 0 obj\n");
1140
282k
    return objid;
1141
282k
}
1142
1143
void
1144
QPDFWriter::closeObject(int objid)
1145
281k
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
281k
    write("\nendobj\n").write_qdf("\n");
1148
281k
    auto& new_obj = m->new_obj[objid];
1149
281k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
281k
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
140k
{
1155
140k
    int objid = og.getObj();
1156
140k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
137k
        return;
1159
137k
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
53.0k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
53.0k
        m->obj[iter].renumber = m->next_objid++;
1164
53.0k
    }
1165
2.92k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
139k
{
1170
139k
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
139k
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
139k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
0
            return;
1190
0
        }
1191
1192
139k
        QPDFObjGen og = object.getObjGen();
1193
139k
        auto& obj = m->obj[og];
1194
1195
139k
        if (obj.renumber == 0) {
1196
138k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
21
                obj.renumber = -1;
1201
21
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
138k
            } else {
1203
138k
                m->object_queue.push_back(object);
1204
138k
                obj.renumber = m->next_objid++;
1205
1206
138k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
2.86k
                    if (!m->linearized) {
1210
0
                        assignCompressedObjectNumbers(og);
1211
0
                    }
1212
135k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
0
                    ++m->next_objid;
1215
0
                }
1216
138k
            }
1217
138k
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
0
        }
1221
139k
        return;
1222
139k
    } else if (!m->linearized) {
1223
0
        if (object.isArray()) {
1224
0
            for (auto& item: object.as_array()) {
1225
0
                enqueueObject(item);
1226
0
            }
1227
0
        } else if (auto d = object.as_dictionary()) {
1228
0
            for (auto const& item: d) {
1229
0
                if (!item.second.null()) {
1230
0
                    enqueueObject(item.second);
1231
0
                }
1232
0
            }
1233
0
        }
1234
182
    } else {
1235
        // ignore
1236
182
    }
1237
139k
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1241
2.17M
{
1242
2.17M
    if (!m->linearized) {
1243
0
        enqueueObject(child);
1244
0
    }
1245
2.17M
    if (child.isIndirect()) {
1246
467k
        write(m->obj[child].renumber).write(" 0 R");
1247
1.70M
    } else {
1248
1.70M
        unparseObject(child, level, flags);
1249
1.70M
    }
1250
2.17M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
31.7k
{
1256
31.7k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
31.7k
    if (xref_stream) {
1258
2.24k
        m->cur_data_key.clear();
1259
29.5k
    } else {
1260
29.5k
        write("trailer <<");
1261
29.5k
    }
1262
31.7k
    write_qdf("\n");
1263
31.7k
    if (which == t_lin_second) {
1264
15.6k
        write(" /Size ").write(size);
1265
16.1k
    } else {
1266
47.3k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
47.3k
            if (value.null()) {
1268
13.0k
                continue;
1269
13.0k
            }
1270
34.2k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
34.2k
            if (key == "/Size") {
1272
4.76k
                write(size);
1273
4.76k
                if (which == t_lin_first) {
1274
4.76k
                    write(" /Prev ");
1275
4.76k
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
4.76k
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
4.76k
                }
1278
29.5k
            } else {
1279
29.5k
                unparseChild(value, 1, 0);
1280
29.5k
            }
1281
34.2k
            write_qdf("\n");
1282
34.2k
        }
1283
16.1k
    }
1284
1285
    // Write ID
1286
31.7k
    write_qdf(" ").write(" /ID [");
1287
31.7k
    if (linearization_pass == 1) {
1288
16.1k
        std::string original_id1 = getOriginalID1();
1289
16.1k
        if (original_id1.empty()) {
1290
14.3k
            write("<00000000000000000000000000000000>");
1291
14.3k
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
1.76k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
1.76k
            write("<").write(len, '0').write(">");
1298
1.76k
        }
1299
16.1k
        write("<00000000000000000000000000000000>");
1300
16.1k
    } else {
1301
15.6k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
0
            computeDeterministicIDData();
1303
0
        }
1304
15.6k
        generateID(m->encryption.get());
1305
15.6k
        write_string(m->id1, true).write_string(m->id2, true);
1306
15.6k
    }
1307
31.7k
    write("]");
1308
1309
31.7k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
16.1k
        if (m->encryption) {
1312
16.1k
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
16.1k
        }
1314
16.1k
    }
1315
1316
31.7k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
31.7k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
66.4k
{
1326
66.4k
    compress_stream = false;
1327
66.4k
    is_root_metadata = false;
1328
1329
66.4k
    QPDFObjGen old_og = stream.getObjGen();
1330
66.4k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
66.4k
    if (stream.isRootMetadata()) {
1333
600
        is_root_metadata = true;
1334
600
    }
1335
66.4k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
66.4k
    bool filter_on_write = stream.getFilterOnWrite();
1337
66.4k
    if (!filter_on_write) {
1338
14.8k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
14.8k
        filter = false;
1340
14.8k
    }
1341
66.4k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
51.6k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
51.6k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
51.6k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
12.5k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
12.5k
            filter = false;
1351
12.5k
        }
1352
51.6k
    }
1353
66.4k
    bool normalize = false;
1354
66.4k
    bool uncompress = false;
1355
66.4k
    if (filter_on_write && is_root_metadata &&
1356
66.4k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
0
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
0
        filter = true;
1359
0
        compress_stream = false;
1360
0
        uncompress = true;
1361
66.4k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
0
        normalize = true;
1363
0
        filter = true;
1364
66.4k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
39.1k
        compress_stream = true;
1366
39.1k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
39.1k
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
66.4k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
66.4k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
2.57k
        filter = true;
1373
2.57k
        compress_stream = false;
1374
2.57k
    }
1375
1376
66.4k
    bool filtered = false;
1377
75.9k
    for (bool first_attempt: {true, false}) {
1378
75.9k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
75.9k
                                          : m->pipeline_stack.activate(true);
1380
1381
75.9k
        try {
1382
75.9k
            filtered = stream.pipeStreamData(
1383
75.9k
                m->pipeline,
1384
75.9k
                !filter ? 0
1385
75.9k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
40.7k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
75.9k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
75.9k
                false,
1389
75.9k
                first_attempt);
1390
75.9k
            if (filter && !filtered) {
1391
                // Try again
1392
9.41k
                filter = false;
1393
9.41k
                stream.setFilterOnWrite(false);
1394
66.5k
            } else {
1395
66.5k
                break;
1396
66.5k
            }
1397
75.9k
        } catch (std::runtime_error& e) {
1398
83
            if (filter && first_attempt) {
1399
71
                stream.warn("error while getting stream data: "s + e.what());
1400
71
                stream.warn("qpdf will attempt to write the damaged stream unchanged");
1401
71
                filter = false;
1402
71
                stream.setFilterOnWrite(false);
1403
71
                continue;
1404
71
            }
1405
12
            throw std::runtime_error(
1406
12
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
83
        }
1408
9.41k
        if (stream_data) {
1409
1.48k
            stream_data->clear();
1410
1.48k
        }
1411
9.41k
    }
1412
66.5k
    if (!filtered) {
1413
34.9k
        compress_stream = false;
1414
34.9k
    }
1415
66.5k
    return filtered;
1416
66.4k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1421
2.07M
{
1422
2.07M
    QPDFObjGen old_og = object.getObjGen();
1423
2.07M
    int child_flags = flags & ~f_stream;
1424
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1425
    // include the preceding newline.
1426
2.07M
    std::string indent_large = " ";
1427
2.07M
    if (m->qdf_mode) {
1428
0
        indent_large.append(2 * (level + 1), ' ');
1429
0
        indent_large[0] = '\n';
1430
0
    }
1431
2.07M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1432
1433
2.07M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1434
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1435
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1436
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1437
132k
        write("[");
1438
1.21M
        for (auto const& item: object.as_array()) {
1439
1.21M
            write(indent_large);
1440
1.21M
            unparseChild(item, level + 1, child_flags);
1441
1.21M
        }
1442
132k
        write(indent).write("]");
1443
1.93M
    } else if (tc == ::ot_dictionary) {
1444
        // Handle special cases for specific dictionaries.
1445
1446
310k
        if (old_og == m->root_og) {
1447
            // Extensions dictionaries.
1448
1449
            // We have one of several cases:
1450
            //
1451
            // * We need ADBE
1452
            //    - We already have Extensions
1453
            //       - If it has the right ADBE, preserve it
1454
            //       - Otherwise, replace ADBE
1455
            //    - We don't have Extensions: create one from scratch
1456
            // * We don't want ADBE
1457
            //    - We already have Extensions
1458
            //       - If it only has ADBE, remove it
1459
            //       - If it has other things, keep those and remove ADBE
1460
            //    - We have no extensions: no action required
1461
            //
1462
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1463
            // dictionary, so we can modify in place.
1464
1465
16.1k
            auto extensions = object.getKey("/Extensions");
1466
16.1k
            const bool has_extensions = extensions.isDictionary();
1467
16.1k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1468
1469
16.1k
            if (has_extensions || need_extensions_adbe) {
1470
                // Make a shallow copy of this object so we can modify it safely without affecting
1471
                // the original. This code has logic to skip certain keys in agreement with
1472
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1473
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1474
                // here because all we are doing is removing or replacing top-level keys.
1475
15.6k
                object = object.unsafeShallowCopy();
1476
15.6k
                if (!has_extensions) {
1477
15.1k
                    extensions = QPDFObjectHandle();
1478
15.1k
                }
1479
1480
15.6k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1481
15.6k
                const bool have_extensions_other =
1482
15.6k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1483
1484
15.6k
                if (need_extensions_adbe) {
1485
15.5k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1486
                        // We need Extensions and don't have it.  Create it here.
1487
15.1k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1488
15.1k
                        extensions = object.replaceKeyAndGetNew(
1489
15.1k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1490
15.1k
                    }
1491
15.5k
                } else if (!have_extensions_other) {
1492
                    // We have Extensions dictionary and don't want one.
1493
6
                    if (have_extensions_adbe) {
1494
3
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1495
3
                        object.removeKey("/Extensions");
1496
3
                        extensions = QPDFObjectHandle(); // uninitialized
1497
3
                    }
1498
6
                }
1499
1500
15.6k
                if (extensions) {
1501
15.6k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1502
15.6k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1503
15.6k
                    if (adbe.isDictionary() &&
1504
15.6k
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1505
15.6k
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1506
15.6k
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1507
238
                         m->final_extension_level)) {
1508
236
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1509
15.3k
                    } else {
1510
15.3k
                        if (need_extensions_adbe) {
1511
15.3k
                            extensions.replaceKey(
1512
15.3k
                                "/ADBE",
1513
15.3k
                                QPDFObjectHandle::parse(
1514
15.3k
                                    "<< /BaseVersion /" + m->final_pdf_version +
1515
15.3k
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1516
15.3k
                                    " >>"));
1517
15.3k
                        } else {
1518
33
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1519
33
                            extensions.removeKey("/ADBE");
1520
33
                        }
1521
15.3k
                    }
1522
15.6k
                }
1523
15.6k
            }
1524
16.1k
        }
1525
1526
        // Stream dictionaries.
1527
1528
310k
        if (flags & f_stream) {
1529
            // Suppress /Length since we will write it manually
1530
1531
            // Make a shallow copy of this object so we can modify it safely without affecting the
1532
            // original. This code has logic to skip certain keys in agreement with
1533
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1534
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1535
            // all we are doing is removing or replacing top-level keys.
1536
42.9k
            object = object.unsafeShallowCopy();
1537
1538
42.9k
            object.removeKey("/Length");
1539
1540
            // If /DecodeParms is an empty list, remove it.
1541
42.9k
            if (object.getKey("/DecodeParms").empty()) {
1542
40.8k
                object.removeKey("/DecodeParms");
1543
40.8k
            }
1544
1545
42.9k
            if (flags & f_filtered) {
1546
                // We will supply our own filter and decode parameters.
1547
20.4k
                object.removeKey("/Filter");
1548
20.4k
                object.removeKey("/DecodeParms");
1549
22.4k
            } else {
1550
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1551
                // filters.
1552
22.4k
                QPDFObjectHandle filter = object.getKey("/Filter");
1553
22.4k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1554
22.4k
                if (filter.isOrHasName("/Crypt")) {
1555
366
                    if (filter.isName()) {
1556
24
                        object.removeKey("/Filter");
1557
24
                        object.removeKey("/DecodeParms");
1558
342
                    } else {
1559
342
                        int idx = 0;
1560
25.5k
                        for (auto const& item: filter.as_array()) {
1561
25.5k
                            if (item.isNameAndEquals("/Crypt")) {
1562
                                // If filter is an array, then the code in QPDF_Stream has already
1563
                                // verified that DecodeParms and Filters are arrays of the same
1564
                                // length, but if they weren't for some reason, eraseItem does type
1565
                                // and bounds checking. Fuzzing tells us that this can actually
1566
                                // happen.
1567
342
                                filter.eraseItem(idx);
1568
342
                                decode_parms.eraseItem(idx);
1569
342
                                break;
1570
342
                            }
1571
25.1k
                            ++idx;
1572
25.1k
                        }
1573
342
                    }
1574
366
                }
1575
22.4k
            }
1576
42.9k
        }
1577
1578
310k
        write("<<");
1579
1580
1.09M
        for (auto const& [key, value]: object.as_dictionary()) {
1581
1.09M
            if (!value.null()) {
1582
924k
                write(indent_large).write_name(key).write(" ");
1583
924k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1584
924k
                    object.hasKey("/ByteRange")) {
1585
19
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1586
19
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1587
924k
                } else {
1588
924k
                    unparseChild(value, level + 1, child_flags);
1589
924k
                }
1590
924k
            }
1591
1.09M
        }
1592
1593
310k
        if (flags & f_stream) {
1594
42.6k
            write(indent_large).write("/Length ");
1595
1596
42.6k
            if (m->direct_stream_lengths) {
1597
42.6k
                write(stream_length);
1598
42.6k
            } else {
1599
0
                write(m->cur_stream_length_id).write(" 0 R");
1600
0
            }
1601
42.6k
            if (compress && (flags & f_filtered)) {
1602
20.2k
                write(indent_large).write("/Filter /FlateDecode");
1603
20.2k
            }
1604
42.6k
        }
1605
1606
310k
        write(indent).write(">>");
1607
1.62M
    } else if (tc == ::ot_stream) {
1608
        // Write stream data to a buffer.
1609
42.9k
        if (!m->direct_stream_lengths) {
1610
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1611
0
        }
1612
1613
42.9k
        flags |= f_stream;
1614
42.9k
        bool compress_stream = false;
1615
42.9k
        bool is_metadata = false;
1616
42.9k
        std::string stream_data;
1617
42.9k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1618
20.4k
            flags |= f_filtered;
1619
20.4k
        }
1620
42.9k
        QPDFObjectHandle stream_dict = object.getDict();
1621
1622
42.9k
        m->cur_stream_length = stream_data.size();
1623
42.9k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1624
            // Don't encrypt stream data for the metadata stream
1625
0
            m->cur_data_key.clear();
1626
0
        }
1627
42.9k
        adjustAESStreamLength(m->cur_stream_length);
1628
42.9k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1629
42.9k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1630
42.9k
        write("\nstream\n").write_encrypted(stream_data);
1631
42.9k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1632
42.9k
        write(m->added_newline ? "\nendstream" : "endstream");
1633
1.58M
    } else if (tc == ::ot_string) {
1634
45.0k
        std::string val;
1635
45.0k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1636
45.0k
            !m->cur_data_key.empty()) {
1637
33.8k
            val = object.getStringValue();
1638
33.8k
            if (m->encrypt_use_aes) {
1639
33.8k
                Pl_Buffer bufpl("encrypted string");
1640
33.8k
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1641
33.8k
                pl.writeString(val);
1642
33.8k
                pl.finish();
1643
33.8k
                val = QPDF_String(bufpl.getString()).unparse(true);
1644
33.8k
            } else {
1645
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1646
0
                char* tmp = tmp_ph.get();
1647
0
                size_t vlen = val.length();
1648
0
                RC4 rc4(
1649
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1650
0
                    QIntC::to_int(m->cur_data_key.length()));
1651
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1652
0
                rc4.process(data, vlen, data);
1653
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1654
0
            }
1655
33.8k
        } else if (flags & f_hex_string) {
1656
19
            val = QPDF_String(object.getStringValue()).unparse(true);
1657
11.1k
        } else {
1658
11.1k
            val = object.unparseResolved();
1659
11.1k
        }
1660
45.0k
        write(val);
1661
1.53M
    } else {
1662
1.53M
        write(object.unparseResolved());
1663
1.53M
    }
1664
2.07M
}
1665
1666
void
1667
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1668
8.75k
{
1669
8.75k
    qpdf_assert_debug(first_obj > 0);
1670
8.75k
    bool is_first = true;
1671
8.75k
    auto id = std::to_string(first_obj) + ' ';
1672
176k
    for (auto& offset: offsets) {
1673
176k
        if (is_first) {
1674
8.75k
            is_first = false;
1675
168k
        } else {
1676
168k
            write_qdf("\n").write_no_qdf(" ");
1677
168k
        }
1678
176k
        write(id);
1679
176k
        util::increment(id, 1);
1680
176k
        write(offset);
1681
176k
    }
1682
8.75k
    write("\n");
1683
8.75k
}
1684
1685
void
1686
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1687
4.38k
{
1688
    // Note: object might be null if this is a place-holder for an object stream that we are
1689
    // generating from scratch.
1690
1691
4.38k
    QPDFObjGen old_og = object.getObjGen();
1692
4.38k
    qpdf_assert_debug(old_og.getGen() == 0);
1693
4.38k
    int old_id = old_og.getObj();
1694
4.38k
    int new_stream_id = m->obj[old_og].renumber;
1695
1696
4.38k
    std::vector<qpdf_offset_t> offsets;
1697
4.38k
    qpdf_offset_t first = 0;
1698
1699
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1700
    // first pass.
1701
4.38k
    std::string stream_buffer_pass1;
1702
4.38k
    std::string stream_buffer_pass2;
1703
4.38k
    int first_obj = -1;
1704
4.38k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1705
4.38k
    {
1706
        // Pass 1
1707
4.38k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1708
1709
4.38k
        int count = -1;
1710
88.4k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1711
88.4k
            ++count;
1712
88.4k
            int new_obj = m->obj[obj].renumber;
1713
88.4k
            if (first_obj == -1) {
1714
4.38k
                first_obj = new_obj;
1715
4.38k
            }
1716
88.4k
            if (m->qdf_mode) {
1717
0
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1718
0
                if (!m->suppress_original_object_ids) {
1719
0
                    write("; original object ID: ").write(obj.getObj());
1720
                    // For compatibility, only write the generation if non-zero.  While object
1721
                    // streams only allow objects with generation 0, if we are generating object
1722
                    // streams, the old object could have a non-zero generation.
1723
0
                    if (obj.getGen() != 0) {
1724
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1725
0
                        write(" ").write(obj.getGen());
1726
0
                    }
1727
0
                }
1728
0
                write("\n");
1729
0
            }
1730
1731
88.4k
            offsets.push_back(m->pipeline->getCount());
1732
            // To avoid double-counting objects being written in object streams for progress
1733
            // reporting, decrement in pass 1.
1734
88.4k
            indicateProgress(true, false);
1735
1736
88.4k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1737
88.4k
            if (obj_to_write.isStream()) {
1738
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1739
                // time, but it's not clear to me how to construct a case for this.
1740
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1741
0
                obj_to_write = QPDFObjectHandle::newNull();
1742
0
            }
1743
88.4k
            writeObject(obj_to_write, count);
1744
1745
88.4k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1746
88.4k
        }
1747
4.38k
    }
1748
4.38k
    {
1749
        // Adjust offsets to skip over comment before first object
1750
4.38k
        first = offsets.at(0);
1751
88.4k
        for (auto& iter: offsets) {
1752
88.4k
            iter -= first;
1753
88.4k
        }
1754
1755
        // Take one pass at writing pairs of numbers so we can get their size information
1756
4.38k
        {
1757
4.38k
            auto pp_discard = m->pipeline_stack.activate(true);
1758
4.38k
            writeObjectStreamOffsets(offsets, first_obj);
1759
4.38k
            first += m->pipeline->getCount();
1760
4.38k
        }
1761
1762
        // Set up a stream to write the stream data into a buffer.
1763
4.38k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1764
1765
4.38k
        writeObjectStreamOffsets(offsets, first_obj);
1766
4.38k
        write(stream_buffer_pass1);
1767
4.38k
        stream_buffer_pass1.clear();
1768
4.38k
        stream_buffer_pass1.shrink_to_fit();
1769
4.38k
        if (compressed) {
1770
4.37k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1771
4.37k
        }
1772
4.38k
    }
1773
1774
    // Write the object
1775
4.38k
    openObject(new_stream_id);
1776
4.38k
    setDataKey(new_stream_id);
1777
4.38k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1778
4.38k
    size_t length = stream_buffer_pass2.size();
1779
4.38k
    adjustAESStreamLength(length);
1780
4.38k
    write(" /Length ").write(length).write_qdf("\n ");
1781
4.38k
    if (compressed) {
1782
4.37k
        write(" /Filter /FlateDecode");
1783
4.37k
    }
1784
4.38k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1785
4.38k
    if (!object.isNull()) {
1786
        // If the original object has an /Extends key, preserve it.
1787
1.70k
        QPDFObjectHandle dict = object.getDict();
1788
1.70k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1789
1.70k
        if (extends.isIndirect()) {
1790
503
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1791
503
            write_qdf("\n ").write(" /Extends ");
1792
503
            unparseChild(extends, 1, f_in_ostream);
1793
503
        }
1794
1.70k
    }
1795
4.38k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1796
4.38k
    if (m->encryption) {
1797
4.21k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1798
4.21k
    }
1799
4.38k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1800
4.38k
    m->cur_data_key.clear();
1801
4.38k
    closeObject(new_stream_id);
1802
4.38k
}
1803
1804
void
1805
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1806
328k
{
1807
328k
    QPDFObjGen old_og = object.getObjGen();
1808
1809
328k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1810
328k
        m->object_stream_to_objects.contains(old_og.getObj())) {
1811
4.38k
        writeObjectStream(object);
1812
4.38k
        return;
1813
4.38k
    }
1814
1815
324k
    indicateProgress(false, false);
1816
324k
    auto new_id = m->obj[old_og].renumber;
1817
324k
    if (m->qdf_mode) {
1818
0
        if (m->page_object_to_seq.contains(old_og)) {
1819
0
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1820
0
        }
1821
0
        if (m->contents_to_page_seq.contains(old_og)) {
1822
0
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1823
0
        }
1824
0
    }
1825
324k
    if (object_stream_index == -1) {
1826
235k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1827
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1828
0
        }
1829
235k
        openObject(new_id);
1830
235k
        setDataKey(new_id);
1831
235k
        unparseObject(object, 0, 0);
1832
235k
        m->cur_data_key.clear();
1833
235k
        closeObject(new_id);
1834
235k
    } else {
1835
88.4k
        unparseObject(object, 0, f_in_ostream);
1836
88.4k
        write("\n");
1837
88.4k
    }
1838
1839
324k
    if (!m->direct_stream_lengths && object.isStream()) {
1840
0
        if (m->qdf_mode) {
1841
0
            if (m->added_newline) {
1842
0
                write("%QDF: ignore_newline\n");
1843
0
            }
1844
0
        }
1845
0
        openObject(new_id + 1);
1846
0
        write(m->cur_stream_length);
1847
0
        closeObject(new_id + 1);
1848
0
    }
1849
324k
}
1850
1851
std::string
1852
QPDFWriter::getOriginalID1()
1853
25.6k
{
1854
25.6k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1855
25.6k
    if (trailer.hasKey("/ID")) {
1856
3.05k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1857
22.6k
    } else {
1858
22.6k
        return "";
1859
22.6k
    }
1860
25.6k
}
1861
1862
void
1863
QPDFWriter::generateID(bool encrypted)
1864
25.1k
{
1865
    // Generate the ID lazily so that we can handle the user's preference to use static or
1866
    // deterministic ID generation.
1867
1868
25.1k
    if (!m->id2.empty()) {
1869
15.6k
        return;
1870
15.6k
    }
1871
1872
9.51k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1873
1874
9.51k
    std::string result;
1875
1876
9.51k
    if (m->static_id) {
1877
        // For test suite use only...
1878
9.51k
        static unsigned char tmp[] = {
1879
9.51k
            0x31,
1880
9.51k
            0x41,
1881
9.51k
            0x59,
1882
9.51k
            0x26,
1883
9.51k
            0x53,
1884
9.51k
            0x58,
1885
9.51k
            0x97,
1886
9.51k
            0x93,
1887
9.51k
            0x23,
1888
9.51k
            0x84,
1889
9.51k
            0x62,
1890
9.51k
            0x64,
1891
9.51k
            0x33,
1892
9.51k
            0x83,
1893
9.51k
            0x27,
1894
9.51k
            0x95,
1895
9.51k
            0x00};
1896
9.51k
        result = reinterpret_cast<char*>(tmp);
1897
9.51k
    } else {
1898
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1899
        // only thing that's really important is that it is very likely to be unique.  We can't
1900
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1901
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1902
        // sufficient portion of the file's contents such no two non-matching files would match in
1903
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1904
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1905
        // that case, would have the same ID regardless of the output file's name.
1906
1907
0
        std::string seed;
1908
0
        if (m->deterministic_id) {
1909
0
            if (encrypted) {
1910
0
                throw std::runtime_error(
1911
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
1912
0
                    "written is encrypted (even though the file may not require a password)");
1913
0
            }
1914
0
            if (m->deterministic_id_data.empty()) {
1915
0
                throw std::logic_error(
1916
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
1917
0
            }
1918
0
            seed += m->deterministic_id_data;
1919
0
        } else {
1920
0
            seed += std::to_string(QUtil::get_current_time());
1921
0
            seed += m->filename;
1922
0
            seed += " ";
1923
0
        }
1924
0
        seed += " QPDF ";
1925
0
        if (trailer.hasKey("/Info")) {
1926
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1927
0
                if (item.second.isString()) {
1928
0
                    seed += " ";
1929
0
                    seed += item.second.getStringValue();
1930
0
                }
1931
0
            }
1932
0
        }
1933
1934
0
        MD5 m;
1935
0
        m.encodeString(seed.c_str());
1936
0
        MD5::Digest digest;
1937
0
        m.digest(digest);
1938
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1939
0
    }
1940
1941
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1942
    // word.  Otherwise, we'll use the generated ID for both.
1943
1944
9.51k
    m->id2 = result;
1945
    // Note: keep /ID from old file even if --static-id was given.
1946
9.51k
    m->id1 = getOriginalID1();
1947
9.51k
    if (m->id1.empty()) {
1948
8.32k
        m->id1 = m->id2;
1949
8.32k
    }
1950
9.51k
}
1951
1952
void
1953
QPDFWriter::initializeSpecialStreams()
1954
0
{
1955
    // Mark all page content streams in case we are filtering or normalizing.
1956
0
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1957
0
    int num = 0;
1958
0
    for (auto& page: pages) {
1959
0
        m->page_object_to_seq[page.getObjGen()] = ++num;
1960
0
        QPDFObjectHandle contents = page.getKey("/Contents");
1961
0
        std::vector<QPDFObjGen> contents_objects;
1962
0
        if (contents.isArray()) {
1963
0
            int n = static_cast<int>(contents.size());
1964
0
            for (int i = 0; i < n; ++i) {
1965
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1966
0
            }
1967
0
        } else if (contents.isStream()) {
1968
0
            contents_objects.push_back(contents.getObjGen());
1969
0
        }
1970
1971
0
        for (auto const& c: contents_objects) {
1972
0
            m->contents_to_page_seq[c] = num;
1973
0
            m->normalized_streams.insert(c);
1974
0
        }
1975
0
    }
1976
0
}
1977
1978
void
1979
QPDFWriter::preserveObjectStreams()
1980
9.50k
{
1981
9.50k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1982
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1983
    // streams out of old objects that have generation numbers greater than zero. However in an
1984
    // existing PDF, all object stream objects and all objects in them must have generation 0
1985
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1986
    // that are not allowed to be in object streams. In addition to removing objects that were
1987
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1988
    // objects from being included.
1989
9.50k
    auto end = xref.cend();
1990
9.50k
    m->obj.streams_empty = true;
1991
9.50k
    if (m->preserve_unreferenced_objects) {
1992
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1993
0
            if (iter->second.getType() == 2) {
1994
                // Pdf contains object streams.
1995
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1996
0
                m->obj.streams_empty = false;
1997
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1998
0
            }
1999
0
        }
2000
9.50k
    } else {
2001
        // Start by scanning for first compressed object in case we don't have any object streams to
2002
        // process.
2003
97.7k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2004
89.6k
            if (iter->second.getType() == 2) {
2005
                // Pdf contains object streams.
2006
1.43k
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2007
1.43k
                m->obj.streams_empty = false;
2008
1.43k
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2009
                // The object pointed to by iter may be a previous generation, in which case it is
2010
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2011
                // table may contain multiple generations of an object).
2012
401k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2013
400k
                    if (iter->second.getType() == 2) {
2014
375k
                        auto id = static_cast<size_t>(iter->first.getObj());
2015
375k
                        if (id < eligible.size() && eligible[id]) {
2016
55.8k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2017
319k
                        } else {
2018
319k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2019
319k
                        }
2020
375k
                    }
2021
400k
                }
2022
1.43k
                return;
2023
1.43k
            }
2024
89.6k
        }
2025
9.50k
    }
2026
9.50k
}
2027
2028
void
2029
QPDFWriter::generateObjectStreams()
2030
0
{
2031
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2032
    // how many object streams are needed so that we can distribute objects approximately evenly
2033
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2034
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2035
    // here later.
2036
2037
    // This code doesn't do anything with /Extends.
2038
2039
0
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2040
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2041
2042
0
    initializeTables(2U * n_object_streams);
2043
0
    if (n_object_streams == 0) {
2044
0
        m->obj.streams_empty = true;
2045
0
        return;
2046
0
    }
2047
0
    size_t n_per = eligible.size() / n_object_streams;
2048
0
    if (n_per * n_object_streams < eligible.size()) {
2049
0
        ++n_per;
2050
0
    }
2051
0
    unsigned int n = 0;
2052
0
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2053
0
    for (auto const& item: eligible) {
2054
0
        if (n == n_per) {
2055
0
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2056
0
            n = 0;
2057
            // Construct a new null object as the "original" object stream.  The rest of the code
2058
            // knows that this means we're creating the object stream from scratch.
2059
0
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2060
0
        }
2061
0
        auto& obj = m->obj[item];
2062
0
        obj.object_stream = cur_ostream;
2063
0
        obj.gen = item.getGen();
2064
0
        ++n;
2065
0
    }
2066
0
}
2067
2068
// Return a shallow copy of the trailer without the keys that necessarily have to be replaced when
// writing the file.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    QPDFObjectHandle trimmed = m->pdf.getTrailer().unsafeShallowCopy();

    for (char const* key: {
             // Encryption keys
             "/ID",
             "/Encrypt",
             // Modification information
             "/Prev",
             // Keys that potentially come from a cross-reference stream
             "/Index",
             "/W",
             "/Length",
             "/Filter",
             "/DecodeParms",
             "/Type",
             "/XRefStm"}) {
        trimmed.removeKey(key);
    }

    return trimmed;
}
2093
2094
// Make document extension level information direct as required by the spec.
2095
void
2096
QPDFWriter::prepareFileForWrite()
2097
9.44k
{
2098
9.44k
    m->pdf.fixDanglingReferences();
2099
9.44k
    auto root = m->pdf.getRoot();
2100
9.44k
    auto oh = root.getKey("/Extensions");
2101
9.44k
    if (oh.isDictionary()) {
2102
318
        const bool extensions_indirect = oh.isIndirect();
2103
318
        if (extensions_indirect) {
2104
108
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2105
108
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2106
108
        }
2107
318
        if (oh.hasKey("/ADBE")) {
2108
175
            auto adbe = oh.getKey("/ADBE");
2109
175
            if (adbe.isIndirect()) {
2110
120
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2111
120
                adbe.makeDirect();
2112
120
                oh.replaceKey("/ADBE", adbe);
2113
120
            }
2114
175
        }
2115
318
    }
2116
9.44k
}
2117
2118
void
2119
QPDFWriter::initializeTables(size_t extra)
2120
9.50k
{
2121
9.50k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2122
9.50k
    m->obj.resize(size);
2123
9.50k
    m->new_obj.resize(size);
2124
9.50k
}
2125
2126
void
2127
QPDFWriter::doWriteSetup()
2128
9.50k
{
2129
9.50k
    if (m->did_write_setup) {
2130
0
        return;
2131
0
    }
2132
9.50k
    m->did_write_setup = true;
2133
2134
    // Do preliminary setup
2135
2136
9.50k
    if (m->linearized) {
2137
9.50k
        m->qdf_mode = false;
2138
9.50k
    }
2139
2140
9.50k
    if (m->pclm) {
2141
0
        m->stream_decode_level = qpdf_dl_none;
2142
0
        m->compress_streams = false;
2143
0
        m->encryption = nullptr;
2144
0
    }
2145
2146
9.50k
    if (m->qdf_mode) {
2147
0
        if (!m->normalize_content_set) {
2148
0
            m->normalize_content = true;
2149
0
        }
2150
0
        if (!m->compress_streams_set) {
2151
0
            m->compress_streams = false;
2152
0
        }
2153
0
        if (!m->stream_decode_level_set) {
2154
0
            m->stream_decode_level = qpdf_dl_generalized;
2155
0
        }
2156
0
    }
2157
2158
9.50k
    if (m->encryption) {
2159
        // Encryption has been explicitly set
2160
9.50k
        m->preserve_encryption = false;
2161
9.50k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2162
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2163
        // though, we still obey that.
2164
0
        m->preserve_encryption = false;
2165
0
    }
2166
2167
9.50k
    if (m->preserve_encryption) {
2168
0
        copyEncryptionParameters(m->pdf);
2169
0
    }
2170
2171
9.50k
    if (!m->forced_pdf_version.empty()) {
2172
0
        int major = 0;
2173
0
        int minor = 0;
2174
0
        parseVersion(m->forced_pdf_version, major, minor);
2175
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2176
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2177
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2178
0
            m->object_stream_mode = qpdf_o_disable;
2179
0
        }
2180
0
    }
2181
2182
9.50k
    if (m->qdf_mode || m->normalize_content) {
2183
0
        initializeSpecialStreams();
2184
0
    }
2185
2186
9.50k
    if (m->qdf_mode) {
2187
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2188
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2189
        // hint streams always get direct stream lengths.
2190
0
        m->direct_stream_lengths = false;
2191
0
    }
2192
2193
9.50k
    switch (m->object_stream_mode) {
2194
0
    case qpdf_o_disable:
2195
0
        initializeTables();
2196
0
        m->obj.streams_empty = true;
2197
0
        break;
2198
2199
9.50k
    case qpdf_o_preserve:
2200
9.50k
        initializeTables();
2201
9.50k
        preserveObjectStreams();
2202
9.50k
        break;
2203
2204
0
    case qpdf_o_generate:
2205
0
        generateObjectStreams();
2206
0
        break;
2207
2208
        // no default so gcc will warn for missing case tag
2209
9.50k
    }
2210
2211
9.49k
    if (!m->obj.streams_empty) {
2212
1.42k
        if (m->linearized) {
2213
            // Page dictionaries are not allowed to be compressed objects.
2214
1.98k
            for (auto& page: m->pdf.getAllPages()) {
2215
1.98k
                if (m->obj[page].object_stream > 0) {
2216
166
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2217
166
                    m->obj[page].object_stream = 0;
2218
166
                }
2219
1.98k
            }
2220
1.42k
        }
2221
2222
1.42k
        if (m->linearized || m->encryption) {
2223
            // The document catalog is not allowed to be compressed in linearized files either.  It
2224
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2225
            // handle encrypted files with compressed document catalogs, so we disable them in that
2226
            // case as well.
2227
1.42k
            if (m->obj[m->root_og].object_stream > 0) {
2228
6
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2229
6
                m->obj[m->root_og].object_stream = 0;
2230
6
            }
2231
1.42k
        }
2232
2233
        // Generate reverse mapping from object stream to objects
2234
3.04M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2235
3.04M
            if (item.object_stream > 0) {
2236
55.5k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2237
55.5k
                vec.emplace_back(id, item.gen);
2238
55.5k
                if (m->max_ostream_index < vec.size()) {
2239
18.6k
                    ++m->max_ostream_index;
2240
18.6k
                }
2241
55.5k
            }
2242
3.04M
        });
2243
1.42k
        --m->max_ostream_index;
2244
2245
1.42k
        if (m->object_stream_to_objects.empty()) {
2246
423
            m->obj.streams_empty = true;
2247
1.00k
        } else {
2248
1.00k
            setMinimumPDFVersion("1.5");
2249
1.00k
        }
2250
1.42k
    }
2251
2252
9.49k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2253
9.49k
    m->final_pdf_version = m->min_pdf_version;
2254
9.49k
    m->final_extension_level = m->min_extension_level;
2255
9.49k
    if (!m->forced_pdf_version.empty()) {
2256
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2257
0
        m->final_pdf_version = m->forced_pdf_version;
2258
0
        m->final_extension_level = m->forced_extension_level;
2259
0
    }
2260
9.49k
}
2261
2262
void
2263
QPDFWriter::write()
2264
9.50k
{
2265
9.50k
    doWriteSetup();
2266
2267
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2268
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2269
9.50k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2270
2271
9.50k
    prepareFileForWrite();
2272
2273
9.50k
    if (m->linearized) {
2274
9.40k
        writeLinearized();
2275
9.40k
    } else {
2276
97
        writeStandard();
2277
97
    }
2278
2279
9.50k
    m->pipeline->finish();
2280
9.50k
    if (m->close_file) {
2281
0
        fclose(m->file);
2282
0
    }
2283
9.50k
    m->file = nullptr;
2284
9.50k
    if (m->buffer_pipeline) {
2285
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2286
0
        m->buffer_pipeline = nullptr;
2287
0
    }
2288
9.50k
    indicateProgress(false, true);
2289
9.50k
}
2290
2291
// Map an object id/generation of the input file to the one used in the output file. The
// generation of the result is always 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return QPDFObjGen(new_id, 0);
}
2296
2297
std::map<QPDFObjGen, QPDFXRefEntry>
2298
QPDFWriter::getWrittenXRefTable()
2299
0
{
2300
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2301
2302
0
    auto it = result.begin();
2303
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2304
0
        if (item.xref.getType() != 0) {
2305
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2306
0
        }
2307
0
    });
2308
0
    return result;
2309
0
}
2310
2311
void
2312
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2313
42.7k
{
2314
139k
    for (auto const& oh: part) {
2315
139k
        enqueueObject(oh);
2316
139k
    }
2317
42.7k
}
2318
2319
void
2320
QPDFWriter::writeEncryptionDictionary()
2321
16.0k
{
2322
16.0k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2323
16.0k
    auto& enc = *m->encryption;
2324
16.0k
    auto const V = enc.getV();
2325
2326
16.0k
    write("<<");
2327
16.0k
    if (V >= 4) {
2328
16.0k
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2329
16.0k
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2330
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2331
        // MacOS won't open encrypted files without it.
2332
16.0k
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2333
16.0k
        if (!m->encryption->getEncryptMetadata()) {
2334
0
            write(" /EncryptMetadata false");
2335
0
        }
2336
16.0k
    }
2337
16.0k
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2338
16.0k
    write(" /O ").write_string(enc.getO(), true);
2339
16.0k
    if (V >= 4) {
2340
16.0k
        write(" /OE ").write_string(enc.getOE(), true);
2341
16.0k
    }
2342
16.0k
    write(" /P ").write(enc.getP());
2343
16.0k
    if (V >= 5) {
2344
16.0k
        write(" /Perms ").write_string(enc.getPerms(), true);
2345
16.0k
    }
2346
16.0k
    write(" /R ").write(enc.getR());
2347
2348
16.0k
    if (V >= 4) {
2349
16.0k
        write(" /StmF /StdCF /StrF /StdCF");
2350
16.0k
    }
2351
16.0k
    write(" /U ").write_string(enc.getU(), true);
2352
16.0k
    if (V >= 4) {
2353
16.0k
        write(" /UE ").write_string(enc.getUE(), true);
2354
16.0k
    }
2355
16.0k
    write(" /V ").write(enc.getV()).write(" >>");
2356
16.0k
    closeObject(m->encryption_dict_objid);
2357
16.0k
}
2358
2359
std::string
2360
QPDFWriter::getFinalVersion()
2361
0
{
2362
0
    doWriteSetup();
2363
0
    return m->final_pdf_version;
2364
0
}
2365
2366
void
2367
QPDFWriter::writeHeader()
2368
16.1k
{
2369
16.1k
    write("%PDF-").write(m->final_pdf_version);
2370
16.1k
    if (m->pclm) {
2371
        // PCLm version
2372
0
        write("\n%PCLm 1.0\n");
2373
16.1k
    } else {
2374
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2375
        // as binary.
2376
16.1k
        write("\n%\xbf\xf7\xa2\xfe\n");
2377
16.1k
    }
2378
16.1k
    write_qdf("%QDF-1.0\n\n");
2379
2380
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2381
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2382
    // linearized files, we have to write extra header text after the linearization parameter
2383
    // dictionary.
2384
16.1k
}
2385
2386
void
2387
QPDFWriter::writeHintStream(int hint_id)
2388
7.82k
{
2389
7.82k
    std::string hint_buffer;
2390
7.82k
    int S = 0;
2391
7.82k
    int O = 0;
2392
7.82k
    bool compressed = m->compress_streams && !m->qdf_mode;
2393
7.82k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2394
2395
7.82k
    openObject(hint_id);
2396
7.82k
    setDataKey(hint_id);
2397
2398
7.82k
    size_t hlen = hint_buffer.size();
2399
2400
7.82k
    write("<< ");
2401
7.82k
    if (compressed) {
2402
7.82k
        write("/Filter /FlateDecode ");
2403
7.82k
    }
2404
7.82k
    write("/S ").write(S);
2405
7.82k
    if (O) {
2406
265
        write(" /O ").write(O);
2407
265
    }
2408
7.82k
    adjustAESStreamLength(hlen);
2409
7.82k
    write(" /Length ").write(hlen);
2410
7.82k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2411
2412
7.82k
    if (m->encryption) {
2413
7.82k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2414
7.82k
    }
2415
2416
7.82k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2417
7.82k
    closeObject(hint_id);
2418
7.82k
}
2419
2420
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // Convenience overload: forwards to the full overload with zero/false for every additional
    // argument. There are too many extra arguments to replace the overloaded function with
    // defaults in the header file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    bool const suppress_offsets = false;
    int const hint_id = 0; // the full overload only adjusts offsets when hint_id is non-zero
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    int const linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2427
2428
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    // Write a classic cross-reference table for objects first..last followed by the trailer.
    // Returns the pipeline count taken right after the "xref" subsection header, before the
    // newline that ends it.
    //
    // When suppress_offsets is true every in-use entry is written with offset 0. Otherwise, if
    // hint_id is non-zero, the offset of every object other than hint_id that is at or beyond
    // hint_offset is increased by hint_length.
    int const entry_count = last - first + 1;
    write("xref\n").write(first).write(" ").write(entry_count);
    qpdf_offset_t const count_before_newline = m->pipeline->getCount();
    write("\n");

    int next_id = first;
    if (next_id == 0) {
        // Object 0 gets the fixed free entry.
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    auto entry_offset = [&](int id) -> qpdf_offset_t {
        if (suppress_offsets) {
            return 0;
        }
        qpdf_offset_t result = m->new_obj[id].xref.getOffset();
        bool const shift_for_hint = (hint_id != 0) && (id != hint_id) && (result >= hint_offset);
        if (shift_for_hint) {
            result += hint_length;
        }
        return result;
    };

    for (; next_id <= last; ++next_id) {
        write(QUtil::int_to_string(entry_offset(next_id), 10)).write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return count_before_newline;
}
2462
2463
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // Convenience overload: forwards to the full overload with zero/false for every additional
    // argument. There are too many extra arguments to replace the overloaded function with
    // defaults in the header file...too much risk of leaving something off.
    qpdf_offset_t const prev = 0;
    int const hint_id = 0; // the full overload only adjusts offsets when hint_id is non-zero
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    bool const skip_compression = false;
    int const linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2472
2473
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    // Write object xref_id as a cross-reference stream covering objects first..last, including
    // the trailer keys. Returns the pipeline count at entry minus one.
    //
    // If hint_id is non-zero, the offset of every type 1 entry other than hint_id that is at or
    // beyond hint_offset is increased by hint_length. When streams are being compressed, the data
    // is always PNG-filter encoded and the dictionary always advertises /FlateDecode, but the
    // deflate step itself is left out if skip_compression is true.
    qpdf_offset_t const space_before_zero = m->pipeline->getCount() - 1;

    // Field 1 holds offsets and object stream identifiers, so it must be wide enough for both.
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 holds object stream indices.
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // One type byte plus the two fields.
    unsigned int const esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        // Everything written inside this scope is collected in xref_data; the popper restores the
        // previous pipeline when the scope ends.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = m->new_obj[id].xref;
            switch (entry.getType()) {
            case 0:
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
                break;

            case 1:
                {
                    qpdf_offset_t offset = entry.getOffset();
                    bool const shift_for_hint =
                        (hint_id != 0) && (id != hint_id) && (offset >= hint_offset);
                    if (shift_for_hint) {
                        offset += hint_length;
                    }
                    writeBinary(1, 1);
                    writeBinary(QIntC::to_ulonglong(offset), f1_size);
                    writeBinary(0, f2_size);
                }
                break;

            case 2:
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // With skip_compression we still write the stream dictionary for compression below but
        // don't actually compress.  This helps us with computation of padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");
    // /Index is written only when the stream does not cover exactly 0..size-1.
    if (first != 0 || last != (size - 1)) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2568
2569
size_t
2570
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2571
1.21k
{
2572
    // This routine is called right after a linearization first pass xref stream has been written
2573
    // without compression.  Calculate the amount of padding that would be required in the worst
2574
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2575
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2576
    // 10 extra bytes for number length increases.
2577
2578
1.21k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2579
1.21k
}
2580
2581
void
2582
QPDFWriter::writeLinearized()
2583
9.40k
{
2584
    // Optimize file and enqueue objects in order
2585
2586
9.40k
    std::map<int, int> stream_cache;
2587
2588
61.5k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2589
61.5k
        auto& result = stream_cache[stream.getObjectID()];
2590
61.5k
        if (result == 0) {
2591
23.5k
            bool compress_stream;
2592
23.5k
            bool is_metadata;
2593
23.5k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2594
11.0k
                result = 2;
2595
12.5k
            } else {
2596
12.5k
                result = 1;
2597
12.5k
            }
2598
23.5k
        }
2599
61.5k
        return result;
2600
61.5k
    };
2601
2602
9.40k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2603
2604
9.40k
    std::vector<QPDFObjectHandle> part4;
2605
9.40k
    std::vector<QPDFObjectHandle> part6;
2606
9.40k
    std::vector<QPDFObjectHandle> part7;
2607
9.40k
    std::vector<QPDFObjectHandle> part8;
2608
9.40k
    std::vector<QPDFObjectHandle> part9;
2609
9.40k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2610
2611
    // Object number sequence:
2612
    //
2613
    //  second half
2614
    //    second half uncompressed objects
2615
    //    second half xref stream, if any
2616
    //    second half compressed objects
2617
    //  first half
2618
    //    linearization dictionary
2619
    //    first half xref stream, if any
2620
    //    part 4 uncompresesd objects
2621
    //    encryption dictionary, if any
2622
    //    hint stream
2623
    //    part 6 uncompressed objects
2624
    //    first half compressed objects
2625
    //
2626
2627
    // Second half objects
2628
9.40k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2629
9.40k
    int second_half_first_obj = 1;
2630
9.40k
    int after_second_half = 1 + second_half_uncompressed;
2631
9.40k
    m->next_objid = after_second_half;
2632
9.40k
    int second_half_xref = 0;
2633
9.40k
    bool need_xref_stream = !m->obj.streams_empty;
2634
9.40k
    if (need_xref_stream) {
2635
782
        second_half_xref = m->next_objid++;
2636
782
    }
2637
    // Assign numbers to all compressed objects in the second half.
2638
9.40k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2639
35.5k
    for (int i = 0; i < 3; ++i) {
2640
50.5k
        for (auto const& oh: *vecs2[i]) {
2641
50.5k
            assignCompressedObjectNumbers(oh.getObjGen());
2642
50.5k
        }
2643
26.1k
    }
2644
9.40k
    int second_half_end = m->next_objid - 1;
2645
9.40k
    int second_trailer_size = m->next_objid;
2646
2647
    // First half objects
2648
9.40k
    int first_half_start = m->next_objid;
2649
9.40k
    int lindict_id = m->next_objid++;
2650
9.40k
    int first_half_xref = 0;
2651
9.40k
    if (need_xref_stream) {
2652
782
        first_half_xref = m->next_objid++;
2653
782
    }
2654
9.40k
    int part4_first_obj = m->next_objid;
2655
9.40k
    m->next_objid += QIntC::to_int(part4.size());
2656
9.40k
    int after_part4 = m->next_objid;
2657
9.40k
    if (m->encryption) {
2658
8.70k
        m->encryption_dict_objid = m->next_objid++;
2659
8.70k
    }
2660
9.40k
    int hint_id = m->next_objid++;
2661
9.40k
    int part6_first_obj = m->next_objid;
2662
9.40k
    m->next_objid += QIntC::to_int(part6.size());
2663
9.40k
    int after_part6 = m->next_objid;
2664
    // Assign numbers to all compressed objects in the first half
2665
9.40k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2666
26.8k
    for (int i = 0; i < 2; ++i) {
2667
89.5k
        for (auto const& oh: *vecs1[i]) {
2668
89.5k
            assignCompressedObjectNumbers(oh.getObjGen());
2669
89.5k
        }
2670
17.4k
    }
2671
9.40k
    int first_half_end = m->next_objid - 1;
2672
9.40k
    int first_trailer_size = m->next_objid;
2673
2674
9.40k
    int part4_end_marker = part4.back().getObjectID();
2675
9.40k
    int part6_end_marker = part6.back().getObjectID();
2676
9.40k
    qpdf_offset_t space_before_zero = 0;
2677
9.40k
    qpdf_offset_t file_size = 0;
2678
9.40k
    qpdf_offset_t part6_end_offset = 0;
2679
9.40k
    qpdf_offset_t first_half_max_obj_offset = 0;
2680
9.40k
    qpdf_offset_t second_xref_offset = 0;
2681
9.40k
    qpdf_offset_t first_xref_end = 0;
2682
9.40k
    qpdf_offset_t second_xref_end = 0;
2683
2684
9.40k
    m->next_objid = part4_first_obj;
2685
9.40k
    enqueuePart(part4);
2686
9.40k
    if (m->next_objid != after_part4) {
2687
        // This can happen with very botched files as in the fuzzer test. There are likely some
2688
        // faulty assumptions in calculateLinearizationData
2689
9
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2690
9
    }
2691
9.39k
    m->next_objid = part6_first_obj;
2692
9.39k
    enqueuePart(part6);
2693
9.39k
    if (m->next_objid != after_part6) {
2694
186
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2695
186
    }
2696
9.20k
    m->next_objid = second_half_first_obj;
2697
9.20k
    enqueuePart(part7);
2698
9.20k
    enqueuePart(part8);
2699
9.20k
    enqueuePart(part9);
2700
9.20k
    if (m->next_objid != after_second_half) {
2701
147
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2702
147
    }
2703
2704
9.06k
    qpdf_offset_t hint_length = 0;
2705
9.06k
    std::string hint_buffer;
2706
2707
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2708
2709
9.06k
    FILE* lin_pass1_file = nullptr;
2710
9.06k
    auto pp_pass1 = m->pipeline_stack.popper();
2711
9.06k
    auto pp_md5 = m->pipeline_stack.popper();
2712
16.1k
    for (int pass: {1, 2}) {
2713
16.1k
        if (pass == 1) {
2714
8.32k
            if (!m->lin_pass1_filename.empty()) {
2715
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2716
0
                m->pipeline_stack.activate(
2717
0
                    pp_pass1,
2718
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2719
8.32k
            } else {
2720
8.32k
                m->pipeline_stack.activate(pp_pass1, true);
2721
8.32k
            }
2722
8.32k
            if (m->deterministic_id) {
2723
0
                m->pipeline_stack.activate_md5(pp_md5);
2724
0
            }
2725
8.32k
        }
2726
2727
        // Part 1: header
2728
2729
16.1k
        writeHeader();
2730
2731
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2732
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2733
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2734
        // linearization parameter dictionary must appear within the first 1024 characters of the
2735
        // file.
2736
2737
16.1k
        qpdf_offset_t pos = m->pipeline->getCount();
2738
16.1k
        openObject(lindict_id);
2739
16.1k
        write("<<");
2740
16.1k
        if (pass == 2) {
2741
7.82k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2742
7.82k
            int first_page_object = m->obj[pages.at(0)].renumber;
2743
2744
7.82k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2745
            // Implementation note 121 states that a space is mandatory after this open bracket.
2746
7.82k
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2747
7.82k
            write(hint_length);
2748
7.82k
            write(" ] /O ").write(first_page_object);
2749
7.82k
            write(" /E ").write(part6_end_offset + hint_length);
2750
7.82k
            write(" /N ").write(pages.size());
2751
7.82k
            write(" /T ").write(space_before_zero + hint_length);
2752
7.82k
        }
2753
16.1k
        write(" >>");
2754
16.1k
        closeObject(lindict_id);
2755
16.1k
        static int const pad = 200;
2756
16.1k
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2757
2758
        // If the user supplied any additional header text, write it here after the linearization
2759
        // parameter dictionary.
2760
16.1k
        write(m->extra_header_text);
2761
2762
        // Part 3: first page cross reference table and trailer.
2763
2764
16.1k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2765
16.1k
        qpdf_offset_t hint_offset = 0;
2766
16.1k
        if (pass == 2) {
2767
7.82k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2768
7.82k
        }
2769
16.1k
        if (need_xref_stream) {
2770
            // Must pad here too.
2771
1.21k
            if (pass == 1) {
2772
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2773
                // reserved for each file offset.  This would provide adequate space for the xref
2774
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2775
                // file, which is extremely likely.  In the second pass, we will know the actual
2776
                // value for this, but it's okay if it's smaller.
2777
699
                first_half_max_obj_offset = 1 << 25;
2778
699
            }
2779
1.21k
            pos = m->pipeline->getCount();
2780
1.21k
            writeXRefStream(
2781
1.21k
                first_half_xref,
2782
1.21k
                first_half_end,
2783
1.21k
                first_half_max_obj_offset,
2784
1.21k
                t_lin_first,
2785
1.21k
                first_half_start,
2786
1.21k
                first_half_end,
2787
1.21k
                first_trailer_size,
2788
1.21k
                hint_length + second_xref_offset,
2789
1.21k
                hint_id,
2790
1.21k
                hint_offset,
2791
1.21k
                hint_length,
2792
1.21k
                (pass == 1),
2793
1.21k
                pass);
2794
1.21k
            qpdf_offset_t endpos = m->pipeline->getCount();
2795
1.21k
            if (pass == 1) {
2796
                // Pad so we have enough room for the real xref stream.
2797
698
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2798
698
                first_xref_end = m->pipeline->getCount();
2799
698
            } else {
2800
                // Pad so that the next object starts at the same place as in pass 1.
2801
517
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2802
2803
517
                if (m->pipeline->getCount() != first_xref_end) {
2804
0
                    throw std::logic_error(
2805
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
517
            }
2809
1.21k
            write("\n");
2810
14.9k
        } else {
2811
14.9k
            writeXRefTable(
2812
14.9k
                t_lin_first,
2813
14.9k
                first_half_start,
2814
14.9k
                first_half_end,
2815
14.9k
                first_trailer_size,
2816
14.9k
                hint_length + second_xref_offset,
2817
14.9k
                (pass == 1),
2818
14.9k
                hint_id,
2819
14.9k
                hint_offset,
2820
14.9k
                hint_length,
2821
14.9k
                pass);
2822
14.9k
            write("startxref\n0\n%%EOF\n");
2823
14.9k
        }
2824
2825
        // Parts 4 through 9
2826
2827
239k
        for (auto const& cur_object: m->object_queue) {
2828
239k
            if (cur_object.getObjectID() == part6_end_marker) {
2829
15.9k
                first_half_max_obj_offset = m->pipeline->getCount();
2830
15.9k
            }
2831
239k
            writeObject(cur_object);
2832
239k
            if (cur_object.getObjectID() == part4_end_marker) {
2833
16.0k
                if (m->encryption) {
2834
16.0k
                    writeEncryptionDictionary();
2835
16.0k
                }
2836
16.0k
                if (pass == 1) {
2837
8.25k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
8.25k
                } else {
2839
                    // Part 5: hint stream
2840
7.82k
                    write(hint_buffer);
2841
7.82k
                }
2842
16.0k
            }
2843
239k
            if (cur_object.getObjectID() == part6_end_marker) {
2844
15.7k
                part6_end_offset = m->pipeline->getCount();
2845
15.7k
            }
2846
239k
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
16.1k
        second_xref_offset = m->pipeline->getCount();
2853
16.1k
        if (need_xref_stream) {
2854
1.03k
            pos = m->pipeline->getCount();
2855
1.03k
            space_before_zero = writeXRefStream(
2856
1.03k
                second_half_xref,
2857
1.03k
                second_half_end,
2858
1.03k
                second_xref_offset,
2859
1.03k
                t_lin_second,
2860
1.03k
                0,
2861
1.03k
                second_half_end,
2862
1.03k
                second_trailer_size,
2863
1.03k
                0,
2864
1.03k
                0,
2865
1.03k
                0,
2866
1.03k
                0,
2867
1.03k
                (pass == 1),
2868
1.03k
                pass);
2869
1.03k
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
1.03k
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
516
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2875
516
                second_xref_end = m->pipeline->getCount();
2876
516
            } else {
2877
                // Make the file size the same.
2878
516
                auto padding =
2879
516
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2880
516
                write(padding, ' ').write("\n");
2881
2882
                // If this assertion fails, maybe we didn't have enough padding above.
2883
516
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2884
0
                    throw std::logic_error(
2885
0
                        "count mismatch after xref stream; possible insufficient padding?");
2886
0
                }
2887
516
            }
2888
15.1k
        } else {
2889
15.1k
            space_before_zero = writeXRefTable(
2890
15.1k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2891
15.1k
        }
2892
16.1k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2893
2894
16.1k
        if (pass == 1) {
2895
7.82k
            if (m->deterministic_id) {
2896
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2897
0
                computeDeterministicIDData();
2898
0
                pp_md5.pop();
2899
0
            }
2900
2901
            // Close first pass pipeline
2902
7.82k
            file_size = m->pipeline->getCount();
2903
7.82k
            pp_pass1.pop();
2904
2905
            // Save hint offset since it will be set to zero by calling openObject.
2906
7.82k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2907
2908
            // Write hint stream to a buffer
2909
7.82k
            {
2910
7.82k
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2911
7.82k
                writeHintStream(hint_id);
2912
7.82k
            }
2913
7.82k
            hint_length = QIntC::to_offset(hint_buffer.size());
2914
2915
            // Restore hint offset
2916
7.82k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2917
7.82k
            if (lin_pass1_file) {
2918
                // Write some debugging information
2919
0
                fprintf(
2920
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2921
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2922
0
                fprintf(
2923
0
                    lin_pass1_file,
2924
0
                    "%% second_xref_offset=%s\n",
2925
0
                    std::to_string(second_xref_offset).c_str());
2926
0
                fprintf(
2927
0
                    lin_pass1_file,
2928
0
                    "%% second_xref_end=%s\n",
2929
0
                    std::to_string(second_xref_end).c_str());
2930
0
                fclose(lin_pass1_file);
2931
0
                lin_pass1_file = nullptr;
2932
0
            }
2933
7.82k
        }
2934
16.1k
    }
2935
9.06k
}
2936
2937
void
2938
QPDFWriter::enqueueObjectsStandard()
2939
0
{
2940
0
    if (m->preserve_unreferenced_objects) {
2941
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2942
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2943
0
            enqueueObject(oh);
2944
0
        }
2945
0
    }
2946
2947
    // Put root first on queue.
2948
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2949
0
    enqueueObject(trailer.getKey("/Root"));
2950
2951
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2952
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2953
0
    for (auto& item: trailer.as_dictionary()) {
2954
0
        if (!item.second.null()) {
2955
0
            enqueueObject(item.second);
2956
0
        }
2957
0
    }
2958
0
}
2959
2960
void
2961
QPDFWriter::enqueueObjectsPCLm()
2962
0
{
2963
    // Image transform stream content for page strip images. Each of this new stream has to come
2964
    // after every page image strip written in the pclm file.
2965
0
    std::string image_transform_content = "q /image Do Q\n";
2966
2967
    // enqueue all pages first
2968
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2969
0
    for (auto& page: all) {
2970
        // enqueue page
2971
0
        enqueueObject(page);
2972
2973
        // enqueue page contents stream
2974
0
        enqueueObject(page.getKey("/Contents"));
2975
2976
        // enqueue all the strips for each page
2977
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2978
0
        for (auto& image: strips.as_dictionary()) {
2979
0
            if (!image.second.null()) {
2980
0
                enqueueObject(image.second);
2981
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2982
0
            }
2983
0
        }
2984
0
    }
2985
2986
    // Put root in queue.
2987
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2988
0
    enqueueObject(trailer.getKey("/Root"));
2989
0
}
2990
2991
void
2992
QPDFWriter::indicateProgress(bool decrement, bool finished)
2993
420k
{
2994
420k
    if (decrement) {
2995
88.4k
        --m->events_seen;
2996
88.4k
        return;
2997
88.4k
    }
2998
2999
331k
    ++m->events_seen;
3000
3001
331k
    if (!m->progress_reporter.get()) {
3002
331k
        return;
3003
331k
    }
3004
3005
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3006
0
        int percentage =
3007
0
            (finished ? 100
3008
0
                 : m->next_progress_report == 0
3009
0
                 ? 0
3010
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3011
0
        m->progress_reporter->reportProgress(percentage);
3012
0
    }
3013
0
    int increment = std::max(1, (m->events_expected / 100));
3014
0
    while (m->events_seen >= m->next_progress_report) {
3015
0
        m->next_progress_report += increment;
3016
0
    }
3017
0
}
3018
3019
void
3020
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3021
0
{
3022
0
    m->progress_reporter = pr;
3023
0
}
3024
3025
void
3026
QPDFWriter::writeStandard()
3027
0
{
3028
0
    auto pp_md5 = m->pipeline_stack.popper();
3029
0
    if (m->deterministic_id) {
3030
0
        m->pipeline_stack.activate_md5(pp_md5);
3031
0
    }
3032
3033
    // Start writing
3034
3035
0
    writeHeader();
3036
0
    write(m->extra_header_text);
3037
3038
0
    if (m->pclm) {
3039
0
        enqueueObjectsPCLm();
3040
0
    } else {
3041
0
        enqueueObjectsStandard();
3042
0
    }
3043
3044
    // Now start walking queue, outputting each object.
3045
0
    while (m->object_queue_front < m->object_queue.size()) {
3046
0
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3047
0
        ++m->object_queue_front;
3048
0
        writeObject(cur_object);
3049
0
    }
3050
3051
    // Write out the encryption dictionary, if any
3052
0
    if (m->encryption) {
3053
0
        writeEncryptionDictionary();
3054
0
    }
3055
3056
    // Now write out xref.  next_objid is now the number of objects.
3057
0
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3058
0
    if (m->object_stream_to_objects.empty()) {
3059
        // Write regular cross-reference table
3060
0
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3061
0
    } else {
3062
        // Write cross-reference stream.
3063
0
        int xref_id = m->next_objid++;
3064
0
        writeXRefStream(
3065
0
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3066
0
    }
3067
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3068
3069
0
    if (m->deterministic_id) {
3070
0
        QTC::TC(
3071
0
            "qpdf",
3072
0
            "QPDFWriter standard deterministic ID",
3073
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3074
0
    }
3075
0
}