Coverage Report

Created: 2025-08-26 07:09

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Out-of-line destructor with an intentionally empty body.
// Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
}
35
36
// Store the callback that reportProgress() will invoke.
//
// handler is taken by value, so it is moved into the member rather than copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
40
41
// Out-of-line destructor with an intentionally empty body.
// Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    handler(progress);
51
0
}
52
53
namespace
54
{
55
    class Pl_stack
56
    {
57
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
58
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
59
        // scope, the pipeline stack is popped. This causes finish to be called on the current
60
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
61
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
62
        // pops.
63
        class Popper
64
        {
65
            friend class Pl_stack;
66
67
          public:
68
            Popper() = default;
69
            Popper(Popper const&) = delete;
70
            Popper(Popper&& other) noexcept
71
0
            {
72
0
                // For MSVC, default pops the stack
73
0
                if (this != &other) {
74
0
                    stack = other.stack;
75
0
                    stack_id = other.stack_id;
76
0
                    other.stack = nullptr;
77
0
                    other.stack_id = 0;
78
0
                };
79
0
            }
80
            Popper& operator=(Popper const&) = delete;
81
            Popper&
82
            operator=(Popper&& other) noexcept
83
0
            {
84
0
                // For MSVC, default pops the stack
85
0
                if (this != &other) {
86
0
                    stack = other.stack;
87
0
                    stack_id = other.stack_id;
88
0
                    other.stack = nullptr;
89
0
                    other.stack_id = 0;
90
0
                };
91
0
                return *this;
92
0
            }
93
94
            ~Popper();
95
96
            // Manually pop pipeline from the pipeline stack.
97
            void pop();
98
99
          private:
100
            Popper(Pl_stack& stack) :
101
51.0k
                stack(&stack)
102
51.0k
            {
103
51.0k
            }
104
105
            Pl_stack* stack{nullptr};
106
            unsigned long stack_id{0};
107
        };
108
109
      public:
110
        Pl_stack(pl::Count*& top) :
111
9.33k
            top(top)
112
9.33k
        {
113
9.33k
        }
114
115
        Popper
116
        popper()
117
9.20k
        {
118
9.20k
            return {*this};
119
9.20k
        }
120
121
        void
122
        initialize(Pipeline* p)
123
9.33k
        {
124
9.33k
            auto c = std::make_unique<pl::Count>(++last_id, p);
125
9.33k
            top = c.get();
126
9.33k
            stack.emplace_back(std::move(c));
127
9.33k
        }
128
129
        Popper
130
        activate(std::string& str)
131
40.3k
        {
132
40.3k
            Popper pp{*this};
133
40.3k
            activate(pp, str);
134
40.3k
            return pp;
135
40.3k
        }
136
137
        void
138
        activate(Popper& pp, std::string& str)
139
40.3k
        {
140
40.3k
            activate(pp, false, &str, nullptr);
141
40.3k
        }
142
143
        void
144
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
145
0
        {
146
0
            count_buffer.clear();
147
0
            activate(pp, false, &count_buffer, std::move(next));
148
0
        }
149
150
        Popper
151
        activate(
152
            bool discard = false,
153
            std::string* str = nullptr,
154
            std::unique_ptr<Pipeline> next = nullptr)
155
1.52k
        {
156
1.52k
            Popper pp{*this};
157
1.52k
            activate(pp, discard, str, std::move(next));
158
1.52k
            return pp;
159
1.52k
        }
160
161
        void
162
        activate(
163
            Popper& pp,
164
            bool discard = false,
165
            std::string* str = nullptr,
166
            std::unique_ptr<Pipeline> next = nullptr)
167
41.8k
        {
168
41.8k
            std::unique_ptr<pl::Count> c;
169
41.8k
            if (next) {
170
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
171
41.8k
            } else if (discard) {
172
1.52k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
173
40.3k
            } else if (!str) {
174
0
                c = std::make_unique<pl::Count>(++last_id, top);
175
40.3k
            } else {
176
40.3k
                c = std::make_unique<pl::Count>(++last_id, *str);
177
40.3k
            }
178
41.8k
            pp.stack_id = last_id;
179
41.8k
            top = c.get();
180
41.8k
            stack.emplace_back(std::move(c));
181
41.8k
        }
182
        void
183
        activate_md5(Popper& pp)
184
9.20k
        {
185
9.20k
            qpdf_assert_debug(!md5_pipeline);
186
9.20k
            qpdf_assert_debug(md5_id == 0);
187
9.20k
            qpdf_assert_debug(top->getCount() == 0);
188
9.20k
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
189
9.20k
            md5_pipeline->persistAcrossFinish(true);
190
            // Special case code in pop clears m->md5_pipeline upon deletion.
191
9.20k
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
192
9.20k
            pp.stack_id = last_id;
193
9.20k
            md5_id = last_id;
194
9.20k
            top = c.get();
195
9.20k
            stack.emplace_back(std::move(c));
196
9.20k
        }
197
198
        // Return the hex digest and disable the MD5 pipeline.
199
        std::string
200
        hex_digest()
201
9.01k
        {
202
9.01k
            qpdf_assert_debug(md5_pipeline);
203
9.01k
            auto digest = md5_pipeline->getHexDigest();
204
9.01k
            md5_pipeline->enable(false);
205
9.01k
            return digest;
206
9.01k
        }
207
208
        void
209
        clear_buffer()
210
0
        {
211
0
            count_buffer.clear();
212
0
        }
213
214
      private:
215
        void
216
        pop(unsigned long stack_id)
217
51.0k
        {
218
51.0k
            if (!stack_id) {
219
0
                return;
220
0
            }
221
51.0k
            qpdf_assert_debug(stack.size() >= 2);
222
51.0k
            top->finish();
223
51.0k
            qpdf_assert_debug(stack.back().get() == top);
224
            // It used to be possible for this assertion to fail if writeLinearized exits by
225
            // exception when deterministic ID. There are no longer any cases in which two
226
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
227
            // assertion will fail if they get popped out of order from automatic destruction.
228
51.0k
            qpdf_assert_debug(top->id() == stack_id);
229
51.0k
            if (stack_id == md5_id) {
230
9.20k
                md5_pipeline = nullptr;
231
9.20k
                md5_id = 0;
232
9.20k
            }
233
51.0k
            stack.pop_back();
234
51.0k
            top = stack.back().get();
235
51.0k
        }
236
237
        std::vector<std::unique_ptr<pl::Count>> stack;
238
        pl::Count*& top;
239
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
240
        unsigned long last_id{0};
241
        unsigned long md5_id{0};
242
        std::string count_buffer;
243
    };
244
} // namespace
245
246
// Pop the associated entry from the stack, unless this Popper has been detached from its stack.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
252
253
void
254
Pl_stack::Popper::pop()
255
0
{
256
0
    if (stack) {
257
0
        stack->pop(stack_id);
258
0
    }
259
0
    stack_id = 0;
260
0
    stack = nullptr;
261
0
}
262
263
class QPDFWriter::Members
264
{
265
    friend class QPDFWriter;
266
267
  public:
268
    ~Members();
269
270
  private:
271
    Members(QPDF& pdf);
272
    Members(Members const&) = delete;
273
274
    QPDF& pdf;
275
    QPDFObjGen root_og{-1, 0};
276
    char const* filename{"unspecified"};
277
    FILE* file{nullptr};
278
    bool close_file{false};
279
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
280
    Buffer* output_buffer{nullptr};
281
    bool normalize_content_set{false};
282
    bool normalize_content{false};
283
    bool compress_streams{true};
284
    bool compress_streams_set{false};
285
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
286
    bool stream_decode_level_set{false};
287
    bool recompress_flate{false};
288
    bool qdf_mode{false};
289
    bool preserve_unreferenced_objects{false};
290
    bool newline_before_endstream{false};
291
    bool static_id{false};
292
    bool suppress_original_object_ids{false};
293
    bool direct_stream_lengths{true};
294
    bool preserve_encryption{true};
295
    bool linearized{false};
296
    bool pclm{false};
297
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
298
299
    std::unique_ptr<QPDF::EncryptionData> encryption;
300
    std::string encryption_key;
301
    bool encrypt_use_aes{false};
302
303
    std::string id1; // for /ID key of
304
    std::string id2; // trailer dictionary
305
    std::string final_pdf_version;
306
    int final_extension_level{0};
307
    std::string min_pdf_version;
308
    int min_extension_level{0};
309
    std::string forced_pdf_version;
310
    int forced_extension_level{0};
311
    std::string extra_header_text;
312
    int encryption_dict_objid{0};
313
    std::string cur_data_key;
314
    std::unique_ptr<Pipeline> file_pl;
315
    qpdf::pl::Count* pipeline{nullptr};
316
    std::vector<QPDFObjectHandle> object_queue;
317
    size_t object_queue_front{0};
318
    QPDFWriter::ObjTable obj;
319
    QPDFWriter::NewObjTable new_obj;
320
    int next_objid{1};
321
    int cur_stream_length_id{0};
322
    size_t cur_stream_length{0};
323
    bool added_newline{false};
324
    size_t max_ostream_index{0};
325
    std::set<QPDFObjGen> normalized_streams;
326
    std::map<QPDFObjGen, int> page_object_to_seq;
327
    std::map<QPDFObjGen, int> contents_to_page_seq;
328
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
329
    Pl_stack pipeline_stack;
330
    bool deterministic_id{false};
331
    std::string deterministic_id_data;
332
    bool did_write_setup{false};
333
334
    // For linearization only
335
    std::string lin_pass1_filename;
336
337
    // For progress reporting
338
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
339
    int events_expected{0};
340
    int events_seen{0};
341
    int next_progress_report{0};
342
};
343
344
// Bind to the document being written. root_og takes the root object's ObjGen when that is
// indirect and (-1, 0) otherwise. The pipeline stack is bound to the `pipeline` member so that
// it can keep that pointer up to date.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(
        pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen()
                                               : QPDFObjGen(-1, 0)),
    pipeline_stack(pipeline)
{
}
350
351
// Close the output file if close_file was requested, and free any output buffer still held.
QPDFWriter::Members::~Members()
{
    bool const owns_open_file = (file != nullptr) && close_file;
    if (owns_open_file) {
        fclose(file);
    }
    delete output_buffer;
}
358
359
// Create a writer for pdf. No output destination is configured by this constructor.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
363
364
// Create a writer for pdf and pass filename straight to setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    this->setOutputFilename(filename);
}
369
370
// Create a writer for pdf and pass the remaining arguments straight to setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    this->setOutputFile(description, file, close_file);
}
375
376
void
377
QPDFWriter::setOutputFilename(char const* filename)
378
0
{
379
0
    char const* description = filename;
380
0
    FILE* f = nullptr;
381
0
    bool close_file = false;
382
0
    if (filename == nullptr) {
383
0
        description = "standard output";
384
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
385
0
        f = stdout;
386
0
        QUtil::binary_stdout();
387
0
    } else {
388
0
        QTC::TC("qpdf", "QPDFWriter write to file");
389
0
        f = QUtil::safe_fopen(filename, "wb+");
390
0
        close_file = true;
391
0
    }
392
0
    setOutputFile(description, f, close_file);
393
0
}
394
395
void
396
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
397
0
{
398
0
    m->filename = description;
399
0
    m->file = file;
400
0
    m->close_file = close_file;
401
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
402
0
    m->pipeline_stack.initialize(m->file_pl.get());
403
0
}
404
405
void
406
QPDFWriter::setOutputMemory()
407
0
{
408
0
    m->filename = "memory buffer";
409
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
410
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
411
0
}
412
413
// Hand the stored output buffer pointer to the caller and clear the stored pointer, so the
// writer no longer deletes it. Returns nullptr when no buffer is stored.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
420
421
std::shared_ptr<Buffer>
422
QPDFWriter::getBufferSharedPointer()
423
0
{
424
0
    return std::shared_ptr<Buffer>(getBuffer());
425
0
}
426
427
void
428
QPDFWriter::setOutputPipeline(Pipeline* p)
429
9.33k
{
430
9.33k
    m->filename = "custom pipeline";
431
9.33k
    m->pipeline_stack.initialize(p);
432
9.33k
}
433
434
void
435
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
436
0
{
437
0
    m->object_stream_mode = mode;
438
0
}
439
440
void
441
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
442
0
{
443
0
    switch (mode) {
444
0
    case qpdf_s_uncompress:
445
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
446
0
        m->compress_streams = false;
447
0
        break;
448
449
0
    case qpdf_s_preserve:
450
0
        m->stream_decode_level = qpdf_dl_none;
451
0
        m->compress_streams = false;
452
0
        break;
453
454
0
    case qpdf_s_compress:
455
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
456
0
        m->compress_streams = true;
457
0
        break;
458
0
    }
459
0
    m->stream_decode_level_set = true;
460
0
    m->compress_streams_set = true;
461
0
}
462
463
void
464
QPDFWriter::setCompressStreams(bool val)
465
0
{
466
0
    m->compress_streams = val;
467
0
    m->compress_streams_set = true;
468
0
}
469
470
void
471
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
472
9.33k
{
473
9.33k
    m->stream_decode_level = val;
474
9.33k
    m->stream_decode_level_set = true;
475
9.33k
}
476
477
void
478
QPDFWriter::setRecompressFlate(bool val)
479
0
{
480
0
    m->recompress_flate = val;
481
0
}
482
483
void
484
QPDFWriter::setContentNormalization(bool val)
485
0
{
486
0
    m->normalize_content_set = true;
487
0
    m->normalize_content = val;
488
0
}
489
490
void
491
QPDFWriter::setQDFMode(bool val)
492
9.33k
{
493
9.33k
    m->qdf_mode = val;
494
9.33k
}
495
496
void
497
QPDFWriter::setPreserveUnreferencedObjects(bool val)
498
0
{
499
0
    m->preserve_unreferenced_objects = val;
500
0
}
501
502
void
503
QPDFWriter::setNewlineBeforeEndstream(bool val)
504
0
{
505
0
    m->newline_before_endstream = val;
506
0
}
507
508
void
509
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
510
9.65k
{
511
9.65k
    bool set_version = false;
512
9.65k
    bool set_extension_level = false;
513
9.65k
    if (m->min_pdf_version.empty()) {
514
9.25k
        set_version = true;
515
9.25k
        set_extension_level = true;
516
9.25k
    } else {
517
394
        int old_major = 0;
518
394
        int old_minor = 0;
519
394
        int min_major = 0;
520
394
        int min_minor = 0;
521
394
        parseVersion(version, old_major, old_minor);
522
394
        parseVersion(m->min_pdf_version, min_major, min_minor);
523
394
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
524
394
        if (compare > 0) {
525
104
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
526
104
            set_version = true;
527
104
            set_extension_level = true;
528
290
        } else if (compare == 0) {
529
4
            if (extension_level > m->min_extension_level) {
530
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
531
1
                set_extension_level = true;
532
1
            }
533
4
        }
534
394
    }
535
536
9.65k
    if (set_version) {
537
9.36k
        m->min_pdf_version = version;
538
9.36k
    }
539
9.65k
    if (set_extension_level) {
540
9.36k
        m->min_extension_level = extension_level;
541
9.36k
    }
542
9.65k
}
543
544
void
545
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
546
0
{
547
0
    std::string version;
548
0
    int extension_level;
549
0
    v.getVersion(version, extension_level);
550
0
    setMinimumPDFVersion(version, extension_level);
551
0
}
552
553
void
554
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
555
0
{
556
0
    m->forced_pdf_version = version;
557
0
    m->forced_extension_level = extension_level;
558
0
}
559
560
void
561
QPDFWriter::setExtraHeaderText(std::string const& text)
562
0
{
563
0
    m->extra_header_text = text;
564
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
565
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
566
0
        m->extra_header_text += "\n";
567
0
    } else {
568
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
569
0
    }
570
0
}
571
572
void
573
QPDFWriter::setStaticID(bool val)
574
0
{
575
0
    m->static_id = val;
576
0
}
577
578
void
579
QPDFWriter::setDeterministicID(bool val)
580
9.33k
{
581
9.33k
    m->deterministic_id = val;
582
9.33k
}
583
584
void
585
QPDFWriter::setStaticAesIV(bool val)
586
0
{
587
0
    if (val) {
588
0
        Pl_AES_PDF::useStaticIV();
589
0
    }
590
0
}
591
592
void
593
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
594
0
{
595
0
    m->suppress_original_object_ids = val;
596
0
}
597
598
void
599
QPDFWriter::setPreserveEncryption(bool val)
600
0
{
601
0
    m->preserve_encryption = val;
602
0
}
603
604
void
605
QPDFWriter::setLinearization(bool val)
606
0
{
607
0
    m->linearized = val;
608
0
    if (val) {
609
0
        m->pclm = false;
610
0
    }
611
0
}
612
613
void
614
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
615
0
{
616
0
    m->lin_pass1_filename = filename;
617
0
}
618
619
void
620
QPDFWriter::setPCLm(bool val)
621
0
{
622
0
    m->pclm = val;
623
0
    if (val) {
624
0
        m->linearized = false;
625
0
    }
626
0
}
627
628
void
629
QPDFWriter::setR2EncryptionParametersInsecure(
630
    char const* user_password,
631
    char const* owner_password,
632
    bool allow_print,
633
    bool allow_modify,
634
    bool allow_extract,
635
    bool allow_annotate)
636
0
{
637
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
638
0
    if (!allow_print) {
639
0
        m->encryption->setP(3, false);
640
0
    }
641
0
    if (!allow_modify) {
642
0
        m->encryption->setP(4, false);
643
0
    }
644
0
    if (!allow_extract) {
645
0
        m->encryption->setP(5, false);
646
0
    }
647
0
    if (!allow_annotate) {
648
0
        m->encryption->setP(6, false);
649
0
    }
650
0
    setEncryptionParameters(user_password, owner_password);
651
0
}
652
653
void
654
QPDFWriter::setR3EncryptionParametersInsecure(
655
    char const* user_password,
656
    char const* owner_password,
657
    bool allow_accessibility,
658
    bool allow_extract,
659
    bool allow_assemble,
660
    bool allow_annotate_and_form,
661
    bool allow_form_filling,
662
    bool allow_modify_other,
663
    qpdf_r3_print_e print)
664
0
{
665
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
666
0
    interpretR3EncryptionParameters(
667
0
        allow_accessibility,
668
0
        allow_extract,
669
0
        allow_assemble,
670
0
        allow_annotate_and_form,
671
0
        allow_form_filling,
672
0
        allow_modify_other,
673
0
        print,
674
0
        qpdf_r3m_all);
675
0
    setEncryptionParameters(user_password, owner_password);
676
0
}
677
678
void
679
QPDFWriter::setR4EncryptionParametersInsecure(
680
    char const* user_password,
681
    char const* owner_password,
682
    bool allow_accessibility,
683
    bool allow_extract,
684
    bool allow_assemble,
685
    bool allow_annotate_and_form,
686
    bool allow_form_filling,
687
    bool allow_modify_other,
688
    qpdf_r3_print_e print,
689
    bool encrypt_metadata,
690
    bool use_aes)
691
0
{
692
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
693
0
    m->encrypt_use_aes = use_aes;
694
0
    interpretR3EncryptionParameters(
695
0
        allow_accessibility,
696
0
        allow_extract,
697
0
        allow_assemble,
698
0
        allow_annotate_and_form,
699
0
        allow_form_filling,
700
0
        allow_modify_other,
701
0
        print,
702
0
        qpdf_r3m_all);
703
0
    setEncryptionParameters(user_password, owner_password);
704
0
}
705
706
void
707
QPDFWriter::setR5EncryptionParameters(
708
    char const* user_password,
709
    char const* owner_password,
710
    bool allow_accessibility,
711
    bool allow_extract,
712
    bool allow_assemble,
713
    bool allow_annotate_and_form,
714
    bool allow_form_filling,
715
    bool allow_modify_other,
716
    qpdf_r3_print_e print,
717
    bool encrypt_metadata)
718
0
{
719
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
720
0
    m->encrypt_use_aes = true;
721
0
    interpretR3EncryptionParameters(
722
0
        allow_accessibility,
723
0
        allow_extract,
724
0
        allow_assemble,
725
0
        allow_annotate_and_form,
726
0
        allow_form_filling,
727
0
        allow_modify_other,
728
0
        print,
729
0
        qpdf_r3m_all);
730
0
    setEncryptionParameters(user_password, owner_password);
731
0
}
732
733
void
734
QPDFWriter::setR6EncryptionParameters(
735
    char const* user_password,
736
    char const* owner_password,
737
    bool allow_accessibility,
738
    bool allow_extract,
739
    bool allow_assemble,
740
    bool allow_annotate_and_form,
741
    bool allow_form_filling,
742
    bool allow_modify_other,
743
    qpdf_r3_print_e print,
744
    bool encrypt_metadata)
745
0
{
746
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
747
0
    interpretR3EncryptionParameters(
748
0
        allow_accessibility,
749
0
        allow_extract,
750
0
        allow_assemble,
751
0
        allow_annotate_and_form,
752
0
        allow_form_filling,
753
0
        allow_modify_other,
754
0
        print,
755
0
        qpdf_r3m_all);
756
0
    m->encrypt_use_aes = true;
757
0
    setEncryptionParameters(user_password, owner_password);
758
0
}
759
760
void
761
QPDFWriter::interpretR3EncryptionParameters(
762
    bool allow_accessibility,
763
    bool allow_extract,
764
    bool allow_assemble,
765
    bool allow_annotate_and_form,
766
    bool allow_form_filling,
767
    bool allow_modify_other,
768
    qpdf_r3_print_e print,
769
    qpdf_r3_modify_e modify)
770
0
{
771
    // Acrobat 5 security options:
772
773
    // Checkboxes:
774
    //   Enable Content Access for the Visually Impaired
775
    //   Allow Content Copying and Extraction
776
777
    // Allowed changes menu:
778
    //   None
779
    //   Only Document Assembly
780
    //   Only Form Field Fill-in or Signing
781
    //   Comment Authoring, Form Field Fill-in or Signing
782
    //   General Editing, Comment and Form Field Authoring
783
784
    // Allowed printing menu:
785
    //   None
786
    //   Low Resolution
787
    //   Full printing
788
789
    // Meanings of bits in P when R >= 3
790
    //
791
    //  3: low-resolution printing
792
    //  4: document modification except as controlled by 6, 9, and 11
793
    //  5: extraction
794
    //  6: add/modify annotations (comment), fill in forms
795
    //     if 4+6 are set, also allows modification of form fields
796
    //  9: fill in forms even if 6 is clear
797
    // 10: accessibility; ignored by readers, should always be set
798
    // 11: document assembly even if 4 is clear
799
    // 12: high-resolution printing
800
0
    if (!allow_accessibility && m->encryption->getR() <= 3) {
801
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
802
        // is no way to disable accessibility with R > 3.
803
0
        m->encryption->setP(10, false);
804
0
    }
805
0
    if (!allow_extract) {
806
0
        m->encryption->setP(5, false);
807
0
    }
808
809
0
    switch (print) {
810
0
    case qpdf_r3p_none:
811
0
        m->encryption->setP(3, false); // any printing
812
0
        [[fallthrough]];
813
0
    case qpdf_r3p_low:
814
0
        m->encryption->setP(12, false); // high resolution printing
815
0
        [[fallthrough]];
816
0
    case qpdf_r3p_full:
817
0
        break;
818
        // no default so gcc warns for missing cases
819
0
    }
820
821
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
822
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
823
    // stuck with it. See also allow checks below to control the bits individually.
824
825
    // NOT EXERCISED IN TEST SUITE
826
0
    switch (modify) {
827
0
    case qpdf_r3m_none:
828
0
        m->encryption->setP(11, false); // document assembly
829
0
        [[fallthrough]];
830
0
    case qpdf_r3m_assembly:
831
0
        m->encryption->setP(9, false); // filling in form fields
832
0
        [[fallthrough]];
833
0
    case qpdf_r3m_form:
834
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
835
0
        [[fallthrough]];
836
0
    case qpdf_r3m_annotate:
837
0
        m->encryption->setP(4, false); // other modifications
838
0
        [[fallthrough]];
839
0
    case qpdf_r3m_all:
840
0
        break;
841
        // no default so gcc warns for missing cases
842
0
    }
843
    // END NOT EXERCISED IN TEST SUITE
844
845
0
    if (!allow_assemble) {
846
0
        m->encryption->setP(11, false);
847
0
    }
848
0
    if (!allow_annotate_and_form) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
0
    if (!allow_form_filling) {
852
0
        m->encryption->setP(9, false);
853
0
    }
854
0
    if (!allow_modify_other) {
855
0
        m->encryption->setP(4, false);
856
0
    }
857
0
}
858
859
void
860
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
861
0
{
862
0
    generateID(true);
863
0
    m->encryption->setId1(m->id1);
864
0
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
865
0
    setEncryptionMinimumVersion();
866
0
}
867
868
void
869
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
870
0
{
871
0
    m->preserve_encryption = false;
872
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
873
0
    if (trailer.hasKey("/Encrypt")) {
874
0
        generateID(true);
875
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
876
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
877
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
878
0
        int key_len = 5;
879
0
        if (V > 1) {
880
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
881
0
        }
882
0
        const bool encrypt_metadata =
883
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
884
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
885
0
            : true;
886
0
        if (V >= 4) {
887
            // When copying encryption parameters, use AES even if the original file did not.
888
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
889
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
890
            // all potentially having different values.
891
0
            m->encrypt_use_aes = true;
892
0
        }
893
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
894
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
895
896
0
        m->encryption = std::make_unique<QPDF::EncryptionData>(
897
0
            V,
898
0
            encrypt.getKey("/R").getIntValueAsInt(),
899
0
            key_len,
900
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
901
0
            encrypt.getKey("/O").getStringValue(),
902
0
            encrypt.getKey("/U").getStringValue(),
903
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
904
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
905
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
906
0
            m->id1, // m->id1 == the other file's id1
907
0
            encrypt_metadata);
908
0
        m->encryption_key = V >= 5
909
0
            ? qpdf.getEncryptionKey()
910
0
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
911
0
        setEncryptionMinimumVersion();
912
0
    }
913
0
}
914
915
void
916
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
917
0
{
918
0
    if (!m->encryption) {
919
0
        return;
920
0
    }
921
0
    if (compareVersions(major, minor, 1, 3) < 0) {
922
0
        m->encryption = nullptr;
923
0
        return;
924
0
    }
925
0
    int V = m->encryption->getV();
926
0
    int R = m->encryption->getR();
927
0
    if (compareVersions(major, minor, 1, 4) < 0) {
928
0
        if (V > 1 || R > 2) {
929
0
            m->encryption = nullptr;
930
0
        }
931
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
932
0
        if (V > 2 || R > 3) {
933
0
            m->encryption = nullptr;
934
0
        }
935
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
936
0
        if (m->encrypt_use_aes) {
937
0
            m->encryption = nullptr;
938
0
        }
939
0
    } else if (
940
0
        (compareVersions(major, minor, 1, 7) < 0) ||
941
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
942
0
        if (V >= 5 || R >= 5) {
943
0
            m->encryption = nullptr;
944
0
        }
945
0
    }
946
947
0
    if (!m->encryption) {
948
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
949
0
    }
950
0
}
951
952
void
953
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
954
782
{
955
782
    major = QUtil::string_to_int(version.c_str());
956
782
    minor = 0;
957
782
    size_t p = version.find('.');
958
782
    if ((p != std::string::npos) && (version.length() > p)) {
959
782
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
960
782
    }
961
782
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
962
782
    if (tmp != version) {
963
        // The version number in the input is probably invalid. This happens with some files that
964
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
965
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
966
22
    }
967
782
}
968
969
int
970
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
971
388
{
972
388
    if (major1 < major2) {
973
3
        return -1;
974
385
    } else if (major1 > major2) {
975
7
        return 1;
976
378
    } else if (minor1 < minor2) {
977
277
        return -1;
978
277
    } else if (minor1 > minor2) {
979
97
        return 1;
980
97
    } else {
981
4
        return 0;
982
4
    }
983
388
}
984
985
void
986
QPDFWriter::setEncryptionMinimumVersion()
987
0
{
988
0
    auto const R = m->encryption->getR();
989
0
    if (R >= 6) {
990
0
        setMinimumPDFVersion("1.7", 8);
991
0
    } else if (R == 5) {
992
0
        setMinimumPDFVersion("1.7", 3);
993
0
    } else if (R == 4) {
994
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
995
0
    } else if (R == 3) {
996
0
        setMinimumPDFVersion("1.4");
997
0
    } else {
998
0
        setMinimumPDFVersion("1.3");
999
0
    }
1000
0
}
1001
1002
void
1003
QPDFWriter::setDataKey(int objid)
1004
82.0k
{
1005
82.0k
    if (m->encryption) {
1006
0
        m->cur_data_key = QPDF::compute_data_key(
1007
0
            m->encryption_key,
1008
0
            objid,
1009
0
            0,
1010
0
            m->encrypt_use_aes,
1011
0
            m->encryption->getV(),
1012
0
            m->encryption->getR());
1013
0
    }
1014
82.0k
}
1015
1016
unsigned int
1017
QPDFWriter::bytesNeeded(long long n)
1018
1.03k
{
1019
1.03k
    unsigned int bytes = 0;
1020
2.35k
    while (n) {
1021
1.32k
        ++bytes;
1022
1.32k
        n >>= 8;
1023
1.32k
    }
1024
1.03k
    return bytes;
1025
1.03k
}
1026
1027
void
1028
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
1029
137k
{
1030
137k
    if (bytes > sizeof(unsigned long long)) {
1031
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1032
0
    }
1033
137k
    unsigned char data[sizeof(unsigned long long)];
1034
344k
    for (unsigned int i = 0; i < bytes; ++i) {
1035
206k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1036
206k
        val >>= 8;
1037
206k
    }
1038
137k
    m->pipeline->write(data, bytes);
1039
137k
}
1040
1041
// Pass str to the current pipeline as is. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(std::string_view str)
{
    auto& out = *m->pipeline;
    out.write(str);
    return *this;
}
1047
1048
// Write the decimal text form of an integral value (as produced by std::to_string) to the
// current pipeline. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(std::integral auto val)
{
    auto const digits = std::to_string(val);
    m->pipeline->write(digits);
    return *this;
}
_ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_
Line
Count
Source
1050
413k
{
1051
413k
    m->pipeline->write(std::to_string(val));
1052
413k
    return *this;
1053
413k
}
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_
Line
Count
Source
1050
64.5k
{
1051
64.5k
    m->pipeline->write(std::to_string(val));
1052
64.5k
    return *this;
1053
64.5k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_
Line
Count
Source
1050
29.2k
{
1051
29.2k
    m->pipeline->write(std::to_string(val));
1052
29.2k
    return *this;
1053
29.2k
}
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_
Line
Count
Source
1050
692
{
1051
692
    m->pipeline->write(std::to_string(val));
1052
692
    return *this;
1053
692
}
1054
1055
// Forward (count, c) to the current pipeline's two-argument write -- used in this file to emit
// runs of a repeated character. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write(size_t count, char c)
{
    auto& out = *m->pipeline;
    out.write(count, c);
    return *this;
}
1061
1062
// Write a name to the current pipeline after passing it through Name::normalize. Returns *this
// so that writes can be chained.
QPDFWriter&
QPDFWriter::write_name(std::string const& str)
{
    auto const normalized = Name::normalize(str);
    m->pipeline->write(normalized);
    return *this;
}
1068
1069
// Write str to the current pipeline in the form produced by QPDF_String::unparse; force_binary
// is handed to unparse unchanged. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write_string(std::string const& str, bool force_binary)
{
    QPDF_String literal(str);
    auto const unparsed = literal.unparse(force_binary);
    m->pipeline->write(unparsed);
    return *this;
}
1075
1076
template <typename... Args>
1077
QPDFWriter&
1078
QPDFWriter::write_qdf(Args&&... args)
1079
237k
{
1080
237k
    if (m->qdf_mode) {
1081
237k
        m->pipeline->write(std::forward<Args>(args)...);
1082
237k
    }
1083
237k
    return *this;
1084
237k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1079
195k
{
1080
195k
    if (m->qdf_mode) {
1081
195k
        m->pipeline->write(std::forward<Args>(args)...);
1082
195k
    }
1083
195k
    return *this;
1084
195k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1079
24.0k
{
1080
24.0k
    if (m->qdf_mode) {
1081
24.0k
        m->pipeline->write(std::forward<Args>(args)...);
1082
24.0k
    }
1083
24.0k
    return *this;
1084
24.0k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1079
8.90k
{
1080
8.90k
    if (m->qdf_mode) {
1081
8.90k
        m->pipeline->write(std::forward<Args>(args)...);
1082
8.90k
    }
1083
8.90k
    return *this;
1084
8.90k
}
QPDFWriter& QPDFWriter::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1079
9.20k
{
1080
9.20k
    if (m->qdf_mode) {
1081
9.20k
        m->pipeline->write(std::forward<Args>(args)...);
1082
9.20k
    }
1083
9.20k
    return *this;
1084
9.20k
}
1085
1086
template <typename... Args>
1087
QPDFWriter&
1088
QPDFWriter::write_no_qdf(Args&&... args)
1089
78.1k
{
1090
78.1k
    if (!m->qdf_mode) {
1091
0
        m->pipeline->write(std::forward<Args>(args)...);
1092
0
    }
1093
78.1k
    return *this;
1094
78.1k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1089
69.2k
{
1090
69.2k
    if (!m->qdf_mode) {
1091
0
        m->pipeline->write(std::forward<Args>(args)...);
1092
0
    }
1093
69.2k
    return *this;
1094
69.2k
}
QPDFWriter& QPDFWriter::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1089
8.90k
{
1090
8.90k
    if (!m->qdf_mode) {
1091
0
        m->pipeline->write(std::forward<Args>(args)...);
1092
0
    }
1093
8.90k
    return *this;
1094
8.90k
}
1095
1096
void
1097
QPDFWriter::adjustAESStreamLength(size_t& length)
1098
27.4k
{
1099
27.4k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
1100
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1101
        // also be prepended by 16 bits of random data.
1102
0
        length += 32 - (length & 0xf);
1103
0
    }
1104
27.4k
}
1105
1106
// Write str, encrypting it with the current data key when encryption is configured and a key is
// set: through Pl_AES_PDF if AES is in use, otherwise through Pl_RC4. Without an active key the
// data is written unchanged. Returns *this so that writes can be chained.
QPDFWriter&
QPDFWriter::write_encrypted(std::string_view str)
{
    bool const encrypting = m->encryption && !m->cur_data_key.empty();
    if (!encrypting) {
        write(str);
        return *this;
    }

    if (m->encrypt_use_aes) {
        write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key));
    } else {
        write(pl::pipe<Pl_RC4>(str, m->cur_data_key));
    }
    return *this;
}
1119
1120
void
1121
QPDFWriter::computeDeterministicIDData()
1122
9.01k
{
1123
9.01k
    if (!m->id2.empty()) {
1124
        // Can't happen in the code
1125
0
        throw std::logic_error(
1126
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1127
0
    }
1128
9.01k
    qpdf_assert_debug(m->deterministic_id_data.empty());
1129
9.01k
    m->deterministic_id_data = m->pipeline_stack.hex_digest();
1130
9.01k
}
1131
1132
int
1133
QPDFWriter::openObject(int objid)
1134
108k
{
1135
108k
    if (objid == 0) {
1136
0
        objid = m->next_objid++;
1137
0
    }
1138
108k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1139
108k
    write(objid).write(" 0 obj\n");
1140
108k
    return objid;
1141
108k
}
1142
1143
void
1144
QPDFWriter::closeObject(int objid)
1145
108k
{
1146
    // Write a newline before endobj as it makes the file easier to repair.
1147
108k
    write("\nendobj\n").write_qdf("\n");
1148
108k
    auto& new_obj = m->new_obj[objid];
1149
108k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1150
108k
}
1151
1152
void
1153
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1154
1.52k
{
1155
1.52k
    int objid = og.getObj();
1156
1.52k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1157
        // This is not an object stream.
1158
0
        return;
1159
0
    }
1160
1161
    // Reserve numbers for the objects that belong to this object stream.
1162
27.0k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1163
27.0k
        m->obj[iter].renumber = m->next_objid++;
1164
27.0k
    }
1165
1.52k
}
1166
1167
void
1168
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1169
19.1M
{
1170
19.1M
    if (object.isIndirect()) {
1171
        // This owner check can only be done for indirect objects. It is possible for a direct
1172
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1173
        // one file was insert into another file without copying. Doing that is safe even if the
1174
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1175
1.27M
        if (object.getOwningQPDF() != &(m->pdf)) {
1176
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1177
0
            throw std::logic_error(
1178
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1179
0
                "QPDF::copyForeignObject to add objects from another file.");
1180
0
        }
1181
1182
1.27M
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1183
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1184
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1185
            // file. This case can occur when creating a QDF from a file with object streams when
1186
            // preserving unreferenced objects since the old cross reference streams are not
1187
            // actually referenced by object number.
1188
748
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1189
748
            return;
1190
748
        }
1191
1192
1.27M
        QPDFObjGen og = object.getObjGen();
1193
1.27M
        auto& obj = m->obj[og];
1194
1195
1.27M
        if (obj.renumber == 0) {
1196
84.4k
            if (obj.object_stream > 0) {
1197
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1198
                // stream.  Object streams always have generation 0.
1199
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1200
2.26k
                obj.renumber = -1;
1201
2.26k
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1202
82.2k
            } else {
1203
82.2k
                m->object_queue.push_back(object);
1204
82.2k
                obj.renumber = m->next_objid++;
1205
1206
82.2k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1207
                    // For linearized files, uncompressed objects go at end, and we take care of
1208
                    // assigning numbers to them elsewhere.
1209
1.52k
                    if (!m->linearized) {
1210
1.52k
                        assignCompressedObjectNumbers(og);
1211
1.52k
                    }
1212
80.7k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1213
                    // reserve next object ID for length
1214
26.0k
                    ++m->next_objid;
1215
26.0k
                }
1216
82.2k
            }
1217
1.19M
        } else if (obj.renumber == -1) {
1218
            // This can happen if a specially constructed file indicates that an object stream is
1219
            // inside itself.
1220
827
        }
1221
1.27M
        return;
1222
17.8M
    } else if (!m->linearized) {
1223
17.8M
        if (object.isArray()) {
1224
8.78M
            for (auto& item: object.as_array()) {
1225
8.78M
                enqueueObject(item);
1226
8.78M
            }
1227
17.6M
        } else if (auto d = object.as_dictionary()) {
1228
17.6M
            for (auto const& item: d) {
1229
2.34M
                if (!item.second.null()) {
1230
2.23M
                    enqueueObject(item.second);
1231
2.23M
                }
1232
2.34M
            }
1233
17.6M
        }
1234
17.8M
    } else {
1235
        // ignore
1236
0
    }
1237
19.1M
}
1238
1239
void
1240
QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1241
8.05M
{
1242
8.05M
    if (!m->linearized) {
1243
8.05M
        enqueueObject(child);
1244
8.05M
    }
1245
8.05M
    if (child.isIndirect()) {
1246
158k
        write(m->obj[child].renumber).write(" 0 R");
1247
7.89M
    } else {
1248
7.89M
        unparseObject(child, level, flags);
1249
7.89M
    }
1250
8.05M
}
1251
1252
void
1253
QPDFWriter::writeTrailer(
1254
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1255
9.01k
{
1256
9.01k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1257
9.01k
    if (xref_stream) {
1258
346
        m->cur_data_key.clear();
1259
8.67k
    } else {
1260
8.67k
        write("trailer <<");
1261
8.67k
    }
1262
9.01k
    write_qdf("\n");
1263
9.01k
    if (which == t_lin_second) {
1264
0
        write(" /Size ").write(size);
1265
9.01k
    } else {
1266
20.5k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1267
20.5k
            if (value.null()) {
1268
3.85k
                continue;
1269
3.85k
            }
1270
16.7k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1271
16.7k
            if (key == "/Size") {
1272
2.60k
                write(size);
1273
2.60k
                if (which == t_lin_first) {
1274
0
                    write(" /Prev ");
1275
0
                    qpdf_offset_t pos = m->pipeline->getCount();
1276
0
                    write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' ');
1277
0
                }
1278
14.1k
            } else {
1279
14.1k
                unparseChild(value, 1, 0);
1280
14.1k
            }
1281
16.7k
            write_qdf("\n");
1282
16.7k
        }
1283
9.01k
    }
1284
1285
    // Write ID
1286
9.01k
    write_qdf(" ").write(" /ID [");
1287
9.01k
    if (linearization_pass == 1) {
1288
0
        std::string original_id1 = getOriginalID1();
1289
0
        if (original_id1.empty()) {
1290
0
            write("<00000000000000000000000000000000>");
1291
0
        } else {
1292
            // Write a string of zeroes equal in length to the representation of the original ID.
1293
            // While writing the original ID would have the same number of bytes, it would cause a
1294
            // change to the deterministic ID generated by older versions of the software that
1295
            // hard-coded the length of the ID to 16 bytes.
1296
0
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1297
0
            write("<").write(len, '0').write(">");
1298
0
        }
1299
0
        write("<00000000000000000000000000000000>");
1300
9.01k
    } else {
1301
9.01k
        if (linearization_pass == 0 && m->deterministic_id) {
1302
9.01k
            computeDeterministicIDData();
1303
9.01k
        }
1304
9.01k
        generateID(m->encryption.get());
1305
9.01k
        write_string(m->id1, true).write_string(m->id2, true);
1306
9.01k
    }
1307
9.01k
    write("]");
1308
1309
9.01k
    if (which != t_lin_second) {
1310
        // Write reference to encryption dictionary
1311
8.90k
        if (m->encryption) {
1312
0
            write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R");
1313
0
        }
1314
8.90k
    }
1315
1316
9.01k
    write_qdf("\n>>").write_no_qdf(" >>");
1317
9.01k
}
1318
1319
bool
1320
QPDFWriter::willFilterStream(
1321
    QPDFObjectHandle stream,
1322
    bool& compress_stream,  // out only
1323
    bool& is_root_metadata, // out only
1324
    std::string* stream_data)
1325
26.0k
{
1326
26.0k
    compress_stream = false;
1327
26.0k
    is_root_metadata = false;
1328
1329
26.0k
    QPDFObjGen old_og = stream.getObjGen();
1330
26.0k
    QPDFObjectHandle stream_dict = stream.getDict();
1331
1332
26.0k
    if (stream.isRootMetadata()) {
1333
117
        is_root_metadata = true;
1334
117
    }
1335
26.0k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1336
26.0k
    bool filter_on_write = stream.getFilterOnWrite();
1337
26.0k
    if (!filter_on_write) {
1338
0
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1339
0
        filter = false;
1340
0
    }
1341
26.0k
    if (filter_on_write && m->compress_streams) {
1342
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1343
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1344
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1345
        // setRecompressFlate(true).
1346
0
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1347
0
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1348
0
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1349
0
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1350
0
            filter = false;
1351
0
        }
1352
0
    }
1353
26.0k
    bool normalize = false;
1354
26.0k
    bool uncompress = false;
1355
26.0k
    if (filter_on_write && is_root_metadata &&
1356
26.0k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1357
117
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1358
117
        filter = true;
1359
117
        compress_stream = false;
1360
117
        uncompress = true;
1361
25.8k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1362
3.65k
        normalize = true;
1363
3.65k
        filter = true;
1364
22.2k
    } else if (filter_on_write && filter && m->compress_streams) {
1365
0
        compress_stream = true;
1366
0
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1367
0
    }
1368
1369
    // Disable compression for empty streams to improve compatibility
1370
26.0k
    if (stream_dict.getKey("/Length").isInteger() &&
1371
26.0k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1372
47
        filter = true;
1373
47
        compress_stream = false;
1374
47
    }
1375
1376
26.0k
    bool filtered = false;
1377
36.9k
    for (bool first_attempt: {true, false}) {
1378
36.9k
        auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data)
1379
36.9k
                                          : m->pipeline_stack.activate(true);
1380
1381
36.9k
        try {
1382
36.9k
            filtered = stream.pipeStreamData(
1383
36.9k
                m->pipeline,
1384
36.9k
                !filter ? 0
1385
36.9k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1386
26.0k
                           (compress_stream ? qpdf_ef_compress : 0)),
1387
36.9k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1388
36.9k
                false,
1389
36.9k
                first_attempt);
1390
36.9k
            if (filter && !filtered) {
1391
                // Try again
1392
10.9k
                filter = false;
1393
10.9k
                stream.setFilterOnWrite(false);
1394
26.0k
            } else {
1395
26.0k
                break;
1396
26.0k
            }
1397
36.9k
        } catch (std::runtime_error& e) {
1398
51
            if (filter && first_attempt) {
1399
40
                stream.warn("error while getting stream data: "s + e.what());
1400
40
                stream.warn("qpdf will attempt to write the damaged stream unchanged");
1401
40
                filter = false;
1402
40
                stream.setFilterOnWrite(false);
1403
40
                continue;
1404
40
            }
1405
11
            throw std::runtime_error(
1406
11
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1407
51
        }
1408
10.9k
        if (stream_data) {
1409
10.9k
            stream_data->clear();
1410
10.9k
        }
1411
10.9k
    }
1412
26.0k
    if (!filtered) {
1413
10.9k
        compress_stream = false;
1414
10.9k
    }
1415
26.0k
    return filtered;
1416
26.0k
}
1417
1418
void
1419
QPDFWriter::unparseObject(
1420
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1421
8.02M
{
1422
8.02M
    QPDFObjGen old_og = object.getObjGen();
1423
8.02M
    int child_flags = flags & ~f_stream;
1424
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1425
    // include the preceding newline.
1426
8.02M
    std::string indent_large = " ";
1427
8.02M
    if (m->qdf_mode) {
1428
8.02M
        indent_large.append(2 * (level + 1), ' ');
1429
8.02M
        indent_large[0] = '\n';
1430
8.02M
    }
1431
8.02M
    std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1};
1432
1433
8.02M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1434
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1435
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1436
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1437
56.7k
        write("[");
1438
7.68M
        for (auto const& item: object.as_array()) {
1439
7.68M
            write(indent_large);
1440
7.68M
            unparseChild(item, level + 1, child_flags);
1441
7.68M
        }
1442
56.7k
        write(indent).write("]");
1443
7.96M
    } else if (tc == ::ot_dictionary) {
1444
        // Handle special cases for specific dictionaries.
1445
1446
114k
        if (old_og == m->root_og) {
1447
            // Extensions dictionaries.
1448
1449
            // We have one of several cases:
1450
            //
1451
            // * We need ADBE
1452
            //    - We already have Extensions
1453
            //       - If it has the right ADBE, preserve it
1454
            //       - Otherwise, replace ADBE
1455
            //    - We don't have Extensions: create one from scratch
1456
            // * We don't want ADBE
1457
            //    - We already have Extensions
1458
            //       - If it only has ADBE, remove it
1459
            //       - If it has other things, keep those and remove ADBE
1460
            //    - We have no extensions: no action required
1461
            //
1462
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1463
            // dictionary, so we can modify in place.
1464
1465
8.26k
            auto extensions = object.getKey("/Extensions");
1466
8.26k
            const bool has_extensions = extensions.isDictionary();
1467
8.26k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1468
1469
8.26k
            if (has_extensions || need_extensions_adbe) {
1470
                // Make a shallow copy of this object so we can modify it safely without affecting
1471
                // the original. This code has logic to skip certain keys in agreement with
1472
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1473
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1474
                // here because all we are doing is removing or replacing top-level keys.
1475
315
                object = object.unsafeShallowCopy();
1476
315
                if (!has_extensions) {
1477
0
                    extensions = QPDFObjectHandle();
1478
0
                }
1479
1480
315
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1481
315
                const bool have_extensions_other =
1482
315
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1483
1484
315
                if (need_extensions_adbe) {
1485
22
                    if (!(have_extensions_other || have_extensions_adbe)) {
1486
                        // We need Extensions and don't have it.  Create it here.
1487
0
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1488
0
                        extensions = object.replaceKeyAndGetNew(
1489
0
                            "/Extensions", QPDFObjectHandle::newDictionary());
1490
0
                    }
1491
293
                } else if (!have_extensions_other) {
1492
                    // We have Extensions dictionary and don't want one.
1493
196
                    if (have_extensions_adbe) {
1494
193
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1495
193
                        object.removeKey("/Extensions");
1496
193
                        extensions = QPDFObjectHandle(); // uninitialized
1497
193
                    }
1498
196
                }
1499
1500
315
                if (extensions) {
1501
122
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1502
122
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1503
122
                    if (adbe.isDictionary() &&
1504
122
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1505
122
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1506
122
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1507
1
                         m->final_extension_level)) {
1508
1
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1509
121
                    } else {
1510
121
                        if (need_extensions_adbe) {
1511
21
                            extensions.replaceKey(
1512
21
                                "/ADBE",
1513
21
                                QPDFObjectHandle::parse(
1514
21
                                    "<< /BaseVersion /" + m->final_pdf_version +
1515
21
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1516
21
                                    " >>"));
1517
100
                        } else {
1518
100
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1519
100
                            extensions.removeKey("/ADBE");
1520
100
                        }
1521
121
                    }
1522
122
                }
1523
315
            }
1524
8.26k
        }
1525
1526
        // Stream dictionaries.
1527
1528
114k
        if (flags & f_stream) {
1529
            // Suppress /Length since we will write it manually
1530
1531
            // Make a shallow copy of this object so we can modify it safely without affecting the
1532
            // original. This code has logic to skip certain keys in agreement with
1533
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1534
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1535
            // all we are doing is removing or replacing top-level keys.
1536
25.9k
            object = object.unsafeShallowCopy();
1537
1538
25.9k
            object.removeKey("/Length");
1539
1540
            // If /DecodeParms is an empty list, remove it.
1541
25.9k
            if (object.getKey("/DecodeParms").empty()) {
1542
25.4k
                object.removeKey("/DecodeParms");
1543
25.4k
            }
1544
1545
25.9k
            if (flags & f_filtered) {
1546
                // We will supply our own filter and decode parameters.
1547
15.0k
                object.removeKey("/Filter");
1548
15.0k
                object.removeKey("/DecodeParms");
1549
15.0k
            } else {
1550
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1551
                // filters.
1552
10.9k
                QPDFObjectHandle filter = object.getKey("/Filter");
1553
10.9k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1554
10.9k
                if (filter.isOrHasName("/Crypt")) {
1555
149
                    if (filter.isName()) {
1556
17
                        object.removeKey("/Filter");
1557
17
                        object.removeKey("/DecodeParms");
1558
132
                    } else {
1559
132
                        int idx = 0;
1560
5.79k
                        for (auto const& item: filter.as_array()) {
1561
5.79k
                            if (item.isNameAndEquals("/Crypt")) {
1562
                                // If filter is an array, then the code in QPDF_Stream has already
1563
                                // verified that DecodeParms and Filters are arrays of the same
1564
                                // length, but if they weren't for some reason, eraseItem does type
1565
                                // and bounds checking. Fuzzing tells us that this can actually
1566
                                // happen.
1567
132
                                filter.eraseItem(idx);
1568
132
                                decode_parms.eraseItem(idx);
1569
132
                                break;
1570
132
                            }
1571
5.66k
                            ++idx;
1572
5.66k
                        }
1573
132
                    }
1574
149
                }
1575
10.9k
            }
1576
25.9k
        }
1577
1578
114k
        write("<<");
1579
1580
431k
        for (auto const& [key, value]: object.as_dictionary()) {
1581
431k
            if (!value.null()) {
1582
354k
                write(indent_large).write_name(key).write(" ");
1583
354k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1584
354k
                    object.hasKey("/ByteRange")) {
1585
6
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1586
6
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1587
354k
                } else {
1588
354k
                    unparseChild(value, level + 1, child_flags);
1589
354k
                }
1590
354k
            }
1591
431k
        }
1592
1593
114k
        if (flags & f_stream) {
1594
25.8k
            write(indent_large).write("/Length ");
1595
1596
25.8k
            if (m->direct_stream_lengths) {
1597
0
                write(stream_length);
1598
25.8k
            } else {
1599
25.8k
                write(m->cur_stream_length_id).write(" 0 R");
1600
25.8k
            }
1601
25.8k
            if (compress && (flags & f_filtered)) {
1602
0
                write(indent_large).write("/Filter /FlateDecode");
1603
0
            }
1604
25.8k
        }
1605
1606
114k
        write(indent).write(">>");
1607
7.85M
    } else if (tc == ::ot_stream) {
1608
        // Write stream data to a buffer.
1609
26.0k
        if (!m->direct_stream_lengths) {
1610
26.0k
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1611
26.0k
        }
1612
1613
26.0k
        flags |= f_stream;
1614
26.0k
        bool compress_stream = false;
1615
26.0k
        bool is_metadata = false;
1616
26.0k
        std::string stream_data;
1617
26.0k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1618
15.0k
            flags |= f_filtered;
1619
15.0k
        }
1620
26.0k
        QPDFObjectHandle stream_dict = object.getDict();
1621
1622
26.0k
        m->cur_stream_length = stream_data.size();
1623
26.0k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1624
            // Don't encrypt stream data for the metadata stream
1625
0
            m->cur_data_key.clear();
1626
0
        }
1627
26.0k
        adjustAESStreamLength(m->cur_stream_length);
1628
26.0k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1629
26.0k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1630
26.0k
        write("\nstream\n").write_encrypted(stream_data);
1631
26.0k
        m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n');
1632
26.0k
        write(m->added_newline ? "\nendstream" : "endstream");
1633
7.82M
    } else if (tc == ::ot_string) {
1634
18.8k
        std::string val;
1635
18.8k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1636
18.8k
            !m->cur_data_key.empty()) {
1637
0
            val = object.getStringValue();
1638
0
            if (m->encrypt_use_aes) {
1639
0
                Pl_Buffer bufpl("encrypted string");
1640
0
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key);
1641
0
                pl.writeString(val);
1642
0
                pl.finish();
1643
0
                val = QPDF_String(bufpl.getString()).unparse(true);
1644
0
            } else {
1645
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1646
0
                char* tmp = tmp_ph.get();
1647
0
                size_t vlen = val.length();
1648
0
                RC4 rc4(
1649
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1650
0
                    QIntC::to_int(m->cur_data_key.length()));
1651
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1652
0
                rc4.process(data, vlen, data);
1653
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1654
0
            }
1655
18.8k
        } else if (flags & f_hex_string) {
1656
6
            val = QPDF_String(object.getStringValue()).unparse(true);
1657
18.8k
        } else {
1658
18.8k
            val = object.unparseResolved();
1659
18.8k
        }
1660
18.8k
        write(val);
1661
7.80M
    } else {
1662
7.80M
        write(object.unparseResolved());
1663
7.80M
    }
1664
8.02M
}
1665
1666
void
1667
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1668
3.05k
{
1669
3.05k
    qpdf_assert_debug(first_obj > 0);
1670
3.05k
    bool is_first = true;
1671
3.05k
    auto id = std::to_string(first_obj) + ' ';
1672
54.1k
    for (auto& offset: offsets) {
1673
54.1k
        if (is_first) {
1674
3.05k
            is_first = false;
1675
51.0k
        } else {
1676
51.0k
            write_qdf("\n").write_no_qdf(" ");
1677
51.0k
        }
1678
54.1k
        write(id);
1679
54.1k
        util::increment(id, 1);
1680
54.1k
        write(offset);
1681
54.1k
    }
1682
3.05k
    write("\n");
1683
3.05k
}
1684
1685
void
1686
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1687
1.52k
{
1688
    // Note: object might be null if this is a place-holder for an object stream that we are
1689
    // generating from scratch.
1690
1691
1.52k
    QPDFObjGen old_og = object.getObjGen();
1692
1.52k
    qpdf_assert_debug(old_og.getGen() == 0);
1693
1.52k
    int old_id = old_og.getObj();
1694
1.52k
    int new_stream_id = m->obj[old_og].renumber;
1695
1696
1.52k
    std::vector<qpdf_offset_t> offsets;
1697
1.52k
    qpdf_offset_t first = 0;
1698
1699
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1700
    // first pass.
1701
1.52k
    std::string stream_buffer_pass1;
1702
1.52k
    std::string stream_buffer_pass2;
1703
1.52k
    int first_obj = -1;
1704
1.52k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1705
1.52k
    {
1706
        // Pass 1
1707
1.52k
        auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1);
1708
1709
1.52k
        int count = -1;
1710
27.0k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1711
27.0k
            ++count;
1712
27.0k
            int new_obj = m->obj[obj].renumber;
1713
27.0k
            if (first_obj == -1) {
1714
1.52k
                first_obj = new_obj;
1715
1.52k
            }
1716
27.0k
            if (m->qdf_mode) {
1717
27.0k
                write("%% Object stream: object ").write(new_obj).write(", index ").write(count);
1718
27.0k
                if (!m->suppress_original_object_ids) {
1719
27.0k
                    write("; original object ID: ").write(obj.getObj());
1720
                    // For compatibility, only write the generation if non-zero.  While object
1721
                    // streams only allow objects with generation 0, if we are generating object
1722
                    // streams, the old object could have a non-zero generation.
1723
27.0k
                    if (obj.getGen() != 0) {
1724
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1725
0
                        write(" ").write(obj.getGen());
1726
0
                    }
1727
27.0k
                }
1728
27.0k
                write("\n");
1729
27.0k
            }
1730
1731
27.0k
            offsets.push_back(m->pipeline->getCount());
1732
            // To avoid double-counting objects being written in object streams for progress
1733
            // reporting, decrement in pass 1.
1734
27.0k
            indicateProgress(true, false);
1735
1736
27.0k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1737
27.0k
            if (obj_to_write.isStream()) {
1738
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1739
                // time, but it's not clear to me how to construct a case for this.
1740
1
                obj_to_write.warn("stream found inside object stream; treating as null");
1741
1
                obj_to_write = QPDFObjectHandle::newNull();
1742
1
            }
1743
27.0k
            writeObject(obj_to_write, count);
1744
1745
27.0k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1746
27.0k
        }
1747
1.52k
    }
1748
1.52k
    {
1749
        // Adjust offsets to skip over comment before first object
1750
1.52k
        first = offsets.at(0);
1751
27.0k
        for (auto& iter: offsets) {
1752
27.0k
            iter -= first;
1753
27.0k
        }
1754
1755
        // Take one pass at writing pairs of numbers so we can get their size information
1756
1.52k
        {
1757
1.52k
            auto pp_discard = m->pipeline_stack.activate(true);
1758
1.52k
            writeObjectStreamOffsets(offsets, first_obj);
1759
1.52k
            first += m->pipeline->getCount();
1760
1.52k
        }
1761
1762
        // Set up a stream to write the stream data into a buffer.
1763
1.52k
        auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2);
1764
1765
1.52k
        writeObjectStreamOffsets(offsets, first_obj);
1766
1.52k
        write(stream_buffer_pass1);
1767
1.52k
        stream_buffer_pass1.clear();
1768
1.52k
        stream_buffer_pass1.shrink_to_fit();
1769
1.52k
        if (compressed) {
1770
0
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1771
0
        }
1772
1.52k
    }
1773
1774
    // Write the object
1775
1.52k
    openObject(new_stream_id);
1776
1.52k
    setDataKey(new_stream_id);
1777
1.52k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1778
1.52k
    size_t length = stream_buffer_pass2.size();
1779
1.52k
    adjustAESStreamLength(length);
1780
1.52k
    write(" /Length ").write(length).write_qdf("\n ");
1781
1.52k
    if (compressed) {
1782
0
        write(" /Filter /FlateDecode");
1783
0
    }
1784
1.52k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1785
1.52k
    if (!object.isNull()) {
1786
        // If the original object has an /Extends key, preserve it.
1787
677
        QPDFObjectHandle dict = object.getDict();
1788
677
        QPDFObjectHandle extends = dict.getKey("/Extends");
1789
677
        if (extends.isIndirect()) {
1790
166
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1791
166
            write_qdf("\n ").write(" /Extends ");
1792
166
            unparseChild(extends, 1, f_in_ostream);
1793
166
        }
1794
677
    }
1795
1.52k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1796
1.52k
    if (m->encryption) {
1797
0
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1798
0
    }
1799
1.52k
    write(m->newline_before_endstream ? "\nendstream" : "endstream");
1800
1.52k
    m->cur_data_key.clear();
1801
1.52k
    closeObject(new_stream_id);
1802
1.52k
}
1803
1804
void
1805
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1806
109k
{
1807
109k
    QPDFObjGen old_og = object.getObjGen();
1808
1809
109k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1810
109k
        m->object_stream_to_objects.contains(old_og.getObj())) {
1811
1.52k
        writeObjectStream(object);
1812
1.52k
        return;
1813
1.52k
    }
1814
1815
107k
    indicateProgress(false, false);
1816
107k
    auto new_id = m->obj[old_og].renumber;
1817
107k
    if (m->qdf_mode) {
1818
107k
        if (m->page_object_to_seq.contains(old_og)) {
1819
10.4k
            write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n");
1820
10.4k
        }
1821
107k
        if (m->contents_to_page_seq.contains(old_og)) {
1822
8.59k
            write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n");
1823
8.59k
        }
1824
107k
    }
1825
107k
    if (object_stream_index == -1) {
1826
80.5k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1827
80.5k
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
1828
80.5k
        }
1829
80.5k
        openObject(new_id);
1830
80.5k
        setDataKey(new_id);
1831
80.5k
        unparseObject(object, 0, 0);
1832
80.5k
        m->cur_data_key.clear();
1833
80.5k
        closeObject(new_id);
1834
80.5k
    } else {
1835
27.0k
        unparseObject(object, 0, f_in_ostream);
1836
27.0k
        write("\n");
1837
27.0k
    }
1838
1839
107k
    if (!m->direct_stream_lengths && object.isStream()) {
1840
25.8k
        if (m->qdf_mode) {
1841
25.8k
            if (m->added_newline) {
1842
14.0k
                write("%QDF: ignore_newline\n");
1843
14.0k
            }
1844
25.8k
        }
1845
25.8k
        openObject(new_id + 1);
1846
25.8k
        write(m->cur_stream_length);
1847
25.8k
        closeObject(new_id + 1);
1848
25.8k
    }
1849
107k
}
1850
1851
std::string
1852
QPDFWriter::getOriginalID1()
1853
9.01k
{
1854
9.01k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1855
9.01k
    if (trailer.hasKey("/ID")) {
1856
720
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1857
8.29k
    } else {
1858
8.29k
        return "";
1859
8.29k
    }
1860
9.01k
}
1861
1862
void
1863
QPDFWriter::generateID(bool encrypted)
1864
9.01k
{
1865
    // Generate the ID lazily so that we can handle the user's preference to use static or
1866
    // deterministic ID generation.
1867
1868
9.01k
    if (!m->id2.empty()) {
1869
0
        return;
1870
0
    }
1871
1872
9.01k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1873
1874
9.01k
    std::string result;
1875
1876
9.01k
    if (m->static_id) {
1877
        // For test suite use only...
1878
0
        static unsigned char tmp[] = {
1879
0
            0x31,
1880
0
            0x41,
1881
0
            0x59,
1882
0
            0x26,
1883
0
            0x53,
1884
0
            0x58,
1885
0
            0x97,
1886
0
            0x93,
1887
0
            0x23,
1888
0
            0x84,
1889
0
            0x62,
1890
0
            0x64,
1891
0
            0x33,
1892
0
            0x83,
1893
0
            0x27,
1894
0
            0x95,
1895
0
            0x00};
1896
0
        result = reinterpret_cast<char*>(tmp);
1897
9.01k
    } else {
1898
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1899
        // only thing that's really important is that it is very likely to be unique.  We can't
1900
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1901
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1902
        // sufficient portion of the file's contents such no two non-matching files would match in
1903
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1904
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1905
        // that case, would have the same ID regardless of the output file's name.
1906
1907
9.01k
        std::string seed;
1908
9.01k
        if (m->deterministic_id) {
1909
9.01k
            if (encrypted) {
1910
0
                throw std::runtime_error(
1911
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
1912
0
                    "written is encrypted (even though the file may not require a password)");
1913
0
            }
1914
9.01k
            if (m->deterministic_id_data.empty()) {
1915
0
                throw std::logic_error(
1916
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
1917
0
            }
1918
9.01k
            seed += m->deterministic_id_data;
1919
9.01k
        } else {
1920
0
            seed += std::to_string(QUtil::get_current_time());
1921
0
            seed += m->filename;
1922
0
            seed += " ";
1923
0
        }
1924
9.01k
        seed += " QPDF ";
1925
9.01k
        if (trailer.hasKey("/Info")) {
1926
2.81k
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1927
2.81k
                if (item.second.isString()) {
1928
1.02k
                    seed += " ";
1929
1.02k
                    seed += item.second.getStringValue();
1930
1.02k
                }
1931
2.81k
            }
1932
287
        }
1933
1934
9.01k
        MD5 m;
1935
9.01k
        m.encodeString(seed.c_str());
1936
9.01k
        MD5::Digest digest;
1937
9.01k
        m.digest(digest);
1938
9.01k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1939
9.01k
    }
1940
1941
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1942
    // word.  Otherwise, we'll use the generated ID for both.
1943
1944
9.01k
    m->id2 = result;
1945
    // Note: keep /ID from old file even if --static-id was given.
1946
9.01k
    m->id1 = getOriginalID1();
1947
9.01k
    if (m->id1.empty()) {
1948
8.32k
        m->id1 = m->id2;
1949
8.32k
    }
1950
9.01k
}
1951
1952
void
1953
QPDFWriter::initializeSpecialStreams()
1954
9.33k
{
1955
    // Mark all page content streams in case we are filtering or normalizing.
1956
9.33k
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1957
9.33k
    int num = 0;
1958
10.6k
    for (auto& page: pages) {
1959
10.6k
        m->page_object_to_seq[page.getObjGen()] = ++num;
1960
10.6k
        QPDFObjectHandle contents = page.getKey("/Contents");
1961
10.6k
        std::vector<QPDFObjGen> contents_objects;
1962
10.6k
        if (contents.isArray()) {
1963
884
            int n = static_cast<int>(contents.size());
1964
20.3k
            for (int i = 0; i < n; ++i) {
1965
19.5k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1966
19.5k
            }
1967
9.79k
        } else if (contents.isStream()) {
1968
2.32k
            contents_objects.push_back(contents.getObjGen());
1969
2.32k
        }
1970
1971
21.8k
        for (auto const& c: contents_objects) {
1972
21.8k
            m->contents_to_page_seq[c] = num;
1973
21.8k
            m->normalized_streams.insert(c);
1974
21.8k
        }
1975
10.6k
    }
1976
9.33k
}
1977
1978
void
1979
QPDFWriter::preserveObjectStreams()
1980
9.28k
{
1981
9.28k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1982
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1983
    // streams out of old objects that have generation numbers greater than zero. However in an
1984
    // existing PDF, all object stream objects and all objects in them must have generation 0
1985
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1986
    // that are not allowed to be in object streams. In addition to removing objects that were
1987
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1988
    // objects from being included.
1989
9.28k
    auto end = xref.cend();
1990
9.28k
    m->obj.streams_empty = true;
1991
9.28k
    if (m->preserve_unreferenced_objects) {
1992
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1993
0
            if (iter->second.getType() == 2) {
1994
                // Pdf contains object streams.
1995
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1996
0
                m->obj.streams_empty = false;
1997
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1998
0
            }
1999
0
        }
2000
9.28k
    } else {
2001
        // Start by scanning for first compressed object in case we don't have any object streams to
2002
        // process.
2003
91.9k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2004
83.5k
            if (iter->second.getType() == 2) {
2005
                // Pdf contains object streams.
2006
866
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
2007
866
                m->obj.streams_empty = false;
2008
866
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
2009
                // The object pointed to by iter may be a previous generation, in which case it is
2010
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2011
                // table may contain multiple generations of an object).
2012
193k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2013
192k
                    if (iter->second.getType() == 2) {
2014
177k
                        auto id = static_cast<size_t>(iter->first.getObj());
2015
177k
                        if (id < eligible.size() && eligible[id]) {
2016
28.2k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2017
148k
                        } else {
2018
148k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2019
148k
                        }
2020
177k
                    }
2021
192k
                }
2022
866
                return;
2023
866
            }
2024
83.5k
        }
2025
9.28k
    }
2026
9.28k
}
2027
2028
void
2029
QPDFWriter::generateObjectStreams()
2030
0
{
2031
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2032
    // how many object streams are needed so that we can distribute objects approximately evenly
2033
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2034
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2035
    // here later.
2036
2037
    // This code doesn't do anything with /Extends.
2038
2039
0
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
2040
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2041
2042
0
    initializeTables(2U * n_object_streams);
2043
0
    if (n_object_streams == 0) {
2044
0
        m->obj.streams_empty = true;
2045
0
        return;
2046
0
    }
2047
0
    size_t n_per = eligible.size() / n_object_streams;
2048
0
    if (n_per * n_object_streams < eligible.size()) {
2049
0
        ++n_per;
2050
0
    }
2051
0
    unsigned int n = 0;
2052
0
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2053
0
    for (auto const& item: eligible) {
2054
0
        if (n == n_per) {
2055
0
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2056
0
            n = 0;
2057
            // Construct a new null object as the "original" object stream.  The rest of the code
2058
            // knows that this means we're creating the object stream from scratch.
2059
0
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2060
0
        }
2061
0
        auto& obj = m->obj[item];
2062
0
        obj.object_stream = cur_ostream;
2063
0
        obj.gen = item.getGen();
2064
0
        ++n;
2065
0
    }
2066
0
}
2067
2068
// Return a shallow copy of the input trailer without the keys that necessarily have to be
// replaced when writing the file.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    static constexpr char const* replaced_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm"};

    QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy();
    for (char const* key: replaced_keys) {
        trailer.removeKey(key);
    }
    return trailer;
}
2093
2094
// Make document extension level information direct as required by the spec.
2095
void
2096
QPDFWriter::prepareFileForWrite()
2097
9.23k
{
2098
9.23k
    m->pdf.fixDanglingReferences();
2099
9.23k
    auto root = m->pdf.getRoot();
2100
9.23k
    auto oh = root.getKey("/Extensions");
2101
9.23k
    if (oh.isDictionary()) {
2102
341
        const bool extensions_indirect = oh.isIndirect();
2103
341
        if (extensions_indirect) {
2104
92
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2105
92
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2106
92
        }
2107
341
        if (oh.hasKey("/ADBE")) {
2108
244
            auto adbe = oh.getKey("/ADBE");
2109
244
            if (adbe.isIndirect()) {
2110
211
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2111
211
                adbe.makeDirect();
2112
211
                oh.replaceKey("/ADBE", adbe);
2113
211
            }
2114
244
        }
2115
341
    }
2116
9.23k
}
2117
2118
void
2119
QPDFWriter::initializeTables(size_t extra)
2120
9.28k
{
2121
9.28k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2122
9.28k
    m->obj.resize(size);
2123
9.28k
    m->new_obj.resize(size);
2124
9.28k
}
2125
2126
void
2127
QPDFWriter::doWriteSetup()
2128
9.33k
{
2129
9.33k
    if (m->did_write_setup) {
2130
0
        return;
2131
0
    }
2132
9.33k
    m->did_write_setup = true;
2133
2134
    // Do preliminary setup
2135
2136
9.33k
    if (m->linearized) {
2137
0
        m->qdf_mode = false;
2138
0
    }
2139
2140
9.33k
    if (m->pclm) {
2141
0
        m->stream_decode_level = qpdf_dl_none;
2142
0
        m->compress_streams = false;
2143
0
        m->encryption = nullptr;
2144
0
    }
2145
2146
9.33k
    if (m->qdf_mode) {
2147
9.33k
        if (!m->normalize_content_set) {
2148
9.33k
            m->normalize_content = true;
2149
9.33k
        }
2150
9.33k
        if (!m->compress_streams_set) {
2151
9.33k
            m->compress_streams = false;
2152
9.33k
        }
2153
9.33k
        if (!m->stream_decode_level_set) {
2154
0
            m->stream_decode_level = qpdf_dl_generalized;
2155
0
        }
2156
9.33k
    }
2157
2158
9.33k
    if (m->encryption) {
2159
        // Encryption has been explicitly set
2160
0
        m->preserve_encryption = false;
2161
9.33k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2162
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2163
        // though, we still obey that.
2164
9.33k
        m->preserve_encryption = false;
2165
9.33k
    }
2166
2167
9.33k
    if (m->preserve_encryption) {
2168
0
        copyEncryptionParameters(m->pdf);
2169
0
    }
2170
2171
9.33k
    if (!m->forced_pdf_version.empty()) {
2172
0
        int major = 0;
2173
0
        int minor = 0;
2174
0
        parseVersion(m->forced_pdf_version, major, minor);
2175
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2176
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2177
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2178
0
            m->object_stream_mode = qpdf_o_disable;
2179
0
        }
2180
0
    }
2181
2182
9.33k
    if (m->qdf_mode || m->normalize_content) {
2183
9.33k
        initializeSpecialStreams();
2184
9.33k
    }
2185
2186
9.33k
    if (m->qdf_mode) {
2187
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2188
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2189
        // hint streams always get direct stream lengths.
2190
9.28k
        m->direct_stream_lengths = false;
2191
9.28k
    }
2192
2193
9.33k
    switch (m->object_stream_mode) {
2194
0
    case qpdf_o_disable:
2195
0
        initializeTables();
2196
0
        m->obj.streams_empty = true;
2197
0
        break;
2198
2199
9.28k
    case qpdf_o_preserve:
2200
9.28k
        initializeTables();
2201
9.28k
        preserveObjectStreams();
2202
9.28k
        break;
2203
2204
0
    case qpdf_o_generate:
2205
0
        generateObjectStreams();
2206
0
        break;
2207
2208
        // no default so gcc will warn for missing case tag
2209
9.33k
    }
2210
2211
9.27k
    if (!m->obj.streams_empty) {
2212
851
        if (m->linearized) {
2213
            // Page dictionaries are not allowed to be compressed objects.
2214
0
            for (auto& page: m->pdf.getAllPages()) {
2215
0
                if (m->obj[page].object_stream > 0) {
2216
0
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2217
0
                    m->obj[page].object_stream = 0;
2218
0
                }
2219
0
            }
2220
0
        }
2221
2222
851
        if (m->linearized || m->encryption) {
2223
            // The document catalog is not allowed to be compressed in linearized files either.  It
2224
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2225
            // handle encrypted files with compressed document catalogs, so we disable them in that
2226
            // case as well.
2227
0
            if (m->obj[m->root_og].object_stream > 0) {
2228
0
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2229
0
                m->obj[m->root_og].object_stream = 0;
2230
0
            }
2231
0
        }
2232
2233
        // Generate reverse mapping from object stream to objects
2234
1.04M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2235
1.04M
            if (item.object_stream > 0) {
2236
28.2k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2237
28.2k
                vec.emplace_back(id, item.gen);
2238
28.2k
                if (m->max_ostream_index < vec.size()) {
2239
8.47k
                    ++m->max_ostream_index;
2240
8.47k
                }
2241
28.2k
            }
2242
1.04M
        });
2243
851
        --m->max_ostream_index;
2244
2245
851
        if (m->object_stream_to_objects.empty()) {
2246
457
            m->obj.streams_empty = true;
2247
457
        } else {
2248
394
            setMinimumPDFVersion("1.5");
2249
394
        }
2250
851
    }
2251
2252
9.27k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2253
9.27k
    m->final_pdf_version = m->min_pdf_version;
2254
9.27k
    m->final_extension_level = m->min_extension_level;
2255
9.27k
    if (!m->forced_pdf_version.empty()) {
2256
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2257
0
        m->final_pdf_version = m->forced_pdf_version;
2258
0
        m->final_extension_level = m->forced_extension_level;
2259
0
    }
2260
9.27k
}
2261
2262
void
2263
QPDFWriter::write()
2264
9.33k
{
2265
9.33k
    doWriteSetup();
2266
2267
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2268
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2269
9.33k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2270
2271
9.33k
    prepareFileForWrite();
2272
2273
9.33k
    if (m->linearized) {
2274
0
        writeLinearized();
2275
9.33k
    } else {
2276
9.33k
        writeStandard();
2277
9.33k
    }
2278
2279
9.33k
    m->pipeline->finish();
2280
9.33k
    if (m->close_file) {
2281
0
        fclose(m->file);
2282
0
    }
2283
9.33k
    m->file = nullptr;
2284
9.33k
    if (m->buffer_pipeline) {
2285
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2286
0
        m->buffer_pipeline = nullptr;
2287
0
    }
2288
9.33k
    indicateProgress(false, true);
2289
9.33k
}
2290
2291
// Return the object id that og was renumbered to, paired with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return {new_id, 0};
}
2296
2297
std::map<QPDFObjGen, QPDFXRefEntry>
2298
QPDFWriter::getWrittenXRefTable()
2299
0
{
2300
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2301
2302
0
    auto it = result.begin();
2303
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2304
0
        if (item.xref.getType() != 0) {
2305
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2306
0
        }
2307
0
    });
2308
0
    return result;
2309
0
}
2310
2311
void
2312
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2313
0
{
2314
0
    for (auto const& oh: part) {
2315
0
        enqueueObject(oh);
2316
0
    }
2317
0
}
2318
2319
void
2320
QPDFWriter::writeEncryptionDictionary()
2321
0
{
2322
0
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2323
0
    auto& enc = *m->encryption;
2324
0
    auto const V = enc.getV();
2325
2326
0
    write("<<");
2327
0
    if (V >= 4) {
2328
0
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2329
0
        write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2330
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2331
        // MacOS won't open encrypted files without it.
2332
0
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2333
0
        if (!m->encryption->getEncryptMetadata()) {
2334
0
            write(" /EncryptMetadata false");
2335
0
        }
2336
0
    }
2337
0
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2338
0
    write(" /O ").write_string(enc.getO(), true);
2339
0
    if (V >= 4) {
2340
0
        write(" /OE ").write_string(enc.getOE(), true);
2341
0
    }
2342
0
    write(" /P ").write(enc.getP());
2343
0
    if (V >= 5) {
2344
0
        write(" /Perms ").write_string(enc.getPerms(), true);
2345
0
    }
2346
0
    write(" /R ").write(enc.getR());
2347
2348
0
    if (V >= 4) {
2349
0
        write(" /StmF /StdCF /StrF /StdCF");
2350
0
    }
2351
0
    write(" /U ").write_string(enc.getU(), true);
2352
0
    if (V >= 4) {
2353
0
        write(" /UE ").write_string(enc.getUE(), true);
2354
0
    }
2355
0
    write(" /V ").write(enc.getV()).write(" >>");
2356
0
    closeObject(m->encryption_dict_objid);
2357
0
}
2358
2359
std::string
2360
QPDFWriter::getFinalVersion()
2361
0
{
2362
0
    doWriteSetup();
2363
0
    return m->final_pdf_version;
2364
0
}
2365
2366
void
2367
QPDFWriter::writeHeader()
2368
9.20k
{
2369
9.20k
    write("%PDF-").write(m->final_pdf_version);
2370
9.20k
    if (m->pclm) {
2371
        // PCLm version
2372
0
        write("\n%PCLm 1.0\n");
2373
9.20k
    } else {
2374
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2375
        // as binary.
2376
9.20k
        write("\n%\xbf\xf7\xa2\xfe\n");
2377
9.20k
    }
2378
9.20k
    write_qdf("%QDF-1.0\n\n");
2379
2380
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2381
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2382
    // linearized files, we have to write extra header text after the linearization parameter
2383
    // dictionary.
2384
9.20k
}
2385
2386
void
2387
QPDFWriter::writeHintStream(int hint_id)
2388
0
{
2389
0
    std::string hint_buffer;
2390
0
    int S = 0;
2391
0
    int O = 0;
2392
0
    bool compressed = m->compress_streams && !m->qdf_mode;
2393
0
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2394
2395
0
    openObject(hint_id);
2396
0
    setDataKey(hint_id);
2397
2398
0
    size_t hlen = hint_buffer.size();
2399
2400
0
    write("<< ");
2401
0
    if (compressed) {
2402
0
        write("/Filter /FlateDecode ");
2403
0
    }
2404
0
    write("/S ").write(S);
2405
0
    if (O) {
2406
0
        write(" /O ").write(O);
2407
0
    }
2408
0
    adjustAESStreamLength(hlen);
2409
0
    write(" /Length ").write(hlen);
2410
0
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2411
2412
0
    if (m->encryption) {
2413
0
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2414
0
    }
2415
2416
0
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2417
0
    closeObject(hint_id);
2418
0
}
2419
2420
// Convenience overload: write a cross-reference table with no previous-xref offset, offsets not
// suppressed, no hint stream adjustment, and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // The full overload takes too many extra arguments to express them safely as defaults in the
    // header, so they are spelled out here by name.
    constexpr qpdf_offset_t no_prev = 0;
    constexpr bool keep_offsets = false;
    constexpr int no_hint_id = 0;
    constexpr qpdf_offset_t no_hint_offset = 0;
    constexpr qpdf_offset_t no_hint_length = 0;
    constexpr int no_linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        no_prev,
        keep_offsets,
        no_hint_id,
        no_hint_offset,
        no_hint_length,
        no_linearization_pass);
}
2427
2428
// Write a classic cross-reference table covering objects first..last, followed by the trailer.
//
// When suppress_offsets is true every in-use entry is written with offset 0. Otherwise, if hint_id
// is non-zero, the offset of every object other than hint_id that lies at or beyond hint_offset is
// increased by hint_length.
//
// Returns the pipeline byte count taken just after the "xref <first> <count>" text and before the
// newline that follows it.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    write("xref\n").write(first).write(" ").write(last - first + 1);
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    write("\n");

    // Object 0 is always written as the fixed free-list head entry.
    int next_id = first;
    if (next_id == 0) {
        write("0000000000 65535 f \n");
        next_id = 1;
    }

    bool const adjust_for_hint = (hint_id != 0);
    for (int objid = next_id; objid <= last; ++objid) {
        qpdf_offset_t entry_offset = 0;
        if (!suppress_offsets) {
            entry_offset = m->new_obj[objid].xref.getOffset();
            if (adjust_for_hint && (objid != hint_id) && (entry_offset >= hint_offset)) {
                entry_offset += hint_length;
            }
        }
        // Offsets are zero-padded to a fixed width of 10 digits.
        write(QUtil::int_to_string(entry_offset, 10)).write(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    write("\n");
    return space_before_zero;
}
2462
2463
// Convenience overload: write a cross-reference stream with no previous-xref offset, no hint
// stream adjustment, compression not skipped, and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // The full overload takes too many extra arguments to express them safely as defaults in the
    // header, so they are spelled out here by name.
    constexpr qpdf_offset_t no_prev = 0;
    constexpr int no_hint_id = 0;
    constexpr qpdf_offset_t no_hint_offset = 0;
    constexpr qpdf_offset_t no_hint_length = 0;
    constexpr bool do_not_skip_compression = false;
    constexpr int no_linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        no_prev,
        no_hint_id,
        no_hint_offset,
        no_hint_length,
        do_not_skip_compression,
        no_linearization_pass);
}
2472
2473
// Write a cross-reference stream as object xref_id covering objects first..last.
//
// Each entry is 1 + f1_size + f2_size bytes: a one-byte type, a field sized to hold the larger of
// (max_offset + hint_length) and max_id, and a field sized to hold m->max_ostream_index. If
// hint_id is non-zero, the offset of every type 1 entry other than hint_id that lies at or beyond
// hint_offset is increased by hint_length.
//
// When streams are being compressed, the entry data is always run through the PNG predictor; it is
// additionally deflated unless skip_compression is true.
//
// Returns one less than the pipeline byte count at the time of the call.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();
    qpdf_offset_t const space_before_zero = xref_offset - 1;

    // Field 1 holds file offsets and object stream identifiers.
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 holds indices into object streams.
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    unsigned int const esize = 1 + f1_size + f2_size;

    // Record this object's own offset now, before any of its data is written, instead of leaving
    // it to openObject.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        // While pp_xref is alive, output is captured in xref_data.
        auto pp_xref = m->pipeline_stack.activate(xref_data);

        for (int objid = first; objid <= last; ++objid) {
            QPDFXRefEntry& entry = m->new_obj[objid].xref;
            int const entry_type = entry.getType();
            if (entry_type == 0) {
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
            } else if (entry_type == 1) {
                qpdf_offset_t offset = entry.getOffset();
                if ((hint_id != 0) && (objid != hint_id) && (offset >= hint_offset)) {
                    offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(offset), f1_size);
                writeBinary(0, f2_size);
            } else if (entry_type == 2) {
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        // When skip_compression is set, the dictionary below still advertises FlateDecode but the
        // data is left undeflated. This helps with the computation of padding for pass 1 of
        // linearization.
        if (!skip_compression) {
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");

    // /Index is only written when the stream does not cover exactly objects 0 through size - 1.
    bool const covers_everything = (first == 0) && (last == (size - 1));
    if (!covers_everything) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2568
2569
size_t
2570
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2571
0
{
2572
    // This routine is called right after a linearization first pass xref stream has been written
2573
    // without compression.  Calculate the amount of padding that would be required in the worst
2574
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2575
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2576
    // 10 extra bytes for number length increases.
2577
2578
0
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2579
0
}
2580
2581
void
2582
QPDFWriter::writeLinearized()
2583
0
{
2584
    // Optimize file and enqueue objects in order
2585
2586
0
    std::map<int, int> stream_cache;
2587
2588
0
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2589
0
        auto& result = stream_cache[stream.getObjectID()];
2590
0
        if (result == 0) {
2591
0
            bool compress_stream;
2592
0
            bool is_metadata;
2593
0
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2594
0
                result = 2;
2595
0
            } else {
2596
0
                result = 1;
2597
0
            }
2598
0
        }
2599
0
        return result;
2600
0
    };
2601
2602
0
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2603
2604
0
    std::vector<QPDFObjectHandle> part4;
2605
0
    std::vector<QPDFObjectHandle> part6;
2606
0
    std::vector<QPDFObjectHandle> part7;
2607
0
    std::vector<QPDFObjectHandle> part8;
2608
0
    std::vector<QPDFObjectHandle> part9;
2609
0
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2610
2611
    // Object number sequence:
2612
    //
2613
    //  second half
2614
    //    second half uncompressed objects
2615
    //    second half xref stream, if any
2616
    //    second half compressed objects
2617
    //  first half
2618
    //    linearization dictionary
2619
    //    first half xref stream, if any
2620
    //    part 4 uncompresesd objects
2621
    //    encryption dictionary, if any
2622
    //    hint stream
2623
    //    part 6 uncompressed objects
2624
    //    first half compressed objects
2625
    //
2626
2627
    // Second half objects
2628
0
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2629
0
    int second_half_first_obj = 1;
2630
0
    int after_second_half = 1 + second_half_uncompressed;
2631
0
    m->next_objid = after_second_half;
2632
0
    int second_half_xref = 0;
2633
0
    bool need_xref_stream = !m->obj.streams_empty;
2634
0
    if (need_xref_stream) {
2635
0
        second_half_xref = m->next_objid++;
2636
0
    }
2637
    // Assign numbers to all compressed objects in the second half.
2638
0
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2639
0
    for (int i = 0; i < 3; ++i) {
2640
0
        for (auto const& oh: *vecs2[i]) {
2641
0
            assignCompressedObjectNumbers(oh.getObjGen());
2642
0
        }
2643
0
    }
2644
0
    int second_half_end = m->next_objid - 1;
2645
0
    int second_trailer_size = m->next_objid;
2646
2647
    // First half objects
2648
0
    int first_half_start = m->next_objid;
2649
0
    int lindict_id = m->next_objid++;
2650
0
    int first_half_xref = 0;
2651
0
    if (need_xref_stream) {
2652
0
        first_half_xref = m->next_objid++;
2653
0
    }
2654
0
    int part4_first_obj = m->next_objid;
2655
0
    m->next_objid += QIntC::to_int(part4.size());
2656
0
    int after_part4 = m->next_objid;
2657
0
    if (m->encryption) {
2658
0
        m->encryption_dict_objid = m->next_objid++;
2659
0
    }
2660
0
    int hint_id = m->next_objid++;
2661
0
    int part6_first_obj = m->next_objid;
2662
0
    m->next_objid += QIntC::to_int(part6.size());
2663
0
    int after_part6 = m->next_objid;
2664
    // Assign numbers to all compressed objects in the first half
2665
0
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2666
0
    for (int i = 0; i < 2; ++i) {
2667
0
        for (auto const& oh: *vecs1[i]) {
2668
0
            assignCompressedObjectNumbers(oh.getObjGen());
2669
0
        }
2670
0
    }
2671
0
    int first_half_end = m->next_objid - 1;
2672
0
    int first_trailer_size = m->next_objid;
2673
2674
0
    int part4_end_marker = part4.back().getObjectID();
2675
0
    int part6_end_marker = part6.back().getObjectID();
2676
0
    qpdf_offset_t space_before_zero = 0;
2677
0
    qpdf_offset_t file_size = 0;
2678
0
    qpdf_offset_t part6_end_offset = 0;
2679
0
    qpdf_offset_t first_half_max_obj_offset = 0;
2680
0
    qpdf_offset_t second_xref_offset = 0;
2681
0
    qpdf_offset_t first_xref_end = 0;
2682
0
    qpdf_offset_t second_xref_end = 0;
2683
2684
0
    m->next_objid = part4_first_obj;
2685
0
    enqueuePart(part4);
2686
0
    if (m->next_objid != after_part4) {
2687
        // This can happen with very botched files as in the fuzzer test. There are likely some
2688
        // faulty assumptions in calculateLinearizationData
2689
0
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2690
0
    }
2691
0
    m->next_objid = part6_first_obj;
2692
0
    enqueuePart(part6);
2693
0
    if (m->next_objid != after_part6) {
2694
0
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2695
0
    }
2696
0
    m->next_objid = second_half_first_obj;
2697
0
    enqueuePart(part7);
2698
0
    enqueuePart(part8);
2699
0
    enqueuePart(part9);
2700
0
    if (m->next_objid != after_second_half) {
2701
0
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2702
0
    }
2703
2704
0
    qpdf_offset_t hint_length = 0;
2705
0
    std::string hint_buffer;
2706
2707
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2708
2709
0
    FILE* lin_pass1_file = nullptr;
2710
0
    auto pp_pass1 = m->pipeline_stack.popper();
2711
0
    auto pp_md5 = m->pipeline_stack.popper();
2712
0
    for (int pass: {1, 2}) {
2713
0
        if (pass == 1) {
2714
0
            if (!m->lin_pass1_filename.empty()) {
2715
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2716
0
                m->pipeline_stack.activate(
2717
0
                    pp_pass1,
2718
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2719
0
            } else {
2720
0
                m->pipeline_stack.activate(pp_pass1, true);
2721
0
            }
2722
0
            if (m->deterministic_id) {
2723
0
                m->pipeline_stack.activate_md5(pp_md5);
2724
0
            }
2725
0
        }
2726
2727
        // Part 1: header
2728
2729
0
        writeHeader();
2730
2731
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2732
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2733
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2734
        // linearization parameter dictionary must appear within the first 1024 characters of the
2735
        // file.
2736
2737
0
        qpdf_offset_t pos = m->pipeline->getCount();
2738
0
        openObject(lindict_id);
2739
0
        write("<<");
2740
0
        if (pass == 2) {
2741
0
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2742
0
            int first_page_object = m->obj[pages.at(0)].renumber;
2743
2744
0
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2745
            // Implementation note 121 states that a space is mandatory after this open bracket.
2746
0
            write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" ");
2747
0
            write(hint_length);
2748
0
            write(" ] /O ").write(first_page_object);
2749
0
            write(" /E ").write(part6_end_offset + hint_length);
2750
0
            write(" /N ").write(pages.size());
2751
0
            write(" /T ").write(space_before_zero + hint_length);
2752
0
        }
2753
0
        write(" >>");
2754
0
        closeObject(lindict_id);
2755
0
        static int const pad = 200;
2756
0
        write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n");
2757
2758
        // If the user supplied any additional header text, write it here after the linearization
2759
        // parameter dictionary.
2760
0
        write(m->extra_header_text);
2761
2762
        // Part 3: first page cross reference table and trailer.
2763
2764
0
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2765
0
        qpdf_offset_t hint_offset = 0;
2766
0
        if (pass == 2) {
2767
0
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2768
0
        }
2769
0
        if (need_xref_stream) {
2770
            // Must pad here too.
2771
0
            if (pass == 1) {
2772
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2773
                // reserved for each file offset.  This would provide adequate space for the xref
2774
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2775
                // file, which is extremely likely.  In the second pass, we will know the actual
2776
                // value for this, but it's okay if it's smaller.
2777
0
                first_half_max_obj_offset = 1 << 25;
2778
0
            }
2779
0
            pos = m->pipeline->getCount();
2780
0
            writeXRefStream(
2781
0
                first_half_xref,
2782
0
                first_half_end,
2783
0
                first_half_max_obj_offset,
2784
0
                t_lin_first,
2785
0
                first_half_start,
2786
0
                first_half_end,
2787
0
                first_trailer_size,
2788
0
                hint_length + second_xref_offset,
2789
0
                hint_id,
2790
0
                hint_offset,
2791
0
                hint_length,
2792
0
                (pass == 1),
2793
0
                pass);
2794
0
            qpdf_offset_t endpos = m->pipeline->getCount();
2795
0
            if (pass == 1) {
2796
                // Pad so we have enough room for the real xref stream.
2797
0
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2798
0
                first_xref_end = m->pipeline->getCount();
2799
0
            } else {
2800
                // Pad so that the next object starts at the same place as in pass 1.
2801
0
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2802
2803
0
                if (m->pipeline->getCount() != first_xref_end) {
2804
0
                    throw std::logic_error(
2805
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
0
            }
2809
0
            write("\n");
2810
0
        } else {
2811
0
            writeXRefTable(
2812
0
                t_lin_first,
2813
0
                first_half_start,
2814
0
                first_half_end,
2815
0
                first_trailer_size,
2816
0
                hint_length + second_xref_offset,
2817
0
                (pass == 1),
2818
0
                hint_id,
2819
0
                hint_offset,
2820
0
                hint_length,
2821
0
                pass);
2822
0
            write("startxref\n0\n%%EOF\n");
2823
0
        }
2824
2825
        // Parts 4 through 9
2826
2827
0
        for (auto const& cur_object: m->object_queue) {
2828
0
            if (cur_object.getObjectID() == part6_end_marker) {
2829
0
                first_half_max_obj_offset = m->pipeline->getCount();
2830
0
            }
2831
0
            writeObject(cur_object);
2832
0
            if (cur_object.getObjectID() == part4_end_marker) {
2833
0
                if (m->encryption) {
2834
0
                    writeEncryptionDictionary();
2835
0
                }
2836
0
                if (pass == 1) {
2837
0
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
0
                } else {
2839
                    // Part 5: hint stream
2840
0
                    write(hint_buffer);
2841
0
                }
2842
0
            }
2843
0
            if (cur_object.getObjectID() == part6_end_marker) {
2844
0
                part6_end_offset = m->pipeline->getCount();
2845
0
            }
2846
0
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
0
        second_xref_offset = m->pipeline->getCount();
2853
0
        if (need_xref_stream) {
2854
0
            pos = m->pipeline->getCount();
2855
0
            space_before_zero = writeXRefStream(
2856
0
                second_half_xref,
2857
0
                second_half_end,
2858
0
                second_xref_offset,
2859
0
                t_lin_second,
2860
0
                0,
2861
0
                second_half_end,
2862
0
                second_trailer_size,
2863
0
                0,
2864
0
                0,
2865
0
                0,
2866
0
                0,
2867
0
                (pass == 1),
2868
0
                pass);
2869
0
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
0
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
0
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
2875
0
                second_xref_end = m->pipeline->getCount();
2876
0
            } else {
2877
                // Make the file size the same.
2878
0
                auto padding =
2879
0
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount());
2880
0
                write(padding, ' ').write("\n");
2881
2882
                // If this assertion fails, maybe we didn't have enough padding above.
2883
0
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2884
0
                    throw std::logic_error(
2885
0
                        "count mismatch after xref stream; possible insufficient padding?");
2886
0
                }
2887
0
            }
2888
0
        } else {
2889
0
            space_before_zero = writeXRefTable(
2890
0
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2891
0
        }
2892
0
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
2893
2894
0
        if (pass == 1) {
2895
0
            if (m->deterministic_id) {
2896
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2897
0
                computeDeterministicIDData();
2898
0
                pp_md5.pop();
2899
0
            }
2900
2901
            // Close first pass pipeline
2902
0
            file_size = m->pipeline->getCount();
2903
0
            pp_pass1.pop();
2904
2905
            // Save hint offset since it will be set to zero by calling openObject.
2906
0
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2907
2908
            // Write hint stream to a buffer
2909
0
            {
2910
0
                auto pp_hint = m->pipeline_stack.activate(hint_buffer);
2911
0
                writeHintStream(hint_id);
2912
0
            }
2913
0
            hint_length = QIntC::to_offset(hint_buffer.size());
2914
2915
            // Restore hint offset
2916
0
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2917
0
            if (lin_pass1_file) {
2918
                // Write some debugging information
2919
0
                fprintf(
2920
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2921
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2922
0
                fprintf(
2923
0
                    lin_pass1_file,
2924
0
                    "%% second_xref_offset=%s\n",
2925
0
                    std::to_string(second_xref_offset).c_str());
2926
0
                fprintf(
2927
0
                    lin_pass1_file,
2928
0
                    "%% second_xref_end=%s\n",
2929
0
                    std::to_string(second_xref_end).c_str());
2930
0
                fclose(lin_pass1_file);
2931
0
                lin_pass1_file = nullptr;
2932
0
            }
2933
0
        }
2934
0
    }
2935
0
}
2936
2937
void
2938
QPDFWriter::enqueueObjectsStandard()
2939
9.20k
{
2940
9.20k
    if (m->preserve_unreferenced_objects) {
2941
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2942
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2943
0
            enqueueObject(oh);
2944
0
        }
2945
0
    }
2946
2947
    // Put root first on queue.
2948
9.20k
    QPDFObjectHandle trailer = getTrimmedTrailer();
2949
9.20k
    enqueueObject(trailer.getKey("/Root"));
2950
2951
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2952
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2953
20.9k
    for (auto& item: trailer.as_dictionary()) {
2954
20.9k
        if (!item.second.null()) {
2955
17.0k
            enqueueObject(item.second);
2956
17.0k
        }
2957
20.9k
    }
2958
9.20k
}
2959
2960
void
2961
QPDFWriter::enqueueObjectsPCLm()
2962
0
{
2963
    // Image transform stream content for page strip images. Each of this new stream has to come
2964
    // after every page image strip written in the pclm file.
2965
0
    std::string image_transform_content = "q /image Do Q\n";
2966
2967
    // enqueue all pages first
2968
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2969
0
    for (auto& page: all) {
2970
        // enqueue page
2971
0
        enqueueObject(page);
2972
2973
        // enqueue page contents stream
2974
0
        enqueueObject(page.getKey("/Contents"));
2975
2976
        // enqueue all the strips for each page
2977
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2978
0
        for (auto& image: strips.as_dictionary()) {
2979
0
            if (!image.second.null()) {
2980
0
                enqueueObject(image.second);
2981
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2982
0
            }
2983
0
        }
2984
0
    }
2985
2986
    // Put root in queue.
2987
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2988
0
    enqueueObject(trailer.getKey("/Root"));
2989
0
}
2990
2991
void
2992
QPDFWriter::indicateProgress(bool decrement, bool finished)
2993
143k
{
2994
143k
    if (decrement) {
2995
27.0k
        --m->events_seen;
2996
27.0k
        return;
2997
27.0k
    }
2998
2999
116k
    ++m->events_seen;
3000
3001
116k
    if (!m->progress_reporter.get()) {
3002
116k
        return;
3003
116k
    }
3004
3005
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3006
0
        int percentage =
3007
0
            (finished ? 100
3008
0
                 : m->next_progress_report == 0
3009
0
                 ? 0
3010
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3011
0
        m->progress_reporter->reportProgress(percentage);
3012
0
    }
3013
0
    int increment = std::max(1, (m->events_expected / 100));
3014
0
    while (m->events_seen >= m->next_progress_report) {
3015
0
        m->next_progress_report += increment;
3016
0
    }
3017
0
}
3018
3019
void
3020
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3021
0
{
3022
0
    m->progress_reporter = pr;
3023
0
}
3024
3025
void
3026
QPDFWriter::writeStandard()
3027
9.20k
{
3028
9.20k
    auto pp_md5 = m->pipeline_stack.popper();
3029
9.20k
    if (m->deterministic_id) {
3030
9.20k
        m->pipeline_stack.activate_md5(pp_md5);
3031
9.20k
    }
3032
3033
    // Start writing
3034
3035
9.20k
    writeHeader();
3036
9.20k
    write(m->extra_header_text);
3037
3038
9.20k
    if (m->pclm) {
3039
0
        enqueueObjectsPCLm();
3040
9.20k
    } else {
3041
9.20k
        enqueueObjectsStandard();
3042
9.20k
    }
3043
3044
    // Now start walking queue, outputting each object.
3045
91.2k
    while (m->object_queue_front < m->object_queue.size()) {
3046
82.0k
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3047
82.0k
        ++m->object_queue_front;
3048
82.0k
        writeObject(cur_object);
3049
82.0k
    }
3050
3051
    // Write out the encryption dictionary, if any
3052
9.20k
    if (m->encryption) {
3053
0
        writeEncryptionDictionary();
3054
0
    }
3055
3056
    // Now write out xref.  next_objid is now the number of objects.
3057
9.20k
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3058
9.20k
    if (m->object_stream_to_objects.empty()) {
3059
        // Write regular cross-reference table
3060
8.67k
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3061
8.67k
    } else {
3062
        // Write cross-reference stream.
3063
533
        int xref_id = m->next_objid++;
3064
533
        writeXRefStream(
3065
533
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3066
533
    }
3067
9.20k
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3068
3069
9.20k
    if (m->deterministic_id) {
3070
8.90k
        QTC::TC(
3071
8.90k
            "qpdf",
3072
8.90k
            "QPDFWriter standard deterministic ID",
3073
8.90k
            m->object_stream_to_objects.empty() ? 0 : 1);
3074
8.90k
    }
3075
9.20k
}