Coverage Report

Created: 2025-10-10 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source
1
#include <qpdf/qpdf-config.h> // include early for large file support
2
3
#include <qpdf/QPDFWriter_private.hh>
4
5
#include <qpdf/MD5.hh>
6
#include <qpdf/Pl_AES_PDF.hh>
7
#include <qpdf/Pl_Flate.hh>
8
#include <qpdf/Pl_MD5.hh>
9
#include <qpdf/Pl_PNGFilter.hh>
10
#include <qpdf/Pl_RC4.hh>
11
#include <qpdf/Pl_StdioFile.hh>
12
#include <qpdf/QIntC.hh>
13
#include <qpdf/QPDFObjectHandle_private.hh>
14
#include <qpdf/QPDFObject_private.hh>
15
#include <qpdf/QPDF_private.hh>
16
#include <qpdf/QTC.hh>
17
#include <qpdf/QUtil.hh>
18
#include <qpdf/RC4.hh>
19
#include <qpdf/Util.hh>
20
21
#include <algorithm>
22
#include <concepts>
23
#include <cstdlib>
24
#include <stdexcept>
25
#include <tuple>
26
27
using namespace std::literals;
28
using namespace qpdf;
29
30
using Encryption = QPDF::Doc::Encryption;
31
32
// Deliberately empty, out-of-line destructor. It has to be spelled out here rather than being
// inline or defaulted -- see QPDF_DLL_CLASS in README-maintainer.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
}
36
37
// Build a progress reporter around a callable. The callable is taken by value, so it is moved
// into the member instead of being copied a second time.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(std::move(handler))
{
}
41
42
// Deliberately empty, out-of-line destructor. It has to be spelled out here rather than being
// inline or defaulted -- see QPDF_DLL_CLASS in README-maintainer.
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
}
47
48
void
49
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
50
0
{
51
0
    handler(progress);
52
0
}
53
54
namespace
55
{
56
    class Pl_stack
57
    {
58
        // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a
59
        // reference to a Popper instance can be passed into activate. When the Popper goes out of
60
        // scope, the pipeline stack is popped. This causes finish to be called on the current
61
        // pipeline and the pipeline stack to be popped until the top of stack is a previous active
62
        // top of stack and restores the pipeline to that point. It deletes any pipelines that it
63
        // pops.
64
        class Popper
65
        {
66
            friend class Pl_stack;
67
68
          public:
69
            Popper() = default;
70
            Popper(Popper const&) = delete;
71
            Popper(Popper&& other) noexcept
72
0
            {
73
0
                // For MSVC, default pops the stack
74
0
                if (this != &other) {
75
0
                    stack = other.stack;
76
0
                    stack_id = other.stack_id;
77
0
                    other.stack = nullptr;
78
0
                    other.stack_id = 0;
79
0
                };
80
0
            }
81
            Popper& operator=(Popper const&) = delete;
82
            Popper&
83
            operator=(Popper&& other) noexcept
84
0
            {
85
0
                // For MSVC, default pops the stack
86
0
                if (this != &other) {
87
0
                    stack = other.stack;
88
0
                    stack_id = other.stack_id;
89
0
                    other.stack = nullptr;
90
0
                    other.stack_id = 0;
91
0
                };
92
0
                return *this;
93
0
            }
94
95
            ~Popper();
96
97
            // Manually pop pipeline from the pipeline stack.
98
            void pop();
99
100
          private:
101
            Popper(Pl_stack& stack) :
102
108k
                stack(&stack)
103
108k
            {
104
108k
            }
105
106
            Pl_stack* stack{nullptr};
107
            unsigned long stack_id{0};
108
        };
109
110
      public:
111
        Pl_stack(pl::Count*& top) :
112
9.18k
            top(top)
113
9.18k
        {
114
9.18k
        }
115
116
        Popper
117
        popper()
118
15.8k
        {
119
15.8k
            return {*this};
120
15.8k
        }
121
122
        void
123
        initialize(Pipeline* p)
124
9.18k
        {
125
9.18k
            auto c = std::make_unique<pl::Count>(++last_id, p);
126
9.18k
            top = c.get();
127
9.18k
            stack.emplace_back(std::move(c));
128
9.18k
        }
129
130
        Popper
131
        activate(std::string& str)
132
87.9k
        {
133
87.9k
            Popper pp{*this};
134
87.9k
            activate(pp, str);
135
87.9k
            return pp;
136
87.9k
        }
137
138
        void
139
        activate(Popper& pp, std::string& str)
140
87.9k
        {
141
87.9k
            activate(pp, false, &str, nullptr);
142
87.9k
        }
143
144
        void
145
        activate(Popper& pp, std::unique_ptr<Pipeline> next)
146
0
        {
147
0
            count_buffer.clear();
148
0
            activate(pp, false, &count_buffer, std::move(next));
149
0
        }
150
151
        Popper
152
        activate(
153
            bool discard = false,
154
            std::string* str = nullptr,
155
            std::unique_ptr<Pipeline> next = nullptr)
156
4.68k
        {
157
4.68k
            Popper pp{*this};
158
4.68k
            activate(pp, discard, str, std::move(next));
159
4.68k
            return pp;
160
4.68k
        }
161
162
        void
163
        activate(
164
            Popper& pp,
165
            bool discard = false,
166
            std::string* str = nullptr,
167
            std::unique_ptr<Pipeline> next = nullptr)
168
100k
        {
169
100k
            std::unique_ptr<pl::Count> c;
170
100k
            if (next) {
171
0
                c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next));
172
100k
            } else if (discard) {
173
12.6k
                c = std::make_unique<pl::Count>(++last_id, nullptr);
174
87.9k
            } else if (!str) {
175
0
                c = std::make_unique<pl::Count>(++last_id, top);
176
87.9k
            } else {
177
87.9k
                c = std::make_unique<pl::Count>(++last_id, *str);
178
87.9k
            }
179
100k
            pp.stack_id = last_id;
180
100k
            top = c.get();
181
100k
            stack.emplace_back(std::move(c));
182
100k
        }
183
        void
184
        activate_md5(Popper& pp)
185
0
        {
186
0
            qpdf_assert_debug(!md5_pipeline);
187
0
            qpdf_assert_debug(md5_id == 0);
188
0
            qpdf_assert_debug(top->getCount() == 0);
189
0
            md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top);
190
0
            md5_pipeline->persistAcrossFinish(true);
191
            // Special case code in pop clears m->md5_pipeline upon deletion.
192
0
            auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get());
193
0
            pp.stack_id = last_id;
194
0
            md5_id = last_id;
195
0
            top = c.get();
196
0
            stack.emplace_back(std::move(c));
197
0
        }
198
199
        // Return the hex digest and disable the MD5 pipeline.
200
        std::string
201
        hex_digest()
202
0
        {
203
0
            qpdf_assert_debug(md5_pipeline);
204
0
            auto digest = md5_pipeline->getHexDigest();
205
0
            md5_pipeline->enable(false);
206
0
            return digest;
207
0
        }
208
209
        void
210
        clear_buffer()
211
0
        {
212
0
            count_buffer.clear();
213
0
        }
214
215
      private:
216
        void
217
        pop(unsigned long stack_id)
218
108k
        {
219
108k
            if (!stack_id) {
220
7.91k
                return;
221
7.91k
            }
222
100k
            qpdf_assert_debug(stack.size() >= 2);
223
100k
            top->finish();
224
100k
            qpdf_assert_debug(stack.back().get() == top);
225
            // It used to be possible for this assertion to fail if writeLinearized exits by
226
            // exception when deterministic ID. There are no longer any cases in which two
227
            // dynamically allocated pipeline Popper objects ever exist at the same time, so the
228
            // assertion will fail if they get popped out of order from automatic destruction.
229
100k
            qpdf_assert_debug(top->id() == stack_id);
230
100k
            if (stack_id == md5_id) {
231
0
                md5_pipeline = nullptr;
232
0
                md5_id = 0;
233
0
            }
234
100k
            stack.pop_back();
235
100k
            top = stack.back().get();
236
100k
        }
237
238
        std::vector<std::unique_ptr<pl::Count>> stack;
239
        pl::Count*& top;
240
        std::unique_ptr<Pl_MD5> md5_pipeline{nullptr};
241
        unsigned long last_id{0};
242
        unsigned long md5_id{0};
243
        std::string count_buffer;
244
    };
245
} // namespace
246
247
// Pop the activation this handle refers to, if it is still attached to a stack. A handle whose
// stack pointer is null (default-constructed, moved-from or already popped) does nothing.
Pl_stack::Popper::~Popper()
{
    if (stack != nullptr) {
        stack->pop(stack_id);
    }
}
253
254
void
255
Pl_stack::Popper::pop()
256
7.50k
{
257
7.50k
    if (stack) {
258
7.50k
        stack->pop(stack_id);
259
7.50k
    }
260
7.50k
    stack_id = 0;
261
7.50k
    stack = nullptr;
262
7.50k
}
263
264
// Writer class is restricted to QPDFWriter so that only it can call certain methods.
265
class QPDF::Doc::Writer
266
{
267
    friend class QPDFWriter;
268
    Writer(QPDF& pdf) :
269
9.43k
        pdf(pdf),
270
9.43k
        lin(pdf.m->lin),
271
9.43k
        objects(pdf.m->objects)
272
9.43k
    {
273
9.43k
    }
274
275
  protected:
276
    void
277
    optimize(
278
        QPDFWriter::ObjTable const& obj,
279
        std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
280
9.07k
    {
281
9.07k
        lin.optimize(obj, skip_stream_parameters);
282
9.07k
    }
283
284
    void
285
    getLinearizedParts(
286
        QPDFWriter::ObjTable const& obj,
287
        std::vector<QPDFObjectHandle>& part4,
288
        std::vector<QPDFObjectHandle>& part6,
289
        std::vector<QPDFObjectHandle>& part7,
290
        std::vector<QPDFObjectHandle>& part8,
291
        std::vector<QPDFObjectHandle>& part9)
292
9.01k
    {
293
9.01k
        lin.getLinearizedParts(obj, part4, part6, part7, part8, part9);
294
9.01k
    }
295
296
    void
297
    generateHintStream(
298
        QPDFWriter::NewObjTable const& new_obj,
299
        QPDFWriter::ObjTable const& obj,
300
        std::string& hint_stream,
301
        int& S,
302
        int& O,
303
        bool compressed)
304
7.50k
    {
305
7.50k
        lin.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
306
7.50k
    }
307
308
    std::vector<QPDFObjGen>
309
    getCompressibleObjGens()
310
0
    {
311
0
        return objects.getCompressibleObjVector();
312
0
    }
313
314
    std::vector<bool>
315
    getCompressibleObjSet()
316
1.29k
    {
317
1.29k
        return objects.getCompressibleObjSet();
318
1.29k
    }
319
320
    std::map<QPDFObjGen, QPDFXRefEntry> const&
321
    getXRefTable()
322
9.16k
    {
323
9.16k
        return objects.getXRefTableInternal();
324
9.16k
    }
325
326
    size_t
327
    tableSize()
328
9.16k
    {
329
9.16k
        return pdf.m->objects.tableSize();
330
9.16k
    }
331
332
    QPDF& pdf;
333
    QPDF::Doc::Linearization& lin;
334
    QPDF::Doc::Objects& objects;
335
};
336
337
// Private implementation state of QPDFWriter. It derives (privately) from QPDF::Doc::Writer to
// reach the forwarding helpers declared there.
class QPDFWriter::Members: QPDF::Doc::Writer
{
    friend class QPDFWriter;

  public:
    // flags used by unparseObject
    static int const f_stream = 1 << 0;
    static int const f_filtered = 1 << 1;
    static int const f_in_ostream = 1 << 2;
    static int const f_hex_string = 1 << 3;
    static int const f_no_encryption = 1 << 4;

    enum trailer_e { t_normal, t_lin_first, t_lin_second };

    // root_og becomes the object/generation of the document root when the root is indirect and
    // (-1, 0) otherwise. pipeline_stack is bound to the `pipeline` member so that pushing and
    // popping keeps `pipeline` pointing at the active pipeline.
    Members(QPDFWriter& w, QPDF& pdf) :
        QPDF::Doc::Writer(pdf),
        w(w),
        root_og(
            pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)),
        pipeline_stack(pipeline)
    {
    }

    Members(Members const&) = delete;

    // Close the output file if this object was asked to, and release any output buffer that is
    // still held here.
    ~Members()
    {
        if (close_file && file) {
            fclose(file);
        }
        delete output_buffer;
    }

    // ----- top-level driver and setup -----
    void write();
    std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
    void setMinimumPDFVersion(std::string const& version, int extension_level);
    void copyEncryptionParameters(QPDF&);
    void doWriteSetup();
    void prepareFileForWrite();

    // ----- encryption and version handling -----
    void disableIncompatibleEncryption(int major, int minor, int extension_level);
    void interpretR3EncryptionParameters(
        bool allow_accessibility,
        bool allow_extract,
        bool allow_assemble,
        bool allow_annotate_and_form,
        bool allow_form_filling,
        bool allow_modify_other,
        qpdf_r3_print_e print,
        qpdf_r3_modify_e modify);
    void setEncryptionParameters(char const* user_password, char const* owner_password);
    void setEncryptionMinimumVersion();
    void parseVersion(std::string const& version, int& major, int& minor) const;
    int compareVersions(int major1, int minor1, int major2, int minor2) const;
    void generateID(bool encrypted);
    std::string getOriginalID1();

    // ----- object tables and queueing -----
    void initializeTables(size_t extra = 0);
    void preserveObjectStreams();
    void generateObjectStreams();
    void initializeSpecialStreams();
    void enqueueObject(QPDFObjectHandle object);
    void enqueueObjectsStandard();
    void enqueueObjectsPCLm();
    void enqueuePart(std::vector<QPDFObjectHandle>& part);
    void assignCompressedObjectNumbers(QPDFObjGen og);
    Dictionary trimmed_trailer();

    // Returns tuple<filter, compress_stream, is_root_metadata>
    std::tuple<const bool, const bool, const bool>
    will_filter_stream(QPDFObjectHandle stream, std::string* stream_data);

    // Test whether stream would be filtered if it were written.
    bool will_filter_stream(QPDFObjectHandle stream);

    // ----- low-level output helpers -----
    unsigned int bytesNeeded(long long n);
    void writeBinary(unsigned long long val, unsigned int bytes);
    Members& write(std::string_view str);
    Members& write(size_t count, char c);
    Members& write(std::integral auto val);
    Members& write_name(std::string const& str);
    Members& write_string(std::string const& str, bool force_binary = false);
    Members& write_encrypted(std::string_view str);

    template <typename... Args>
    Members& write_qdf(Args&&... args);
    template <typename... Args>
    Members& write_no_qdf(Args&&... args);

    // ----- objects, streams, trailer -----
    void writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj);
    void writeObjectStream(QPDFObjectHandle object);
    void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
    void writeTrailer(
        trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass);
    void unparseObject(
        QPDFObjectHandle object,
        size_t level,
        int flags,
        // for stream dictionaries
        size_t stream_length = 0,
        bool compress = false);
    void unparseChild(QPDFObjectHandle const& child, size_t level, int flags);
    int openObject(int objid = 0);
    void closeObject(int objid);
    void writeStandard();
    void writeLinearized();
    void writeEncryptionDictionary();
    void writeHeader();
    void writeHintStream(int hint_id);

    // ----- cross-reference output -----
    qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size);
    qpdf_offset_t writeXRefTable(
        trailer_e which,
        int first,
        int last,
        int size,
        // for linearization
        qpdf_offset_t prev,
        bool suppress_offsets,
        int hint_id,
        qpdf_offset_t hint_offset,
        qpdf_offset_t hint_length,
        int linearization_pass);
    qpdf_offset_t writeXRefStream(
        int objid,
        int max_id,
        qpdf_offset_t max_offset,
        trailer_e which,
        int first,
        int last,
        int size);
    qpdf_offset_t writeXRefStream(
        int objid,
        int max_id,
        qpdf_offset_t max_offset,
        trailer_e which,
        int first,
        int last,
        int size,
        // for linearization
        qpdf_offset_t prev,
        int hint_id,
        qpdf_offset_t hint_offset,
        qpdf_offset_t hint_length,
        bool skip_compression,
        int linearization_pass);

    // ----- miscellaneous -----
    void setDataKey(int objid);
    void indicateProgress(bool decrement, bool finished);
    size_t calculateXrefStreamPadding(qpdf_offset_t xref_bytes);

    void adjustAESStreamLength(size_t& length);
    void computeDeterministicIDData();

  private:
    QPDFWriter& w;
    QPDFObjGen root_og{-1, 0};

    // Output destination
    char const* filename{"unspecified"};
    FILE* file{nullptr};
    bool close_file{false};
    std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr};
    Buffer* output_buffer{nullptr}; // deleted by ~Members unless handed out first

    // Options recorded by the QPDFWriter setters
    bool normalize_content_set{false};
    bool normalize_content{false};
    bool compress_streams{true};
    bool compress_streams_set{false};
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
    bool stream_decode_level_set{false};
    bool recompress_flate{false};
    bool qdf_mode{false};
    bool preserve_unreferenced_objects{false};
    bool newline_before_endstream{false};
    bool static_id{false};
    bool suppress_original_object_ids{false};
    bool direct_stream_lengths{true};
    bool preserve_encryption{true};
    bool linearized{false};
    bool pclm{false};
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};

    // Encryption state
    std::unique_ptr<QPDF::Doc::Encryption> encryption;
    std::string encryption_key;
    bool encrypt_use_aes{false};

    std::string id1; // for /ID key of
    std::string id2; // trailer dictionary
    std::string final_pdf_version;
    int final_extension_level{0};
    std::string min_pdf_version;
    int min_extension_level{0};
    std::string forced_pdf_version;
    int forced_extension_level{0};
    std::string extra_header_text;
    int encryption_dict_objid{0};
    std::string cur_data_key;

    // Pipelines and write-time bookkeeping
    std::unique_ptr<Pipeline> file_pl;
    qpdf::pl::Count* pipeline{nullptr};
    std::vector<QPDFObjectHandle> object_queue;
    size_t object_queue_front{0};
    QPDFWriter::ObjTable obj;
    QPDFWriter::NewObjTable new_obj;
    int next_objid{1};
    int cur_stream_length_id{0};
    size_t cur_stream_length{0};
    bool added_newline{false};
    size_t max_ostream_index{0};
    std::set<QPDFObjGen> normalized_streams;
    std::map<QPDFObjGen, int> page_object_to_seq;
    std::map<QPDFObjGen, int> contents_to_page_seq;
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
    Pl_stack pipeline_stack; // bound to `pipeline` above, so it must be declared after it
    bool deterministic_id{false};
    std::string deterministic_id_data;
    bool did_write_setup{false};

    // For linearization only
    std::string lin_pass1_filename;

    // For progress reporting
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
    int events_expected{0};
    int events_seen{0};
    int next_progress_report{0};
};
557
558
// Create a writer for `pdf`. No output destination is configured by this constructor.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(std::make_shared<Members>(*this, pdf))
{
}
562
563
// Create a writer for `pdf` and immediately direct its output via setOutputFilename(filename).
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(std::make_shared<Members>(*this, pdf))
{
    setOutputFilename(filename);
}
568
569
// Create a writer for `pdf` and immediately direct its output to an already open FILE via
// setOutputFile(description, file, close_file).
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(std::make_shared<Members>(*this, pdf))
{
    setOutputFile(description, file, close_file);
}
574
575
void
576
QPDFWriter::setOutputFilename(char const* filename)
577
0
{
578
0
    char const* description = filename;
579
0
    FILE* f = nullptr;
580
0
    bool close_file = false;
581
0
    if (filename == nullptr) {
582
0
        description = "standard output";
583
0
        f = stdout;
584
0
        QUtil::binary_stdout();
585
0
    } else {
586
0
        f = QUtil::safe_fopen(filename, "wb+");
587
0
        close_file = true;
588
0
    }
589
0
    setOutputFile(description, f, close_file);
590
0
}
591
592
void
593
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
594
0
{
595
0
    m->filename = description;
596
0
    m->file = file;
597
0
    m->close_file = close_file;
598
0
    m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file);
599
0
    m->pipeline_stack.initialize(m->file_pl.get());
600
0
}
601
602
void
603
QPDFWriter::setOutputMemory()
604
0
{
605
0
    m->filename = "memory buffer";
606
0
    m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output");
607
0
    m->pipeline_stack.initialize(m->buffer_pipeline.get());
608
0
}
609
610
// Hand the stored output buffer to the caller. The writer forgets the pointer, so the Members
// destructor will no longer delete it; a second call returns nullptr unless a new buffer has been
// stored in the meantime.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
617
618
std::shared_ptr<Buffer>
619
QPDFWriter::getBufferSharedPointer()
620
0
{
621
0
    return std::shared_ptr<Buffer>(getBuffer());
622
0
}
623
624
void
625
QPDFWriter::setOutputPipeline(Pipeline* p)
626
9.18k
{
627
9.18k
    m->filename = "custom pipeline";
628
9.18k
    m->pipeline_stack.initialize(p);
629
9.18k
}
630
631
void
632
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
633
0
{
634
0
    m->object_stream_mode = mode;
635
0
}
636
637
void
638
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
639
0
{
640
0
    switch (mode) {
641
0
    case qpdf_s_uncompress:
642
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
643
0
        m->compress_streams = false;
644
0
        break;
645
646
0
    case qpdf_s_preserve:
647
0
        m->stream_decode_level = qpdf_dl_none;
648
0
        m->compress_streams = false;
649
0
        break;
650
651
0
    case qpdf_s_compress:
652
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
653
0
        m->compress_streams = true;
654
0
        break;
655
0
    }
656
0
    m->stream_decode_level_set = true;
657
0
    m->compress_streams_set = true;
658
0
}
659
660
void
661
QPDFWriter::setCompressStreams(bool val)
662
0
{
663
0
    m->compress_streams = val;
664
0
    m->compress_streams_set = true;
665
0
}
666
667
void
668
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
669
9.18k
{
670
9.18k
    m->stream_decode_level = val;
671
9.18k
    m->stream_decode_level_set = true;
672
9.18k
}
673
674
void
675
QPDFWriter::setRecompressFlate(bool val)
676
0
{
677
0
    m->recompress_flate = val;
678
0
}
679
680
void
681
QPDFWriter::setContentNormalization(bool val)
682
0
{
683
0
    m->normalize_content_set = true;
684
0
    m->normalize_content = val;
685
0
}
686
687
void
688
QPDFWriter::setQDFMode(bool val)
689
0
{
690
0
    m->qdf_mode = val;
691
0
}
692
693
void
694
QPDFWriter::setPreserveUnreferencedObjects(bool val)
695
0
{
696
0
    m->preserve_unreferenced_objects = val;
697
0
}
698
699
void
700
QPDFWriter::setNewlineBeforeEndstream(bool val)
701
0
{
702
0
    m->newline_before_endstream = val;
703
0
}
704
705
void
706
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
707
10.0k
{
708
10.0k
    m->setMinimumPDFVersion(version, extension_level);
709
10.0k
}
710
711
void
712
QPDFWriter::Members::setMinimumPDFVersion(std::string const& version, int extension_level)
713
19.1k
{
714
19.1k
    bool set_version = false;
715
19.1k
    bool set_extension_level = false;
716
19.1k
    if (min_pdf_version.empty()) {
717
9.16k
        set_version = true;
718
9.16k
        set_extension_level = true;
719
10.0k
    } else {
720
10.0k
        int old_major = 0;
721
10.0k
        int old_minor = 0;
722
10.0k
        int min_major = 0;
723
10.0k
        int min_minor = 0;
724
10.0k
        parseVersion(version, old_major, old_minor);
725
10.0k
        parseVersion(min_pdf_version, min_major, min_minor);
726
10.0k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
727
10.0k
        if (compare > 0) {
728
223
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
729
223
            set_version = true;
730
223
            set_extension_level = true;
731
9.79k
        } else if (compare == 0) {
732
1.03k
            if (extension_level > min_extension_level) {
733
3
                set_extension_level = true;
734
3
            }
735
1.03k
        }
736
10.0k
    }
737
738
19.1k
    if (set_version) {
739
9.38k
        min_pdf_version = version;
740
9.38k
    }
741
19.1k
    if (set_extension_level) {
742
9.39k
        min_extension_level = extension_level;
743
9.39k
    }
744
19.1k
}
745
746
void
747
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
748
0
{
749
0
    std::string version;
750
0
    int extension_level;
751
0
    v.getVersion(version, extension_level);
752
0
    setMinimumPDFVersion(version, extension_level);
753
0
}
754
755
void
756
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
757
0
{
758
0
    m->forced_pdf_version = version;
759
0
    m->forced_extension_level = extension_level;
760
0
}
761
762
void
763
QPDFWriter::setExtraHeaderText(std::string const& text)
764
0
{
765
0
    m->extra_header_text = text;
766
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
767
0
        m->extra_header_text += "\n";
768
0
    } else {
769
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
770
0
    }
771
0
}
772
773
void
774
QPDFWriter::setStaticID(bool val)
775
9.18k
{
776
9.18k
    m->static_id = val;
777
9.18k
}
778
779
void
780
QPDFWriter::setDeterministicID(bool val)
781
0
{
782
0
    m->deterministic_id = val;
783
0
}
784
785
void
786
QPDFWriter::setStaticAesIV(bool val)
787
0
{
788
0
    if (val) {
789
0
        Pl_AES_PDF::useStaticIV();
790
0
    }
791
0
}
792
793
void
794
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
795
0
{
796
0
    m->suppress_original_object_ids = val;
797
0
}
798
799
void
800
QPDFWriter::setPreserveEncryption(bool val)
801
0
{
802
0
    m->preserve_encryption = val;
803
0
}
804
805
void
806
QPDFWriter::setLinearization(bool val)
807
9.18k
{
808
9.18k
    m->linearized = val;
809
9.18k
    if (val) {
810
9.18k
        m->pclm = false;
811
9.18k
    }
812
9.18k
}
813
814
void
815
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
816
0
{
817
0
    m->lin_pass1_filename = filename;
818
0
}
819
820
void
821
QPDFWriter::setPCLm(bool val)
822
0
{
823
0
    m->pclm = val;
824
0
    if (val) {
825
0
        m->linearized = false;
826
0
    }
827
0
}
828
829
void
830
QPDFWriter::setR2EncryptionParametersInsecure(
831
    char const* user_password,
832
    char const* owner_password,
833
    bool allow_print,
834
    bool allow_modify,
835
    bool allow_extract,
836
    bool allow_annotate)
837
0
{
838
0
    m->encryption = std::make_unique<Encryption>(1, 2, 5, true);
839
0
    if (!allow_print) {
840
0
        m->encryption->setP(3, false);
841
0
    }
842
0
    if (!allow_modify) {
843
0
        m->encryption->setP(4, false);
844
0
    }
845
0
    if (!allow_extract) {
846
0
        m->encryption->setP(5, false);
847
0
    }
848
0
    if (!allow_annotate) {
849
0
        m->encryption->setP(6, false);
850
0
    }
851
0
    m->setEncryptionParameters(user_password, owner_password);
852
0
}
853
854
void
855
QPDFWriter::setR3EncryptionParametersInsecure(
856
    char const* user_password,
857
    char const* owner_password,
858
    bool allow_accessibility,
859
    bool allow_extract,
860
    bool allow_assemble,
861
    bool allow_annotate_and_form,
862
    bool allow_form_filling,
863
    bool allow_modify_other,
864
    qpdf_r3_print_e print)
865
0
{
866
0
    m->encryption = std::make_unique<Encryption>(2, 3, 16, true);
867
0
    m->interpretR3EncryptionParameters(
868
0
        allow_accessibility,
869
0
        allow_extract,
870
0
        allow_assemble,
871
0
        allow_annotate_and_form,
872
0
        allow_form_filling,
873
0
        allow_modify_other,
874
0
        print,
875
0
        qpdf_r3m_all);
876
0
    m->setEncryptionParameters(user_password, owner_password);
877
0
}
878
879
void
880
QPDFWriter::setR4EncryptionParametersInsecure(
881
    char const* user_password,
882
    char const* owner_password,
883
    bool allow_accessibility,
884
    bool allow_extract,
885
    bool allow_assemble,
886
    bool allow_annotate_and_form,
887
    bool allow_form_filling,
888
    bool allow_modify_other,
889
    qpdf_r3_print_e print,
890
    bool encrypt_metadata,
891
    bool use_aes)
892
0
{
893
0
    m->encryption = std::make_unique<Encryption>(4, 4, 16, encrypt_metadata);
894
0
    m->encrypt_use_aes = use_aes;
895
0
    m->interpretR3EncryptionParameters(
896
0
        allow_accessibility,
897
0
        allow_extract,
898
0
        allow_assemble,
899
0
        allow_annotate_and_form,
900
0
        allow_form_filling,
901
0
        allow_modify_other,
902
0
        print,
903
0
        qpdf_r3m_all);
904
0
    m->setEncryptionParameters(user_password, owner_password);
905
0
}
906
907
void
908
QPDFWriter::setR5EncryptionParameters(
909
    char const* user_password,
910
    char const* owner_password,
911
    bool allow_accessibility,
912
    bool allow_extract,
913
    bool allow_assemble,
914
    bool allow_annotate_and_form,
915
    bool allow_form_filling,
916
    bool allow_modify_other,
917
    qpdf_r3_print_e print,
918
    bool encrypt_metadata)
919
0
{
920
0
    m->encryption = std::make_unique<Encryption>(5, 5, 32, encrypt_metadata);
921
0
    m->encrypt_use_aes = true;
922
0
    m->interpretR3EncryptionParameters(
923
0
        allow_accessibility,
924
0
        allow_extract,
925
0
        allow_assemble,
926
0
        allow_annotate_and_form,
927
0
        allow_form_filling,
928
0
        allow_modify_other,
929
0
        print,
930
0
        qpdf_r3m_all);
931
0
    m->setEncryptionParameters(user_password, owner_password);
932
0
}
933
934
void
935
QPDFWriter::setR6EncryptionParameters(
936
    char const* user_password,
937
    char const* owner_password,
938
    bool allow_accessibility,
939
    bool allow_extract,
940
    bool allow_assemble,
941
    bool allow_annotate_and_form,
942
    bool allow_form_filling,
943
    bool allow_modify_other,
944
    qpdf_r3_print_e print,
945
    bool encrypt_metadata)
946
9.18k
{
947
9.18k
    m->encryption = std::make_unique<Encryption>(5, 6, 32, encrypt_metadata);
948
9.18k
    m->interpretR3EncryptionParameters(
949
9.18k
        allow_accessibility,
950
9.18k
        allow_extract,
951
9.18k
        allow_assemble,
952
9.18k
        allow_annotate_and_form,
953
9.18k
        allow_form_filling,
954
9.18k
        allow_modify_other,
955
9.18k
        print,
956
9.18k
        qpdf_r3m_all);
957
9.18k
    m->encrypt_use_aes = true;
958
9.18k
    m->setEncryptionParameters(user_password, owner_password);
959
9.18k
}
960
961
void
962
QPDFWriter::Members::interpretR3EncryptionParameters(
963
    bool allow_accessibility,
964
    bool allow_extract,
965
    bool allow_assemble,
966
    bool allow_annotate_and_form,
967
    bool allow_form_filling,
968
    bool allow_modify_other,
969
    qpdf_r3_print_e print,
970
    qpdf_r3_modify_e modify)
971
9.18k
{
972
    // Acrobat 5 security options:
973
974
    // Checkboxes:
975
    //   Enable Content Access for the Visually Impaired
976
    //   Allow Content Copying and Extraction
977
978
    // Allowed changes menu:
979
    //   None
980
    //   Only Document Assembly
981
    //   Only Form Field Fill-in or Signing
982
    //   Comment Authoring, Form Field Fill-in or Signing
983
    //   General Editing, Comment and Form Field Authoring
984
985
    // Allowed printing menu:
986
    //   None
987
    //   Low Resolution
988
    //   Full printing
989
990
    // Meanings of bits in P when R >= 3
991
    //
992
    //  3: low-resolution printing
993
    //  4: document modification except as controlled by 6, 9, and 11
994
    //  5: extraction
995
    //  6: add/modify annotations (comment), fill in forms
996
    //     if 4+6 are set, also allows modification of form fields
997
    //  9: fill in forms even if 6 is clear
998
    // 10: accessibility; ignored by readers, should always be set
999
    // 11: document assembly even if 4 is clear
1000
    // 12: high-resolution printing
1001
9.18k
    if (!allow_accessibility && encryption->getR() <= 3) {
1002
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
1003
        // is no way to disable accessibility with R > 3.
1004
0
        encryption->setP(10, false);
1005
0
    }
1006
9.18k
    if (!allow_extract) {
1007
0
        encryption->setP(5, false);
1008
0
    }
1009
1010
9.18k
    switch (print) {
1011
0
    case qpdf_r3p_none:
1012
0
        encryption->setP(3, false); // any printing
1013
0
        [[fallthrough]];
1014
0
    case qpdf_r3p_low:
1015
0
        encryption->setP(12, false); // high resolution printing
1016
0
        [[fallthrough]];
1017
9.18k
    case qpdf_r3p_full:
1018
9.18k
        break;
1019
        // no default so gcc warns for missing cases
1020
9.18k
    }
1021
1022
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
1023
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
1024
    // stuck with it. See also allow checks below to control the bits individually.
1025
1026
    // NOT EXERCISED IN TEST SUITE
1027
9.18k
    switch (modify) {
1028
0
    case qpdf_r3m_none:
1029
0
        encryption->setP(11, false); // document assembly
1030
0
        [[fallthrough]];
1031
0
    case qpdf_r3m_assembly:
1032
0
        encryption->setP(9, false); // filling in form fields
1033
0
        [[fallthrough]];
1034
0
    case qpdf_r3m_form:
1035
0
        encryption->setP(6, false); // modify annotations, fill in form fields
1036
0
        [[fallthrough]];
1037
0
    case qpdf_r3m_annotate:
1038
0
        encryption->setP(4, false); // other modifications
1039
0
        [[fallthrough]];
1040
9.18k
    case qpdf_r3m_all:
1041
9.18k
        break;
1042
        // no default so gcc warns for missing cases
1043
9.18k
    }
1044
    // END NOT EXERCISED IN TEST SUITE
1045
1046
9.18k
    if (!allow_assemble) {
1047
0
        encryption->setP(11, false);
1048
0
    }
1049
9.18k
    if (!allow_annotate_and_form) {
1050
0
        encryption->setP(6, false);
1051
0
    }
1052
9.18k
    if (!allow_form_filling) {
1053
0
        encryption->setP(9, false);
1054
0
    }
1055
9.18k
    if (!allow_modify_other) {
1056
0
        encryption->setP(4, false);
1057
0
    }
1058
9.18k
}
1059
1060
void
1061
QPDFWriter::Members::setEncryptionParameters(char const* user_password, char const* owner_password)
1062
9.18k
{
1063
9.18k
    generateID(true);
1064
9.18k
    encryption->setId1(id1);
1065
9.18k
    encryption_key = encryption->compute_parameters(user_password, owner_password);
1066
9.18k
    setEncryptionMinimumVersion();
1067
9.18k
}
1068
1069
void
1070
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
1071
0
{
1072
0
    m->copyEncryptionParameters(qpdf);
1073
0
}
1074
1075
void
1076
QPDFWriter::Members::copyEncryptionParameters(QPDF& qpdf)
1077
0
{
1078
0
    preserve_encryption = false;
1079
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
1080
0
    if (trailer.hasKey("/Encrypt")) {
1081
0
        generateID(true);
1082
0
        id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
1083
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
1084
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
1085
0
        int key_len = 5;
1086
0
        if (V > 1) {
1087
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
1088
0
        }
1089
0
        const bool encrypt_metadata =
1090
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
1091
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
1092
0
            : true;
1093
0
        if (V >= 4) {
1094
            // When copying encryption parameters, use AES even if the original file did not.
1095
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
1096
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
1097
            // all potentially having different values.
1098
0
            encrypt_use_aes = true;
1099
0
        }
1100
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
1101
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", encrypt_use_aes ? 0 : 1);
1102
1103
0
        encryption = std::make_unique<Encryption>(
1104
0
            V,
1105
0
            encrypt.getKey("/R").getIntValueAsInt(),
1106
0
            key_len,
1107
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
1108
0
            encrypt.getKey("/O").getStringValue(),
1109
0
            encrypt.getKey("/U").getStringValue(),
1110
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
1111
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
1112
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
1113
0
            id1, // id1 == the other file's id1
1114
0
            encrypt_metadata);
1115
0
        encryption_key = V >= 5 ? qpdf.getEncryptionKey()
1116
0
                                : encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
1117
0
        setEncryptionMinimumVersion();
1118
0
    }
1119
0
}
1120
1121
void
1122
QPDFWriter::Members::disableIncompatibleEncryption(int major, int minor, int extension_level)
1123
0
{
1124
0
    if (!encryption) {
1125
0
        return;
1126
0
    }
1127
0
    if (compareVersions(major, minor, 1, 3) < 0) {
1128
0
        encryption = nullptr;
1129
0
        return;
1130
0
    }
1131
0
    int V = encryption->getV();
1132
0
    int R = encryption->getR();
1133
0
    if (compareVersions(major, minor, 1, 4) < 0) {
1134
0
        if (V > 1 || R > 2) {
1135
0
            encryption = nullptr;
1136
0
        }
1137
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
1138
0
        if (V > 2 || R > 3) {
1139
0
            encryption = nullptr;
1140
0
        }
1141
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
1142
0
        if (encrypt_use_aes) {
1143
0
            encryption = nullptr;
1144
0
        }
1145
0
    } else if (
1146
0
        (compareVersions(major, minor, 1, 7) < 0) ||
1147
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
1148
0
        if (V >= 5 || R >= 5) {
1149
0
            encryption = nullptr;
1150
0
        }
1151
0
    }
1152
1153
0
    if (!encryption) {
1154
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
1155
0
    }
1156
0
}
1157
1158
void
1159
QPDFWriter::Members::parseVersion(std::string const& version, int& major, int& minor) const
1160
20.0k
{
1161
20.0k
    major = QUtil::string_to_int(version.c_str());
1162
20.0k
    minor = 0;
1163
20.0k
    size_t p = version.find('.');
1164
20.0k
    if ((p != std::string::npos) && (version.length() > p)) {
1165
20.0k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
1166
20.0k
    }
1167
20.0k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
1168
20.0k
    if (tmp != version) {
1169
        // The version number in the input is probably invalid. This happens with some files that
1170
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
1171
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
1172
19
    }
1173
20.0k
}
1174
1175
int
1176
QPDFWriter::Members::compareVersions(int major1, int minor1, int major2, int minor2) const
1177
10.0k
{
1178
10.0k
    if (major1 < major2) {
1179
78
        return -1;
1180
78
    }
1181
9.93k
    if (major1 > major2) {
1182
129
        return 1;
1183
129
    }
1184
9.80k
    if (minor1 < minor2) {
1185
8.67k
        return -1;
1186
8.67k
    }
1187
1.12k
    return minor1 > minor2 ? 1 : 0;
1188
9.80k
}
1189
1190
void
1191
QPDFWriter::Members::setEncryptionMinimumVersion()
1192
9.16k
{
1193
9.16k
    auto const R = encryption->getR();
1194
9.16k
    if (R >= 6) {
1195
9.16k
        w.setMinimumPDFVersion("1.7", 8);
1196
9.16k
    } else if (R == 5) {
1197
0
        w.setMinimumPDFVersion("1.7", 3);
1198
0
    } else if (R == 4) {
1199
0
        w.setMinimumPDFVersion(encrypt_use_aes ? "1.6" : "1.5");
1200
0
    } else if (R == 3) {
1201
0
        w.setMinimumPDFVersion("1.4");
1202
0
    } else {
1203
0
        w.setMinimumPDFVersion("1.3");
1204
0
    }
1205
9.16k
}
1206
1207
void
1208
QPDFWriter::Members::setDataKey(int objid)
1209
237k
{
1210
237k
    if (encryption) {
1211
237k
        cur_data_key = QPDF::compute_data_key(
1212
237k
            encryption_key, objid, 0, encrypt_use_aes, encryption->getV(), encryption->getR());
1213
237k
    }
1214
237k
}
1215
1216
unsigned int
1217
QPDFWriter::Members::bytesNeeded(long long n)
1218
7.49k
{
1219
7.49k
    unsigned int bytes = 0;
1220
18.1k
    while (n) {
1221
10.6k
        ++bytes;
1222
10.6k
        n >>= 8;
1223
10.6k
    }
1224
7.49k
    return bytes;
1225
7.49k
}
1226
1227
void
1228
QPDFWriter::Members::writeBinary(unsigned long long val, unsigned int bytes)
1229
415k
{
1230
415k
    if (bytes > sizeof(unsigned long long)) {
1231
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
1232
0
    }
1233
415k
    unsigned char data[sizeof(unsigned long long)];
1234
1.02M
    for (unsigned int i = 0; i < bytes; ++i) {
1235
609k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
1236
609k
        val >>= 8;
1237
609k
    }
1238
415k
    pipeline->write(data, bytes);
1239
415k
}
1240
1241
QPDFWriter::Members&
QPDFWriter::Members::write(std::string_view str)
{
    // Send str to the current pipeline unchanged. Returns *this so calls can be chained.
    pipeline->write(str);
    return *this;
}
1247
1248
QPDFWriter::Members&
QPDFWriter::Members::write(std::integral auto val)
{
    // Send the std::to_string representation of an integer value to the current pipeline.
    // Returns *this so calls can be chained.
    pipeline->write(std::to_string(val));
    return *this;
}
_ZN10QPDFWriter7Members5writeITkNSt3__18integralEiEERS0_T_
Line
Count
Source
1250
839k
{
1251
839k
    pipeline->write(std::to_string(val));
1252
839k
    return *this;
1253
839k
}
_ZN10QPDFWriter7Members5writeITkNSt3__18integralExEERS0_T_
Line
Count
Source
1250
199k
{
1251
199k
    pipeline->write(std::to_string(val));
1252
199k
    return *this;
1253
199k
}
_ZN10QPDFWriter7Members5writeITkNSt3__18integralEmEERS0_T_
Line
Count
Source
1250
64.6k
{
1251
64.6k
    pipeline->write(std::to_string(val));
1252
64.6k
    return *this;
1253
64.6k
}
_ZN10QPDFWriter7Members5writeITkNSt3__18integralEjEERS0_T_
Line
Count
Source
1250
7.49k
{
1251
7.49k
    pipeline->write(std::to_string(val));
1252
7.49k
    return *this;
1253
7.49k
}
1254
1255
QPDFWriter::Members&
QPDFWriter::Members::write(size_t count, char c)
{
    // Forward (count, c) to the pipeline's own (count, char) write overload; within this file
    // it is used to emit runs of a fill character. Returns *this so calls can be chained.
    pipeline->write(count, c);
    return *this;
}
1261
1262
QPDFWriter::Members&
QPDFWriter::Members::write_name(std::string const& str)
{
    // Write a PDF name after passing it through Name::normalize. Returns *this so calls can be
    // chained.
    pipeline->write(Name::normalize(str));
    return *this;
}
1268
1269
QPDFWriter::Members&
QPDFWriter::Members::write_string(std::string const& str, bool force_binary)
{
    // Write str as a PDF string object, using QPDF_String::unparse to produce the textual form;
    // force_binary is passed straight through to unparse. Returns *this so calls can be chained.
    pipeline->write(QPDF_String(str).unparse(force_binary));
    return *this;
}
1275
1276
template <typename... Args>
1277
QPDFWriter::Members&
1278
QPDFWriter::Members::write_qdf(Args&&... args)
1279
602k
{
1280
602k
    if (qdf_mode) {
1281
0
        pipeline->write(std::forward<Args>(args)...);
1282
0
    }
1283
602k
    return *this;
1284
602k
}
QPDFWriter::Members& QPDFWriter::Members::write_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1279
495k
{
1280
495k
    if (qdf_mode) {
1281
0
        pipeline->write(std::forward<Args>(args)...);
1282
0
    }
1283
495k
    return *this;
1284
495k
}
QPDFWriter::Members& QPDFWriter::Members::write_qdf<char const (&) [3]>(char const (&) [3])
Line
Count
Source
1279
61.6k
{
1280
61.6k
    if (qdf_mode) {
1281
0
        pipeline->write(std::forward<Args>(args)...);
1282
0
    }
1283
61.6k
    return *this;
1284
61.6k
}
QPDFWriter::Members& QPDFWriter::Members::write_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1279
30.4k
{
1280
30.4k
    if (qdf_mode) {
1281
0
        pipeline->write(std::forward<Args>(args)...);
1282
0
    }
1283
30.4k
    return *this;
1284
30.4k
}
QPDFWriter::Members& QPDFWriter::Members::write_qdf<char const (&) [11]>(char const (&) [11])
Line
Count
Source
1279
15.4k
{
1280
15.4k
    if (qdf_mode) {
1281
0
        pipeline->write(std::forward<Args>(args)...);
1282
0
    }
1283
15.4k
    return *this;
1284
15.4k
}
1285
1286
template <typename... Args>
1287
QPDFWriter::Members&
1288
QPDFWriter::Members::write_no_qdf(Args&&... args)
1289
193k
{
1290
193k
    if (!qdf_mode) {
1291
193k
        pipeline->write(std::forward<Args>(args)...);
1292
193k
    }
1293
193k
    return *this;
1294
193k
}
QPDFWriter::Members& QPDFWriter::Members::write_no_qdf<char const (&) [2]>(char const (&) [2])
Line
Count
Source
1289
163k
{
1290
163k
    if (!qdf_mode) {
1291
163k
        pipeline->write(std::forward<Args>(args)...);
1292
163k
    }
1293
163k
    return *this;
1294
163k
}
QPDFWriter::Members& QPDFWriter::Members::write_no_qdf<char const (&) [4]>(char const (&) [4])
Line
Count
Source
1289
30.4k
{
1290
30.4k
    if (!qdf_mode) {
1291
30.4k
        pipeline->write(std::forward<Args>(args)...);
1292
30.4k
    }
1293
30.4k
    return *this;
1294
30.4k
}
1295
1296
void
1297
QPDFWriter::Members::adjustAESStreamLength(size_t& length)
1298
50.1k
{
1299
50.1k
    if (encryption && !cur_data_key.empty() && encrypt_use_aes) {
1300
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
1301
        // also be prepended by 16 bits of random data.
1302
50.1k
        length += 32 - (length & 0xf);
1303
50.1k
    }
1304
50.1k
}
1305
1306
QPDFWriter::Members&
QPDFWriter::Members::write_encrypted(std::string_view str)
{
    // Write str encrypted with the current data key. If there is no encryption or no data key,
    // str is written as is. Returns *this so calls can be chained.
    if (!encryption || cur_data_key.empty()) {
        return write(str);
    }
    if (encrypt_use_aes) {
        return write(pl::pipe<Pl_AES_PDF>(str, true, cur_data_key));
    }
    return write(pl::pipe<Pl_RC4>(str, cur_data_key));
}
1319
1320
void
1321
QPDFWriter::Members::computeDeterministicIDData()
1322
0
{
1323
0
    if (!id2.empty()) {
1324
        // Can't happen in the code
1325
0
        throw std::logic_error(
1326
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
1327
0
    }
1328
0
    qpdf_assert_debug(deterministic_id_data.empty());
1329
0
    deterministic_id_data = pipeline_stack.hex_digest();
1330
0
}
1331
1332
int
1333
QPDFWriter::Members::openObject(int objid)
1334
271k
{
1335
271k
    if (objid == 0) {
1336
0
        objid = next_objid++;
1337
0
    }
1338
271k
    new_obj[objid].xref = QPDFXRefEntry(pipeline->getCount());
1339
271k
    write(objid).write(" 0 obj\n");
1340
271k
    return objid;
1341
271k
}
1342
1343
void
1344
QPDFWriter::Members::closeObject(int objid)
1345
270k
{
1346
    // Write a newline before endobj as it makes the file easier to repair.
1347
270k
    write("\nendobj\n").write_qdf("\n");
1348
270k
    auto& no = new_obj[objid];
1349
270k
    no.length = pipeline->getCount() - no.xref.getOffset();
1350
270k
}
1351
1352
void
1353
QPDFWriter::Members::assignCompressedObjectNumbers(QPDFObjGen og)
1354
133k
{
1355
133k
    int objid = og.getObj();
1356
133k
    if (og.getGen() != 0 || !object_stream_to_objects.contains(objid)) {
1357
        // This is not an object stream.
1358
130k
        return;
1359
130k
    }
1360
1361
    // Reserve numbers for the objects that belong to this object stream.
1362
42.8k
    for (auto const& iter: object_stream_to_objects[objid]) {
1363
42.8k
        obj[iter].renumber = next_objid++;
1364
42.8k
    }
1365
3.33k
}
1366
1367
void
1368
QPDFWriter::Members::enqueueObject(QPDFObjectHandle object)
1369
133k
{
1370
133k
    if (object.isIndirect()) {
1371
        // This owner check can only be done for indirect objects. It is possible for a direct
1372
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1373
        // one file was insert into another file without copying. Doing that is safe even if the
1374
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1375
133k
        if (object.getOwningQPDF() != &pdf) {
1376
0
            throw std::logic_error(
1377
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1378
0
                "QPDF::copyForeignObject to add objects from another file.");
1379
0
        }
1380
1381
133k
        if (qdf_mode && object.isStreamOfType("/XRef")) {
1382
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1383
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1384
            // file. This case can occur when creating a QDF from a file with object streams when
1385
            // preserving unreferenced objects since the old cross reference streams are not
1386
            // actually referenced by object number.
1387
0
            return;
1388
0
        }
1389
1390
133k
        QPDFObjGen og = object.getObjGen();
1391
133k
        auto& o = obj[og];
1392
1393
133k
        if (o.renumber == 0) {
1394
131k
            if (o.object_stream > 0) {
1395
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1396
                // stream.  Object streams always have generation 0.
1397
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1398
31
                o.renumber = -1;
1399
31
                enqueueObject(pdf.getObject(o.object_stream, 0));
1400
131k
            } else {
1401
131k
                object_queue.emplace_back(object);
1402
131k
                o.renumber = next_objid++;
1403
1404
131k
                if (og.getGen() == 0 && object_stream_to_objects.contains(og.getObj())) {
1405
                    // For linearized files, uncompressed objects go at end, and we take care of
1406
                    // assigning numbers to them elsewhere.
1407
3.21k
                    if (!linearized) {
1408
0
                        assignCompressedObjectNumbers(og);
1409
0
                    }
1410
128k
                } else if (!direct_stream_lengths && object.isStream()) {
1411
                    // reserve next object ID for length
1412
0
                    ++next_objid;
1413
0
                }
1414
131k
            }
1415
131k
        } else if (o.renumber == -1) {
1416
            // This can happen if a specially constructed file indicates that an object stream is
1417
            // inside itself.
1418
1
        }
1419
133k
        return;
1420
133k
    } else if (!linearized) {
1421
0
        if (object.isArray()) {
1422
0
            for (auto& item: object.as_array()) {
1423
0
                enqueueObject(item);
1424
0
            }
1425
0
        } else if (auto d = object.as_dictionary()) {
1426
0
            for (auto const& item: d) {
1427
0
                if (!item.second.null()) {
1428
0
                    enqueueObject(item.second);
1429
0
                }
1430
0
            }
1431
0
        }
1432
207
    } else {
1433
        // ignore
1434
207
    }
1435
133k
}
1436
1437
void
1438
QPDFWriter::Members::unparseChild(QPDFObjectHandle const& child, size_t level, int flags)
1439
1.95M
{
1440
1.95M
    if (!linearized) {
1441
0
        enqueueObject(child);
1442
0
    }
1443
1.95M
    if (child.isIndirect()) {
1444
397k
        write(obj[child].renumber).write(" 0 R");
1445
1.56M
    } else {
1446
1.56M
        unparseObject(child, level, flags);
1447
1.56M
    }
1448
1.95M
}
1449
1450
void
1451
QPDFWriter::Members::writeTrailer(
1452
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1453
30.4k
{
1454
30.4k
    auto trailer = trimmed_trailer();
1455
30.4k
    if (xref_stream) {
1456
2.49k
        cur_data_key.clear();
1457
27.9k
    } else {
1458
27.9k
        write("trailer <<");
1459
27.9k
    }
1460
30.4k
    write_qdf("\n");
1461
30.4k
    if (which == t_lin_second) {
1462
15.0k
        write(" /Size ").write(size);
1463
15.4k
    } else {
1464
39.0k
        for (auto const& [key, value]: trailer) {
1465
39.0k
            if (value.null()) {
1466
9.00k
                continue;
1467
9.00k
            }
1468
30.0k
            write_qdf("  ").write_no_qdf(" ").write_name(key).write(" ");
1469
30.0k
            if (key == "/Size") {
1470
4.42k
                write(size);
1471
4.42k
                if (which == t_lin_first) {
1472
4.42k
                    write(" /Prev ");
1473
4.42k
                    qpdf_offset_t pos = pipeline->getCount();
1474
4.42k
                    write(prev).write(QIntC::to_size(pos - pipeline->getCount() + 21), ' ');
1475
4.42k
                }
1476
25.6k
            } else {
1477
25.6k
                unparseChild(value, 1, 0);
1478
25.6k
            }
1479
30.0k
            write_qdf("\n");
1480
30.0k
        }
1481
15.4k
    }
1482
1483
    // Write ID
1484
30.4k
    write_qdf(" ").write(" /ID [");
1485
30.4k
    if (linearization_pass == 1) {
1486
15.4k
        std::string original_id1 = getOriginalID1();
1487
15.4k
        if (original_id1.empty()) {
1488
13.1k
            write("<00000000000000000000000000000000>");
1489
13.1k
        } else {
1490
            // Write a string of zeroes equal in length to the representation of the original ID.
1491
            // While writing the original ID would have the same number of bytes, it would cause a
1492
            // change to the deterministic ID generated by older versions of the software that
1493
            // hard-coded the length of the ID to 16 bytes.
1494
2.26k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1495
2.26k
            write("<").write(len, '0').write(">");
1496
2.26k
        }
1497
15.4k
        write("<00000000000000000000000000000000>");
1498
15.4k
    } else {
1499
14.9k
        if (linearization_pass == 0 && deterministic_id) {
1500
0
            computeDeterministicIDData();
1501
0
        }
1502
14.9k
        generateID(encryption.get());
1503
14.9k
        write_string(id1, true).write_string(id2, true);
1504
14.9k
    }
1505
30.4k
    write("]");
1506
1507
30.4k
    if (which != t_lin_second) {
1508
        // Write reference to encryption dictionary
1509
15.4k
        if (encryption) {
1510
15.4k
            write(" /Encrypt ").write(encryption_dict_objid).write(" 0 R");
1511
15.4k
        }
1512
15.4k
    }
1513
1514
30.4k
    write_qdf("\n>>").write_no_qdf(" >>");
1515
30.4k
}
1516
1517
bool
1518
QPDFWriter::Members::will_filter_stream(QPDFObjectHandle stream)
1519
21.2k
{
1520
21.2k
    std::string s;
1521
21.2k
    [[maybe_unused]] auto [filter, ignore1, ignore2] = will_filter_stream(stream, &s);
1522
21.2k
    return filter;
1523
21.2k
}
1524
1525
std::tuple<const bool, const bool, const bool>
1526
QPDFWriter::Members::will_filter_stream(QPDFObjectHandle stream, std::string* stream_data)
1527
59.3k
{
1528
59.3k
    const bool is_root_metadata = stream.isRootMetadata();
1529
59.3k
    bool filter = false;
1530
59.3k
    auto decode_level = stream_decode_level;
1531
59.3k
    int encode_flags = 0;
1532
59.3k
    Dictionary stream_dict = stream.getDict();
1533
1534
59.3k
    if (stream.getFilterOnWrite()) {
1535
44.9k
        filter = stream.isDataModified() || compress_streams || decode_level != qpdf_dl_none;
1536
44.9k
        if (compress_streams) {
1537
            // Don't filter if the stream is already compressed with FlateDecode. This way we don't
1538
            // make it worse if the original file used a better Flate algorithm, and we don't spend
1539
            // time and CPU cycles uncompressing and recompressing stuff. This can be overridden
1540
            // with setRecompressFlate(true).
1541
44.9k
            Name Filter = stream_dict["/Filter"];
1542
44.9k
            if (Filter && !recompress_flate && !stream.isDataModified() &&
1543
19.0k
                (Filter == "/FlateDecode" || Filter == "/Fl")) {
1544
11.3k
                filter = false;
1545
11.3k
            }
1546
44.9k
        }
1547
44.9k
        if (is_root_metadata && (!encryption || !encryption->getEncryptMetadata())) {
1548
0
            filter = true;
1549
0
            decode_level = qpdf_dl_all;
1550
44.9k
        } else if (normalize_content && normalized_streams.contains(stream)) {
1551
0
            encode_flags = qpdf_ef_normalize;
1552
0
            filter = true;
1553
44.9k
        } else if (filter && compress_streams) {
1554
33.5k
            encode_flags = qpdf_ef_compress;
1555
33.5k
        }
1556
44.9k
    }
1557
1558
    // Disable compression for empty streams to improve compatibility
1559
59.3k
    if (Integer(stream_dict["/Length"]) == 0) {
1560
2.53k
        filter = true;
1561
2.53k
        encode_flags = 0;
1562
2.53k
    }
1563
1564
68.6k
    for (bool first_attempt: {true, false}) {
1565
68.6k
        auto pp_stream_data =
1566
68.6k
            stream_data ? pipeline_stack.activate(*stream_data) : pipeline_stack.activate(true);
1567
1568
68.6k
        try {
1569
68.6k
            if (stream.pipeStreamData(
1570
68.6k
                    pipeline,
1571
68.6k
                    filter ? encode_flags : 0,
1572
68.6k
                    filter ? decode_level : qpdf_dl_none,
1573
68.6k
                    false,
1574
68.6k
                    first_attempt)) {
1575
25.9k
                return {true, encode_flags & qpdf_ef_compress, is_root_metadata};
1576
25.9k
            }
1577
42.6k
            if (!filter) {
1578
33.2k
                break;
1579
33.2k
            }
1580
42.6k
        } catch (std::runtime_error& e) {
1581
92
            if (!(filter && first_attempt)) {
1582
19
                throw std::runtime_error(
1583
19
                    "error while getting stream data for " + stream.unparse() + ": " + e.what());
1584
19
            }
1585
73
            stream.warn("error while getting stream data: "s + e.what());
1586
73
            stream.warn("qpdf will attempt to write the damaged stream unchanged");
1587
73
        }
1588
        // Try again
1589
9.29k
        filter = false;
1590
9.29k
        stream.setFilterOnWrite(false);
1591
9.29k
        if (stream_data) {
1592
9.29k
            stream_data->clear();
1593
9.29k
        }
1594
9.29k
    }
1595
33.2k
    return {false, false, is_root_metadata};
1596
59.3k
}
1597
1598
void
1599
QPDFWriter::Members::unparseObject(
1600
    QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress)
1601
1.89M
{
1602
1.89M
    QPDFObjGen old_og = object.getObjGen();
1603
1.89M
    int child_flags = flags & ~f_stream;
1604
    // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they
1605
    // include the preceding newline.
1606
1.89M
    std::string indent_large = " ";
1607
1.89M
    if (qdf_mode) {
1608
0
        indent_large.append(2 * (level + 1), ' ');
1609
0
        indent_large[0] = '\n';
1610
0
    }
1611
1.89M
    std::string_view indent{indent_large.data(), qdf_mode ? indent_large.size() - 2 : 1};
1612
1613
1.89M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1614
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1615
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1616
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1617
115k
        write("[");
1618
1.10M
        for (auto const& item: object.as_array()) {
1619
1.10M
            write(indent_large);
1620
1.10M
            unparseChild(item, level + 1, child_flags);
1621
1.10M
        }
1622
115k
        write(indent).write("]");
1623
1.77M
    } else if (tc == ::ot_dictionary) {
1624
        // Handle special cases for specific dictionaries.
1625
1626
282k
        if (old_og == root_og) {
1627
            // Extensions dictionaries.
1628
1629
            // We have one of several cases:
1630
            //
1631
            // * We need ADBE
1632
            //    - We already have Extensions
1633
            //       - If it has the right ADBE, preserve it
1634
            //       - Otherwise, replace ADBE
1635
            //    - We don't have Extensions: create one from scratch
1636
            // * We don't want ADBE
1637
            //    - We already have Extensions
1638
            //       - If it only has ADBE, remove it
1639
            //       - If it has other things, keep those and remove ADBE
1640
            //    - We have no extensions: no action required
1641
            //
1642
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1643
            // dictionary, so we can modify in place.
1644
1645
15.3k
            auto extensions = object.getKey("/Extensions");
1646
15.3k
            const bool has_extensions = extensions.isDictionary();
1647
15.3k
            const bool need_extensions_adbe = final_extension_level > 0;
1648
1649
15.3k
            if (has_extensions || need_extensions_adbe) {
1650
                // Make a shallow copy of this object so we can modify it safely without affecting
1651
                // the original. This code has logic to skip certain keys in agreement with
1652
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1653
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1654
                // here because all we are doing is removing or replacing top-level keys.
1655
15.0k
                object = object.unsafeShallowCopy();
1656
15.0k
                if (!has_extensions) {
1657
14.3k
                    extensions = QPDFObjectHandle();
1658
14.3k
                }
1659
1660
15.0k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1661
15.0k
                const bool have_extensions_other =
1662
15.0k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1663
1664
15.0k
                if (need_extensions_adbe) {
1665
15.0k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1666
                        // We need Extensions and don't have it.  Create it here.
1667
14.4k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", qdf_mode ? 0 : 1);
1668
14.4k
                        extensions = object.replaceKeyAndGetNew(
1669
14.4k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1670
14.4k
                    }
1671
15.0k
                } else if (!have_extensions_other) {
1672
                    // We have Extensions dictionary and don't want one.
1673
13
                    if (have_extensions_adbe) {
1674
8
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1675
8
                        object.removeKey("/Extensions");
1676
8
                        extensions = QPDFObjectHandle(); // uninitialized
1677
8
                    }
1678
13
                }
1679
1680
15.0k
                if (extensions) {
1681
15.0k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1682
15.0k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1683
15.0k
                    if (adbe.isDictionary() &&
1684
460
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + final_pdf_version) &&
1685
306
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1686
297
                        (adbe.getKey("/ExtensionLevel").getIntValue() == final_extension_level)) {
1687
14.7k
                    } else {
1688
14.7k
                        if (need_extensions_adbe) {
1689
14.7k
                            extensions.replaceKey(
1690
14.7k
                                "/ADBE",
1691
14.7k
                                QPDFObjectHandle::parse(
1692
14.7k
                                    "<< /BaseVersion /" + final_pdf_version + " /ExtensionLevel " +
1693
14.7k
                                    std::to_string(final_extension_level) + " >>"));
1694
14.7k
                        } else {
1695
24
                            extensions.removeKey("/ADBE");
1696
24
                        }
1697
14.7k
                    }
1698
15.0k
                }
1699
15.0k
            }
1700
15.3k
        }
1701
1702
        // Stream dictionaries.
1703
1704
282k
        if (flags & f_stream) {
1705
            // Suppress /Length since we will write it manually
1706
1707
            // Make a shallow copy of this object so we can modify it safely without affecting the
1708
            // original. This code has logic to skip certain keys in agreement with
1709
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1710
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1711
            // all we are doing is removing or replacing top-level keys.
1712
37.9k
            object = object.unsafeShallowCopy();
1713
1714
37.9k
            object.removeKey("/Length");
1715
1716
            // If /DecodeParms is an empty list, remove it.
1717
37.9k
            if (object.getKey("/DecodeParms").empty()) {
1718
35.8k
                object.removeKey("/DecodeParms");
1719
35.8k
            }
1720
1721
37.9k
            if (flags & f_filtered) {
1722
                // We will supply our own filter and decode parameters.
1723
16.6k
                object.removeKey("/Filter");
1724
16.6k
                object.removeKey("/DecodeParms");
1725
21.3k
            } else {
1726
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1727
                // filters.
1728
21.3k
                QPDFObjectHandle filter = object.getKey("/Filter");
1729
21.3k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1730
21.3k
                if (filter.isOrHasName("/Crypt")) {
1731
378
                    if (filter.isName()) {
1732
28
                        object.removeKey("/Filter");
1733
28
                        object.removeKey("/DecodeParms");
1734
350
                    } else {
1735
350
                        int idx = 0;
1736
16.4k
                        for (auto const& item: filter.as_array()) {
1737
16.4k
                            if (item.isNameAndEquals("/Crypt")) {
1738
                                // If filter is an array, then the code in QPDF_Stream has already
1739
                                // verified that DecodeParms and Filters are arrays of the same
1740
                                // length, but if they weren't for some reason, eraseItem does type
1741
                                // and bounds checking. Fuzzing tells us that this can actually
1742
                                // happen.
1743
350
                                filter.eraseItem(idx);
1744
350
                                decode_parms.eraseItem(idx);
1745
350
                                break;
1746
350
                            }
1747
16.1k
                            ++idx;
1748
16.1k
                        }
1749
350
                    }
1750
378
                }
1751
21.3k
            }
1752
37.9k
        }
1753
1754
282k
        write("<<");
1755
1756
989k
        for (auto const& [key, value]: object.as_dictionary()) {
1757
989k
            if (!value.null()) {
1758
826k
                write(indent_large).write_name(key).write(" ");
1759
826k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1760
32
                    object.hasKey("/ByteRange")) {
1761
20
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1762
20
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1763
826k
                } else {
1764
826k
                    unparseChild(value, level + 1, child_flags);
1765
826k
                }
1766
826k
            }
1767
989k
        }
1768
1769
282k
        if (flags & f_stream) {
1770
37.7k
            write(indent_large).write("/Length ");
1771
1772
37.7k
            if (direct_stream_lengths) {
1773
37.7k
                write(stream_length);
1774
37.7k
            } else {
1775
0
                write(cur_stream_length_id).write(" 0 R");
1776
0
            }
1777
37.7k
            if (compress && (flags & f_filtered)) {
1778
16.4k
                write(indent_large).write("/Filter /FlateDecode");
1779
16.4k
            }
1780
37.7k
        }
1781
1782
282k
        write(indent).write(">>");
1783
1.49M
    } else if (tc == ::ot_stream) {
1784
        // Write stream data to a buffer.
1785
38.0k
        if (!direct_stream_lengths) {
1786
0
            cur_stream_length_id = obj[old_og].renumber + 1;
1787
0
        }
1788
1789
38.0k
        flags |= f_stream;
1790
38.0k
        std::string stream_data;
1791
38.0k
        auto [filter, compress_stream, is_root_metadata] = will_filter_stream(object, &stream_data);
1792
38.0k
        if (filter) {
1793
16.6k
            flags |= f_filtered;
1794
16.6k
        }
1795
38.0k
        QPDFObjectHandle stream_dict = object.getDict();
1796
1797
38.0k
        cur_stream_length = stream_data.size();
1798
38.0k
        if (is_root_metadata && encryption && !encryption->getEncryptMetadata()) {
1799
            // Don't encrypt stream data for the metadata stream
1800
0
            cur_data_key.clear();
1801
0
        }
1802
38.0k
        adjustAESStreamLength(cur_stream_length);
1803
38.0k
        unparseObject(stream_dict, 0, flags, cur_stream_length, compress_stream);
1804
38.0k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1805
38.0k
        write("\nstream\n").write_encrypted(stream_data);
1806
38.0k
        added_newline = newline_before_endstream || (qdf_mode && last_char != '\n');
1807
38.0k
        write(added_newline ? "\nendstream" : "endstream");
1808
1.45M
    } else if (tc == ::ot_string) {
1809
43.3k
        std::string val;
1810
43.3k
        if (encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1811
30.1k
            !cur_data_key.empty()) {
1812
25.9k
            val = object.getStringValue();
1813
25.9k
            if (encrypt_use_aes) {
1814
25.9k
                Pl_Buffer bufpl("encrypted string");
1815
25.9k
                Pl_AES_PDF pl("aes encrypt string", &bufpl, true, cur_data_key);
1816
25.9k
                pl.writeString(val);
1817
25.9k
                pl.finish();
1818
25.9k
                val = QPDF_String(bufpl.getString()).unparse(true);
1819
25.9k
            } else {
1820
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1821
0
                char* tmp = tmp_ph.get();
1822
0
                size_t vlen = val.length();
1823
0
                RC4 rc4(
1824
0
                    QUtil::unsigned_char_pointer(cur_data_key),
1825
0
                    QIntC::to_int(cur_data_key.length()));
1826
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1827
0
                rc4.process(data, vlen, data);
1828
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1829
0
            }
1830
25.9k
        } else if (flags & f_hex_string) {
1831
20
            val = QPDF_String(object.getStringValue()).unparse(true);
1832
17.3k
        } else {
1833
17.3k
            val = object.unparseResolved();
1834
17.3k
        }
1835
43.3k
        write(val);
1836
1.41M
    } else {
1837
1.41M
        write(object.unparseResolved());
1838
1.41M
    }
1839
1.89M
}
1840
1841
void
1842
QPDFWriter::Members::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1843
9.37k
{
1844
9.37k
    qpdf_assert_debug(first_obj > 0);
1845
9.37k
    bool is_first = true;
1846
9.37k
    auto id = std::to_string(first_obj) + ' ';
1847
138k
    for (auto& offset: offsets) {
1848
138k
        if (is_first) {
1849
9.37k
            is_first = false;
1850
128k
        } else {
1851
128k
            write_qdf("\n").write_no_qdf(" ");
1852
128k
        }
1853
138k
        write(id);
1854
138k
        util::increment(id, 1);
1855
138k
        write(offset);
1856
138k
    }
1857
9.37k
    write("\n");
1858
9.37k
}
1859
1860
void
1861
QPDFWriter::Members::writeObjectStream(QPDFObjectHandle object)
1862
4.69k
{
1863
    // Note: object might be null if this is a place-holder for an object stream that we are
1864
    // generating from scratch.
1865
1866
4.69k
    QPDFObjGen old_og = object.getObjGen();
1867
4.69k
    qpdf_assert_debug(old_og.getGen() == 0);
1868
4.69k
    int old_id = old_og.getObj();
1869
4.69k
    int new_stream_id = obj[old_og].renumber;
1870
1871
4.69k
    std::vector<qpdf_offset_t> offsets;
1872
4.69k
    qpdf_offset_t first = 0;
1873
1874
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1875
    // first pass.
1876
4.69k
    std::string stream_buffer_pass1;
1877
4.69k
    std::string stream_buffer_pass2;
1878
4.69k
    int first_obj = -1;
1879
4.69k
    const bool compressed = compress_streams && !qdf_mode;
1880
4.69k
    {
1881
        // Pass 1
1882
4.69k
        auto pp_ostream_pass1 = pipeline_stack.activate(stream_buffer_pass1);
1883
1884
4.69k
        int count = -1;
1885
69.0k
        for (auto const& og: object_stream_to_objects[old_id]) {
1886
69.0k
            ++count;
1887
69.0k
            int new_o = obj[og].renumber;
1888
69.0k
            if (first_obj == -1) {
1889
4.69k
                first_obj = new_o;
1890
4.69k
            }
1891
69.0k
            if (qdf_mode) {
1892
0
                write("%% Object stream: object ").write(new_o).write(", index ").write(count);
1893
0
                if (!suppress_original_object_ids) {
1894
0
                    write("; original object ID: ").write(og.getObj());
1895
                    // For compatibility, only write the generation if non-zero.  While object
1896
                    // streams only allow objects with generation 0, if we are generating object
1897
                    // streams, the old object could have a non-zero generation.
1898
0
                    if (og.getGen() != 0) {
1899
0
                        write(" ").write(og.getGen());
1900
0
                    }
1901
0
                }
1902
0
                write("\n");
1903
0
            }
1904
1905
69.0k
            offsets.push_back(pipeline->getCount());
1906
            // To avoid double-counting objects being written in object streams for progress
1907
            // reporting, decrement in pass 1.
1908
69.0k
            indicateProgress(true, false);
1909
1910
69.0k
            QPDFObjectHandle obj_to_write = pdf.getObject(og);
1911
69.0k
            if (obj_to_write.isStream()) {
1912
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1913
                // time, but it's not clear to me how to construct a case for this.
1914
0
                obj_to_write.warn("stream found inside object stream; treating as null");
1915
0
                obj_to_write = QPDFObjectHandle::newNull();
1916
0
            }
1917
69.0k
            writeObject(obj_to_write, count);
1918
1919
69.0k
            new_obj[new_o].xref = QPDFXRefEntry(new_stream_id, count);
1920
69.0k
        }
1921
4.69k
    }
1922
4.69k
    {
1923
        // Adjust offsets to skip over comment before first object
1924
4.69k
        first = offsets.at(0);
1925
69.0k
        for (auto& iter: offsets) {
1926
69.0k
            iter -= first;
1927
69.0k
        }
1928
1929
        // Take one pass at writing pairs of numbers so we can get their size information
1930
4.69k
        {
1931
4.69k
            auto pp_discard = pipeline_stack.activate(true);
1932
4.69k
            writeObjectStreamOffsets(offsets, first_obj);
1933
4.69k
            first += pipeline->getCount();
1934
4.69k
        }
1935
1936
        // Set up a stream to write the stream data into a buffer.
1937
4.69k
        auto pp_ostream = pipeline_stack.activate(stream_buffer_pass2);
1938
1939
4.69k
        writeObjectStreamOffsets(offsets, first_obj);
1940
4.69k
        write(stream_buffer_pass1);
1941
4.69k
        stream_buffer_pass1.clear();
1942
4.69k
        stream_buffer_pass1.shrink_to_fit();
1943
4.69k
        if (compressed) {
1944
4.68k
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
1945
4.68k
        }
1946
4.69k
    }
1947
1948
    // Write the object
1949
4.69k
    openObject(new_stream_id);
1950
4.69k
    setDataKey(new_stream_id);
1951
4.69k
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
1952
4.69k
    size_t length = stream_buffer_pass2.size();
1953
4.69k
    adjustAESStreamLength(length);
1954
4.69k
    write(" /Length ").write(length).write_qdf("\n ");
1955
4.69k
    if (compressed) {
1956
4.68k
        write(" /Filter /FlateDecode");
1957
4.68k
    }
1958
4.69k
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
1959
4.69k
    if (!object.null()) {
1960
        // If the original object has an /Extends key, preserve it.
1961
1.54k
        QPDFObjectHandle dict = object.getDict();
1962
1.54k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1963
1.54k
        if (extends.isIndirect()) {
1964
287
            write_qdf("\n ").write(" /Extends ");
1965
287
            unparseChild(extends, 1, f_in_ostream);
1966
287
        }
1967
1.54k
    }
1968
4.69k
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
1969
4.69k
    if (encryption) {
1970
4.54k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1971
4.54k
    }
1972
4.69k
    write(newline_before_endstream ? "\nendstream" : "endstream");
1973
4.69k
    cur_data_key.clear();
1974
4.69k
    closeObject(new_stream_id);
1975
4.69k
}
1976
1977
void
1978
QPDFWriter::Members::writeObject(QPDFObjectHandle object, int object_stream_index)
1979
299k
{
1980
299k
    QPDFObjGen old_og = object.getObjGen();
1981
1982
299k
    if (object_stream_index == -1 && old_og.getGen() == 0 &&
1983
228k
        object_stream_to_objects.contains(old_og.getObj())) {
1984
4.69k
        writeObjectStream(object);
1985
4.69k
        return;
1986
4.69k
    }
1987
1988
294k
    indicateProgress(false, false);
1989
294k
    auto new_id = obj[old_og].renumber;
1990
294k
    if (qdf_mode) {
1991
0
        if (page_object_to_seq.contains(old_og)) {
1992
0
            write("%% Page ").write(page_object_to_seq[old_og]).write("\n");
1993
0
        }
1994
0
        if (contents_to_page_seq.contains(old_og)) {
1995
0
            write("%% Contents for page ").write(contents_to_page_seq[old_og]).write("\n");
1996
0
        }
1997
0
    }
1998
294k
    if (object_stream_index == -1) {
1999
225k
        if (qdf_mode && !suppress_original_object_ids) {
2000
0
            write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n");
2001
0
        }
2002
225k
        openObject(new_id);
2003
225k
        setDataKey(new_id);
2004
225k
        unparseObject(object, 0, 0);
2005
225k
        cur_data_key.clear();
2006
225k
        closeObject(new_id);
2007
225k
    } else {
2008
69.0k
        unparseObject(object, 0, f_in_ostream);
2009
69.0k
        write("\n");
2010
69.0k
    }
2011
2012
294k
    if (!direct_stream_lengths && object.isStream()) {
2013
0
        if (qdf_mode) {
2014
0
            if (added_newline) {
2015
0
                write("%QDF: ignore_newline\n");
2016
0
            }
2017
0
        }
2018
0
        openObject(new_id + 1);
2019
0
        write(cur_stream_length);
2020
0
        closeObject(new_id + 1);
2021
0
    }
2022
294k
}
2023
2024
std::string
2025
QPDFWriter::Members::getOriginalID1()
2026
24.6k
{
2027
24.6k
    QPDFObjectHandle trailer = pdf.getTrailer();
2028
24.6k
    if (trailer.hasKey("/ID")) {
2029
3.72k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
2030
20.8k
    } else {
2031
20.8k
        return "";
2032
20.8k
    }
2033
24.6k
}
2034
2035
void
2036
QPDFWriter::Members::generateID(bool encrypted)
2037
24.1k
{
2038
    // Generate the ID lazily so that we can handle the user's preference to use static or
2039
    // deterministic ID generation.
2040
2041
24.1k
    if (!id2.empty()) {
2042
14.9k
        return;
2043
14.9k
    }
2044
2045
9.18k
    QPDFObjectHandle trailer = pdf.getTrailer();
2046
2047
9.18k
    std::string result;
2048
2049
9.18k
    if (static_id) {
2050
        // For test suite use only...
2051
9.18k
        static unsigned char tmp[] = {
2052
9.18k
            0x31,
2053
9.18k
            0x41,
2054
9.18k
            0x59,
2055
9.18k
            0x26,
2056
9.18k
            0x53,
2057
9.18k
            0x58,
2058
9.18k
            0x97,
2059
9.18k
            0x93,
2060
9.18k
            0x23,
2061
9.18k
            0x84,
2062
9.18k
            0x62,
2063
9.18k
            0x64,
2064
9.18k
            0x33,
2065
9.18k
            0x83,
2066
9.18k
            0x27,
2067
9.18k
            0x95,
2068
9.18k
            0x00};
2069
9.18k
        result = reinterpret_cast<char*>(tmp);
2070
9.18k
    } else {
2071
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
2072
        // only thing that's really important is that it is very likely to be unique.  We can't
2073
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
2074
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
2075
        // sufficient portion of the file's contents such no two non-matching files would match in
2076
        // the subsets used for this computation.  Note that we explicitly omit the filename from
2077
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
2078
        // that case, would have the same ID regardless of the output file's name.
2079
2080
0
        std::string seed;
2081
0
        if (deterministic_id) {
2082
0
            if (encrypted) {
2083
0
                throw std::runtime_error(
2084
0
                    "QPDFWriter: unable to generated a deterministic ID because the file to be "
2085
0
                    "written is encrypted (even though the file may not require a password)");
2086
0
            }
2087
0
            if (deterministic_id_data.empty()) {
2088
0
                throw std::logic_error(
2089
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID");
2090
0
            }
2091
0
            seed += deterministic_id_data;
2092
0
        } else {
2093
0
            seed += std::to_string(QUtil::get_current_time());
2094
0
            seed += filename;
2095
0
            seed += " ";
2096
0
        }
2097
0
        seed += " QPDF ";
2098
0
        if (trailer.hasKey("/Info")) {
2099
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
2100
0
                if (item.second.isString()) {
2101
0
                    seed += " ";
2102
0
                    seed += item.second.getStringValue();
2103
0
                }
2104
0
            }
2105
0
        }
2106
2107
0
        MD5 md5;
2108
0
        md5.encodeString(seed.c_str());
2109
0
        MD5::Digest digest;
2110
0
        md5.digest(digest);
2111
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
2112
0
    }
2113
2114
    // If /ID already exists, follow the spec: use the original first word and generate a new second
2115
    // word.  Otherwise, we'll use the generated ID for both.
2116
2117
9.18k
    id2 = result;
2118
    // Note: keep /ID from old file even if --static-id was given.
2119
9.18k
    id1 = getOriginalID1();
2120
9.18k
    if (id1.empty()) {
2121
7.82k
        id1 = id2;
2122
7.82k
    }
2123
9.18k
}
2124
2125
void
2126
QPDFWriter::Members::initializeSpecialStreams()
2127
0
{
2128
    // Mark all page content streams in case we are filtering or normalizing.
2129
0
    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
2130
0
    int num = 0;
2131
0
    for (auto& page: pages) {
2132
0
        page_object_to_seq[page.getObjGen()] = ++num;
2133
0
        QPDFObjectHandle contents = page.getKey("/Contents");
2134
0
        std::vector<QPDFObjGen> contents_objects;
2135
0
        if (contents.isArray()) {
2136
0
            int n = static_cast<int>(contents.size());
2137
0
            for (int i = 0; i < n; ++i) {
2138
0
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
2139
0
            }
2140
0
        } else if (contents.isStream()) {
2141
0
            contents_objects.push_back(contents.getObjGen());
2142
0
        }
2143
2144
0
        for (auto const& c: contents_objects) {
2145
0
            contents_to_page_seq[c] = num;
2146
0
            normalized_streams.insert(c);
2147
0
        }
2148
0
    }
2149
0
}
2150
2151
void
2152
QPDFWriter::Members::preserveObjectStreams()
2153
9.16k
{
2154
9.16k
    auto const& xref = getXRefTable();
2155
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
2156
    // streams out of old objects that have generation numbers greater than zero. However in an
2157
    // existing PDF, all object stream objects and all objects in them must have generation 0
2158
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
2159
    // that are not allowed to be in object streams. In addition to removing objects that were
2160
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
2161
    // objects from being included.
2162
9.16k
    auto end = xref.cend();
2163
9.16k
    obj.streams_empty = true;
2164
9.16k
    if (preserve_unreferenced_objects) {
2165
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2166
0
            if (iter->second.getType() == 2) {
2167
                // Pdf contains object streams.
2168
0
                obj.streams_empty = false;
2169
0
                obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2170
0
            }
2171
0
        }
2172
9.16k
    } else {
2173
        // Start by scanning for first compressed object in case we don't have any object streams to
2174
        // process.
2175
97.0k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
2176
89.1k
            if (iter->second.getType() == 2) {
2177
                // Pdf contains object streams.
2178
1.29k
                obj.streams_empty = false;
2179
1.29k
                auto eligible = getCompressibleObjSet();
2180
                // The object pointed to by iter may be a previous generation, in which case it is
2181
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
2182
                // table may contain multiple generations of an object).
2183
244k
                for (iter = xref.cbegin(); iter != end; ++iter) {
2184
243k
                    if (iter->second.getType() == 2) {
2185
220k
                        auto id = static_cast<size_t>(iter->first.getObj());
2186
220k
                        if (id < eligible.size() && eligible[id]) {
2187
45.5k
                            obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2188
175k
                        } else {
2189
175k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2190
175k
                        }
2191
220k
                    }
2192
243k
                }
2193
1.29k
                return;
2194
1.29k
            }
2195
89.1k
        }
2196
9.16k
    }
2197
9.16k
}
2198
2199
void
2200
QPDFWriter::Members::generateObjectStreams()
2201
0
{
2202
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
2203
    // how many object streams are needed so that we can distribute objects approximately evenly
2204
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
2205
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
2206
    // here later.
2207
2208
    // This code doesn't do anything with /Extends.
2209
2210
0
    std::vector<QPDFObjGen> eligible = getCompressibleObjGens();
2211
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
2212
2213
0
    initializeTables(2U * n_object_streams);
2214
0
    if (n_object_streams == 0) {
2215
0
        obj.streams_empty = true;
2216
0
        return;
2217
0
    }
2218
0
    size_t n_per = eligible.size() / n_object_streams;
2219
0
    if (n_per * n_object_streams < eligible.size()) {
2220
0
        ++n_per;
2221
0
    }
2222
0
    unsigned int n = 0;
2223
0
    int cur_ostream = pdf.newIndirectNull().getObjectID();
2224
0
    for (auto const& item: eligible) {
2225
0
        if (n == n_per) {
2226
0
            n = 0;
2227
            // Construct a new null object as the "original" object stream.  The rest of the code
2228
            // knows that this means we're creating the object stream from scratch.
2229
0
            cur_ostream = pdf.newIndirectNull().getObjectID();
2230
0
        }
2231
0
        auto& o = obj[item];
2232
0
        o.object_stream = cur_ostream;
2233
0
        o.gen = item.getGen();
2234
0
        ++n;
2235
0
    }
2236
0
}
2237
2238
// Return a shallow copy of the input file's trailer without the keys that necessarily have to be
// replaced when the file is written.
Dictionary
QPDFWriter::Members::trimmed_trailer()
{
    static constexpr char const* replaced_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };

    Dictionary trailer = pdf.getTrailer().unsafeShallowCopy();
    for (auto const* key: replaced_keys) {
        trailer.erase(key);
    }
    return trailer;
}
2263
2264
// Make document extension level information direct as required by the spec.
2265
void
2266
QPDFWriter::Members::prepareFileForWrite()
2267
9.09k
{
2268
9.09k
    pdf.fixDanglingReferences();
2269
9.09k
    auto root = pdf.getRoot();
2270
9.09k
    auto oh = root.getKey("/Extensions");
2271
9.09k
    if (oh.isDictionary()) {
2272
389
        const bool extensions_indirect = oh.isIndirect();
2273
389
        if (extensions_indirect) {
2274
118
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2275
118
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2276
118
        }
2277
389
        if (oh.hasKey("/ADBE")) {
2278
237
            auto adbe = oh.getKey("/ADBE");
2279
237
            if (adbe.isIndirect()) {
2280
161
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2281
161
                adbe.makeDirect();
2282
161
                oh.replaceKey("/ADBE", adbe);
2283
161
            }
2284
237
        }
2285
389
    }
2286
9.09k
}
2287
2288
void
2289
QPDFWriter::Members::initializeTables(size_t extra)
2290
9.16k
{
2291
9.16k
    auto size = QIntC::to_size(tableSize() + 100) + extra;
2292
9.16k
    obj.resize(size);
2293
9.16k
    new_obj.resize(size);
2294
9.16k
}
2295
2296
void
2297
QPDFWriter::Members::doWriteSetup()
2298
9.16k
{
2299
9.16k
    if (did_write_setup) {
2300
0
        return;
2301
0
    }
2302
9.16k
    did_write_setup = true;
2303
2304
    // Do preliminary setup
2305
2306
9.16k
    if (linearized) {
2307
9.16k
        qdf_mode = false;
2308
9.16k
    }
2309
2310
9.16k
    if (pclm) {
2311
0
        stream_decode_level = qpdf_dl_none;
2312
0
        compress_streams = false;
2313
0
        encryption = nullptr;
2314
0
    }
2315
2316
9.16k
    if (qdf_mode) {
2317
0
        if (!normalize_content_set) {
2318
0
            normalize_content = true;
2319
0
        }
2320
0
        if (!compress_streams_set) {
2321
0
            compress_streams = false;
2322
0
        }
2323
0
        if (!stream_decode_level_set) {
2324
0
            stream_decode_level = qpdf_dl_generalized;
2325
0
        }
2326
0
    }
2327
2328
9.16k
    if (encryption) {
2329
        // Encryption has been explicitly set
2330
9.16k
        preserve_encryption = false;
2331
9.16k
    } else if (normalize_content || pclm || qdf_mode) {
2332
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2333
        // though, we still obey that.
2334
0
        preserve_encryption = false;
2335
0
    }
2336
2337
9.16k
    if (preserve_encryption) {
2338
0
        copyEncryptionParameters(pdf);
2339
0
    }
2340
2341
9.16k
    if (!forced_pdf_version.empty()) {
2342
0
        int major = 0;
2343
0
        int minor = 0;
2344
0
        parseVersion(forced_pdf_version, major, minor);
2345
0
        disableIncompatibleEncryption(major, minor, forced_extension_level);
2346
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2347
0
            object_stream_mode = qpdf_o_disable;
2348
0
        }
2349
0
    }
2350
2351
9.16k
    if (qdf_mode || normalize_content) {
2352
0
        initializeSpecialStreams();
2353
0
    }
2354
2355
9.16k
    if (qdf_mode) {
2356
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2357
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2358
        // hint streams always get direct stream lengths.
2359
0
        direct_stream_lengths = false;
2360
0
    }
2361
2362
9.16k
    switch (object_stream_mode) {
2363
0
    case qpdf_o_disable:
2364
0
        initializeTables();
2365
0
        obj.streams_empty = true;
2366
0
        break;
2367
2368
9.16k
    case qpdf_o_preserve:
2369
9.16k
        initializeTables();
2370
9.16k
        preserveObjectStreams();
2371
9.16k
        break;
2372
2373
0
    case qpdf_o_generate:
2374
0
        generateObjectStreams();
2375
0
        break;
2376
2377
        // no default so gcc will warn for missing case tag
2378
9.16k
    }
2379
2380
9.14k
    if (!obj.streams_empty) {
2381
1.27k
        if (linearized) {
2382
            // Page dictionaries are not allowed to be compressed objects.
2383
2.00k
            for (auto& page: pdf.getAllPages()) {
2384
2.00k
                if (obj[page].object_stream > 0) {
2385
145
                    obj[page].object_stream = 0;
2386
145
                }
2387
2.00k
            }
2388
1.27k
        }
2389
2390
1.27k
        if (linearized || encryption) {
2391
            // The document catalog is not allowed to be compressed in linearized files either.  It
2392
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2393
            // handle encrypted files with compressed document catalogs, so we disable them in that
2394
            // case as well.
2395
1.27k
            if (obj[root_og].object_stream > 0) {
2396
25
                obj[root_og].object_stream = 0;
2397
25
            }
2398
1.27k
        }
2399
2400
        // Generate reverse mapping from object stream to objects
2401
1.45M
        obj.forEach([this](auto id, auto const& item) -> void {
2402
1.45M
            if (item.object_stream > 0) {
2403
45.1k
                auto& vec = object_stream_to_objects[item.object_stream];
2404
45.1k
                vec.emplace_back(id, item.gen);
2405
45.1k
                if (max_ostream_index < vec.size()) {
2406
18.3k
                    ++max_ostream_index;
2407
18.3k
                }
2408
45.1k
            }
2409
1.45M
        });
2410
1.27k
        --max_ostream_index;
2411
2412
1.27k
        if (object_stream_to_objects.empty()) {
2413
384
            obj.streams_empty = true;
2414
891
        } else {
2415
891
            w.setMinimumPDFVersion("1.5");
2416
891
        }
2417
1.27k
    }
2418
2419
9.14k
    setMinimumPDFVersion(pdf.getPDFVersion(), pdf.getExtensionLevel());
2420
9.14k
    final_pdf_version = min_pdf_version;
2421
9.14k
    final_extension_level = min_extension_level;
2422
9.14k
    if (!forced_pdf_version.empty()) {
2423
0
        final_pdf_version = forced_pdf_version;
2424
0
        final_extension_level = forced_extension_level;
2425
0
    }
2426
9.14k
}
2427
2428
void
2429
QPDFWriter::write()
2430
9.16k
{
2431
9.16k
    m->write();
2432
9.16k
}
2433
2434
void
2435
QPDFWriter::Members::write()
2436
9.16k
{
2437
9.16k
    doWriteSetup();
2438
2439
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2440
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2441
9.16k
    events_expected = QIntC::to_int(pdf.getObjectCount() * (linearized ? 2 : 1));
2442
2443
9.16k
    prepareFileForWrite();
2444
2445
9.16k
    if (linearized) {
2446
9.07k
        writeLinearized();
2447
9.07k
    } else {
2448
93
        writeStandard();
2449
93
    }
2450
2451
9.16k
    pipeline->finish();
2452
9.16k
    if (close_file) {
2453
0
        fclose(file);
2454
0
    }
2455
9.16k
    file = nullptr;
2456
9.16k
    if (buffer_pipeline) {
2457
0
        output_buffer = buffer_pipeline->getBuffer();
2458
0
        buffer_pipeline = nullptr;
2459
0
    }
2460
9.16k
    indicateProgress(false, true);
2461
9.16k
}
2462
2463
QPDFObjGen
2464
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2465
0
{
2466
0
    return {m->obj[og].renumber, 0};
2467
0
}
2468
2469
std::map<QPDFObjGen, QPDFXRefEntry>
2470
QPDFWriter::getWrittenXRefTable()
2471
0
{
2472
0
    return m->getWrittenXRefTable();
2473
0
}
2474
2475
std::map<QPDFObjGen, QPDFXRefEntry>
2476
QPDFWriter::Members::getWrittenXRefTable()
2477
0
{
2478
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2479
2480
0
    auto it = result.begin();
2481
0
    new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2482
0
        if (item.xref.getType() != 0) {
2483
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2484
0
        }
2485
0
    });
2486
0
    return result;
2487
0
}
2488
2489
void
2490
QPDFWriter::Members::enqueuePart(std::vector<QPDFObjectHandle>& part)
2491
43.0k
{
2492
133k
    for (auto const& oh: part) {
2493
133k
        enqueueObject(oh);
2494
133k
    }
2495
43.0k
}
2496
2497
void
2498
QPDFWriter::Members::writeEncryptionDictionary()
2499
15.3k
{
2500
15.3k
    encryption_dict_objid = openObject(encryption_dict_objid);
2501
15.3k
    auto& enc = *encryption;
2502
15.3k
    auto const V = enc.getV();
2503
2504
15.3k
    write("<<");
2505
15.3k
    if (V >= 4) {
2506
15.3k
        write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2507
15.3k
        write(encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2508
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2509
        // MacOS won't open encrypted files without it.
2510
15.3k
        write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2511
15.3k
        if (!encryption->getEncryptMetadata()) {
2512
0
            write(" /EncryptMetadata false");
2513
0
        }
2514
15.3k
    }
2515
15.3k
    write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8);
2516
15.3k
    write(" /O ").write_string(enc.getO(), true);
2517
15.3k
    if (V >= 4) {
2518
15.3k
        write(" /OE ").write_string(enc.getOE(), true);
2519
15.3k
    }
2520
15.3k
    write(" /P ").write(enc.getP());
2521
15.3k
    if (V >= 5) {
2522
15.3k
        write(" /Perms ").write_string(enc.getPerms(), true);
2523
15.3k
    }
2524
15.3k
    write(" /R ").write(enc.getR());
2525
2526
15.3k
    if (V >= 4) {
2527
15.3k
        write(" /StmF /StdCF /StrF /StdCF");
2528
15.3k
    }
2529
15.3k
    write(" /U ").write_string(enc.getU(), true);
2530
15.3k
    if (V >= 4) {
2531
15.3k
        write(" /UE ").write_string(enc.getUE(), true);
2532
15.3k
    }
2533
15.3k
    write(" /V ").write(enc.getV()).write(" >>");
2534
15.3k
    closeObject(encryption_dict_objid);
2535
15.3k
}
2536
2537
std::string
2538
QPDFWriter::getFinalVersion()
2539
0
{
2540
0
    m->doWriteSetup();
2541
0
    return m->final_pdf_version;
2542
0
}
2543
2544
void
2545
QPDFWriter::Members::writeHeader()
2546
15.4k
{
2547
15.4k
    write("%PDF-").write(final_pdf_version);
2548
15.4k
    if (pclm) {
2549
        // PCLm version
2550
0
        write("\n%PCLm 1.0\n");
2551
15.4k
    } else {
2552
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2553
        // as binary.
2554
15.4k
        write("\n%\xbf\xf7\xa2\xfe\n");
2555
15.4k
    }
2556
15.4k
    write_qdf("%QDF-1.0\n\n");
2557
2558
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2559
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2560
    // linearized files, we have to write extra header text after the linearization parameter
2561
    // dictionary.
2562
15.4k
}
2563
2564
void
2565
QPDFWriter::Members::writeHintStream(int hint_id)
2566
7.50k
{
2567
7.50k
    std::string hint_buffer;
2568
7.50k
    int S = 0;
2569
7.50k
    int O = 0;
2570
7.50k
    bool compressed = compress_streams;
2571
7.50k
    generateHintStream(new_obj, obj, hint_buffer, S, O, compressed);
2572
2573
7.50k
    openObject(hint_id);
2574
7.50k
    setDataKey(hint_id);
2575
2576
7.50k
    size_t hlen = hint_buffer.size();
2577
2578
7.50k
    write("<< ");
2579
7.50k
    if (compressed) {
2580
7.50k
        write("/Filter /FlateDecode ");
2581
7.50k
    }
2582
7.50k
    write("/S ").write(S);
2583
7.50k
    if (O) {
2584
249
        write(" /O ").write(O);
2585
249
    }
2586
7.50k
    adjustAESStreamLength(hlen);
2587
7.50k
    write(" /Length ").write(hlen);
2588
7.50k
    write(" >>\nstream\n").write_encrypted(hint_buffer);
2589
2590
7.50k
    if (encryption) {
2591
7.50k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2592
7.50k
    }
2593
2594
7.50k
    write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream");
2595
7.50k
    closeObject(hint_id);
2596
7.50k
}
2597
2598
qpdf_offset_t
2599
QPDFWriter::Members::writeXRefTable(trailer_e which, int first, int last, int size)
2600
0
{
2601
    // There are too many extra arguments to replace overloaded function with defaults in the header
2602
    // file...too much risk of leaving something off.
2603
0
    return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0);
2604
0
}
2605
2606
qpdf_offset_t
2607
QPDFWriter::Members::writeXRefTable(
2608
    trailer_e which,
2609
    int first,
2610
    int last,
2611
    int size,
2612
    qpdf_offset_t prev,
2613
    bool suppress_offsets,
2614
    int hint_id,
2615
    qpdf_offset_t hint_offset,
2616
    qpdf_offset_t hint_length,
2617
    int linearization_pass)
2618
27.9k
{
2619
27.9k
    write("xref\n").write(first).write(" ").write(last - first + 1);
2620
27.9k
    qpdf_offset_t space_before_zero = pipeline->getCount();
2621
27.9k
    write("\n");
2622
27.9k
    if (first == 0) {
2623
13.8k
        write("0000000000 65535 f \n");
2624
13.8k
        ++first;
2625
13.8k
    }
2626
240k
    for (int i = first; i <= last; ++i) {
2627
212k
        qpdf_offset_t offset = 0;
2628
212k
        if (!suppress_offsets) {
2629
139k
            offset = new_obj[i].xref.getOffset();
2630
139k
            if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) {
2631
37.4k
                offset += hint_length;
2632
37.4k
            }
2633
139k
        }
2634
212k
        write(QUtil::int_to_string(offset, 10)).write(" 00000 n \n");
2635
212k
    }
2636
27.9k
    writeTrailer(which, size, false, prev, linearization_pass);
2637
27.9k
    write("\n");
2638
27.9k
    return space_before_zero;
2639
27.9k
}
2640
2641
qpdf_offset_t
2642
QPDFWriter::Members::writeXRefStream(
2643
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
2644
0
{
2645
    // There are too many extra arguments to replace overloaded function with defaults in the header
2646
    // file...too much risk of leaving something off.
2647
0
    return writeXRefStream(
2648
0
        objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0);
2649
0
}
2650
2651
qpdf_offset_t
2652
QPDFWriter::Members::writeXRefStream(
2653
    int xref_id,
2654
    int max_id,
2655
    qpdf_offset_t max_offset,
2656
    trailer_e which,
2657
    int first,
2658
    int last,
2659
    int size,
2660
    qpdf_offset_t prev,
2661
    int hint_id,
2662
    qpdf_offset_t hint_offset,
2663
    qpdf_offset_t hint_length,
2664
    bool skip_compression,
2665
    int linearization_pass)
2666
2.49k
{
2667
2.49k
    qpdf_offset_t xref_offset = pipeline->getCount();
2668
2.49k
    qpdf_offset_t space_before_zero = xref_offset - 1;
2669
2670
    // field 1 contains offsets and object stream identifiers
2671
2.49k
    unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));
2672
2673
    // field 2 contains object stream indices
2674
2.49k
    unsigned int f2_size = bytesNeeded(QIntC::to_longlong(max_ostream_index));
2675
2676
2.49k
    unsigned int esize = 1 + f1_size + f2_size;
2677
2678
    // Must store in xref table in advance of writing the actual data rather than waiting for
2679
    // openObject to do it.
2680
2.49k
    new_obj[xref_id].xref = QPDFXRefEntry(pipeline->getCount());
2681
2682
2.49k
    std::string xref_data;
2683
2.49k
    const bool compressed = compress_streams && !qdf_mode;
2684
2.49k
    {
2685
2.49k
        auto pp_xref = pipeline_stack.activate(xref_data);
2686
2687
141k
        for (int i = first; i <= last; ++i) {
2688
138k
            QPDFXRefEntry& e = new_obj[i].xref;
2689
138k
            switch (e.getType()) {
2690
30.5k
            case 0:
2691
30.5k
                writeBinary(0, 1);
2692
30.5k
                writeBinary(0, f1_size);
2693
30.5k
                writeBinary(0, f2_size);
2694
30.5k
                break;
2695
2696
48.4k
            case 1:
2697
48.4k
                {
2698
48.4k
                    qpdf_offset_t offset = e.getOffset();
2699
48.4k
                    if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) {
2700
11.5k
                        offset += hint_length;
2701
11.5k
                    }
2702
48.4k
                    writeBinary(1, 1);
2703
48.4k
                    writeBinary(QIntC::to_ulonglong(offset), f1_size);
2704
48.4k
                    writeBinary(0, f2_size);
2705
48.4k
                }
2706
48.4k
                break;
2707
2708
59.5k
            case 2:
2709
59.5k
                writeBinary(2, 1);
2710
59.5k
                writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size);
2711
59.5k
                writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size);
2712
59.5k
                break;
2713
2714
0
            default:
2715
0
                throw std::logic_error("invalid type writing xref stream");
2716
0
                break;
2717
138k
            }
2718
138k
        }
2719
2.49k
    }
2720
2721
2.49k
    if (compressed) {
2722
2.49k
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
2723
2.49k
        if (!skip_compression) {
2724
            // Deflate only when not skipping compression. When skipping, the dictionary still
2725
            // declares FlateDecode, which helps compute padding for pass 1 of linearization.
2726
1.16k
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
2727
1.16k
        }
2728
2.49k
    }
2729
2730
2.49k
    openObject(xref_id);
2731
2.49k
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
2732
2.49k
    write(" /Length ").write(xref_data.size());
2733
2.49k
    if (compressed) {
2734
2.49k
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
2735
2.49k
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
2736
2.49k
    }
2737
2.49k
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");
2738
2.49k
    if (!(first == 0 && last == (size - 1))) {
2739
1.32k
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
2740
1.32k
    }
2741
2.49k
    writeTrailer(which, size, true, prev, linearization_pass);
2742
2.49k
    write("\nstream\n").write(xref_data).write("\nendstream");
2743
2.49k
    closeObject(xref_id);
2744
2.49k
    return space_before_zero;
2745
2.49k
}
2746
2747
size_t
2748
QPDFWriter::Members::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2749
1.32k
{
2750
    // This routine is called right after a linearization first pass xref stream has been written
2751
    // without compression.  Calculate the amount of padding that would be required in the worst
2752
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2753
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2754
    // 10 extra bytes for number length increases.
2755
2756
1.32k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2757
1.32k
}
2758
2759
void
2760
QPDFWriter::Members::writeLinearized()
2761
9.07k
{
2762
    // Optimize file and enqueue objects in order
2763
2764
9.07k
    std::map<int, int> stream_cache;
2765
2766
45.5k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2767
45.5k
        if (auto& result = stream_cache[stream.getObjectID()]) {
2768
24.2k
            return result;
2769
24.2k
        } else {
2770
21.2k
            return result = will_filter_stream(stream) ? 2 : 1;
2771
21.2k
        }
2772
45.5k
    };
2773
2774
9.07k
    optimize(obj, skip_stream_parameters);
2775
2776
9.07k
    std::vector<QPDFObjectHandle> part4;
2777
9.07k
    std::vector<QPDFObjectHandle> part6;
2778
9.07k
    std::vector<QPDFObjectHandle> part7;
2779
9.07k
    std::vector<QPDFObjectHandle> part8;
2780
9.07k
    std::vector<QPDFObjectHandle> part9;
2781
9.07k
    getLinearizedParts(obj, part4, part6, part7, part8, part9);
2782
2783
    // Object number sequence:
2784
    //
2785
    //  second half
2786
    //    second half uncompressed objects
2787
    //    second half xref stream, if any
2788
    //    second half compressed objects
2789
    //  first half
2790
    //    linearization dictionary
2791
    //    first half xref stream, if any
2792
    //    part 4 uncompressed objects
2793
    //    encryption dictionary, if any
2794
    //    hint stream
2795
    //    part 6 uncompressed objects
2796
    //    first half compressed objects
2797
    //
2798
2799
    // Second half objects
2800
9.07k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2801
9.07k
    int second_half_first_obj = 1;
2802
9.07k
    int after_second_half = 1 + second_half_uncompressed;
2803
9.07k
    next_objid = after_second_half;
2804
9.07k
    int second_half_xref = 0;
2805
9.07k
    bool need_xref_stream = !obj.streams_empty;
2806
9.07k
    if (need_xref_stream) {
2807
843
        second_half_xref = next_objid++;
2808
843
    }
2809
    // Assign numbers to all compressed objects in the second half.
2810
9.07k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2811
35.2k
    for (int i = 0; i < 3; ++i) {
2812
56.7k
        for (auto const& oh: *vecs2[i]) {
2813
56.7k
            assignCompressedObjectNumbers(oh.getObjGen());
2814
56.7k
        }
2815
26.1k
    }
2816
9.07k
    int second_half_end = next_objid - 1;
2817
9.07k
    int second_trailer_size = next_objid;
2818
2819
    // First half objects
2820
9.07k
    int first_half_start = next_objid;
2821
9.07k
    int lindict_id = next_objid++;
2822
9.07k
    int first_half_xref = 0;
2823
9.07k
    if (need_xref_stream) {
2824
843
        first_half_xref = next_objid++;
2825
843
    }
2826
9.07k
    int part4_first_obj = next_objid;
2827
9.07k
    next_objid += QIntC::to_int(part4.size());
2828
9.07k
    int after_part4 = next_objid;
2829
9.07k
    if (encryption) {
2830
8.71k
        encryption_dict_objid = next_objid++;
2831
8.71k
    }
2832
9.07k
    int hint_id = next_objid++;
2833
9.07k
    int part6_first_obj = next_objid;
2834
9.07k
    next_objid += QIntC::to_int(part6.size());
2835
9.07k
    int after_part6 = next_objid;
2836
    // Assign numbers to all compressed objects in the first half
2837
9.07k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2838
26.4k
    for (int i = 0; i < 2; ++i) {
2839
76.9k
        for (auto const& oh: *vecs1[i]) {
2840
76.9k
            assignCompressedObjectNumbers(oh.getObjGen());
2841
76.9k
        }
2842
17.4k
    }
2843
9.07k
    int first_half_end = next_objid - 1;
2844
9.07k
    int first_trailer_size = next_objid;
2845
2846
9.07k
    int part4_end_marker = part4.back().getObjectID();
2847
9.07k
    int part6_end_marker = part6.back().getObjectID();
2848
9.07k
    qpdf_offset_t space_before_zero = 0;
2849
9.07k
    qpdf_offset_t file_size = 0;
2850
9.07k
    qpdf_offset_t part6_end_offset = 0;
2851
9.07k
    qpdf_offset_t first_half_max_obj_offset = 0;
2852
9.07k
    qpdf_offset_t second_xref_offset = 0;
2853
9.07k
    qpdf_offset_t first_xref_end = 0;
2854
9.07k
    qpdf_offset_t second_xref_end = 0;
2855
2856
9.07k
    next_objid = part4_first_obj;
2857
9.07k
    enqueuePart(part4);
2858
9.07k
    if (next_objid != after_part4) {
2859
        // This can happen with very botched files as in the fuzzer test. There are likely some
2860
        // faulty assumptions in calculateLinearizationData
2861
3
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2862
3
    }
2863
9.06k
    next_objid = part6_first_obj;
2864
9.06k
    enqueuePart(part6);
2865
9.06k
    if (next_objid != after_part6) {
2866
96
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2867
96
    }
2868
8.97k
    next_objid = second_half_first_obj;
2869
8.97k
    enqueuePart(part7);
2870
8.97k
    enqueuePart(part8);
2871
8.97k
    enqueuePart(part9);
2872
8.97k
    if (next_objid != after_second_half) {
2873
653
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2874
653
    }
2875
2876
8.32k
    qpdf_offset_t hint_length = 0;
2877
8.32k
    std::string hint_buffer;
2878
2879
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2880
2881
8.32k
    FILE* lin_pass1_file = nullptr;
2882
8.32k
    auto pp_pass1 = pipeline_stack.popper();
2883
8.32k
    auto pp_md5 = pipeline_stack.popper();
2884
15.4k
    for (int pass: {1, 2}) {
2885
15.4k
        if (pass == 1) {
2886
7.91k
            if (!lin_pass1_filename.empty()) {
2887
0
                lin_pass1_file = QUtil::safe_fopen(lin_pass1_filename.c_str(), "wb");
2888
0
                pipeline_stack.activate(
2889
0
                    pp_pass1,
2890
0
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
2891
7.91k
            } else {
2892
7.91k
                pipeline_stack.activate(pp_pass1, true);
2893
7.91k
            }
2894
7.91k
            if (deterministic_id) {
2895
0
                pipeline_stack.activate_md5(pp_md5);
2896
0
            }
2897
7.91k
        }
2898
2899
        // Part 1: header
2900
2901
15.4k
        writeHeader();
2902
2903
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2904
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2905
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2906
        // linearization parameter dictionary must appear within the first 1024 characters of the
2907
        // file.
2908
2909
15.4k
        qpdf_offset_t pos = pipeline->getCount();
2910
15.4k
        openObject(lindict_id);
2911
15.4k
        write("<<");
2912
15.4k
        if (pass == 2) {
2913
7.50k
            std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
2914
7.50k
            int first_page_object = obj[pages.at(0)].renumber;
2915
2916
7.50k
            write(" /Linearized 1 /L ").write(file_size + hint_length);
2917
            // Implementation note 121 states that a space is mandatory after this open bracket.
2918
7.50k
            write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" ");
2919
7.50k
            write(hint_length);
2920
7.50k
            write(" ] /O ").write(first_page_object);
2921
7.50k
            write(" /E ").write(part6_end_offset + hint_length);
2922
7.50k
            write(" /N ").write(pages.size());
2923
7.50k
            write(" /T ").write(space_before_zero + hint_length);
2924
7.50k
        }
2925
15.4k
        write(" >>");
2926
15.4k
        closeObject(lindict_id);
2927
15.4k
        static int const pad = 200;
2928
15.4k
        write(QIntC::to_size(pos - pipeline->getCount() + pad), ' ').write("\n");
2929
2930
        // If the user supplied any additional header text, write it here after the linearization
2931
        // parameter dictionary.
2932
15.4k
        write(extra_header_text);
2933
2934
        // Part 3: first page cross reference table and trailer.
2935
2936
15.4k
        qpdf_offset_t first_xref_offset = pipeline->getCount();
2937
15.4k
        qpdf_offset_t hint_offset = 0;
2938
15.4k
        if (pass == 2) {
2939
7.50k
            hint_offset = new_obj[hint_id].xref.getOffset();
2940
7.50k
        }
2941
15.4k
        if (need_xref_stream) {
2942
            // Must pad here too.
2943
1.32k
            if (pass == 1) {
2944
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2945
                // reserved for each file offset.  This would provide adequate space for the xref
2946
                // stream as long as the last object in page 1 starts within the first 4 GB of the
2947
                // file, which is extremely likely.  In the second pass, we will know the actual
2948
                // value for this, but it's okay if it's smaller.
2949
743
                first_half_max_obj_offset = 1 << 25;
2950
743
            }
2951
1.32k
            pos = pipeline->getCount();
2952
1.32k
            writeXRefStream(
2953
1.32k
                first_half_xref,
2954
1.32k
                first_half_end,
2955
1.32k
                first_half_max_obj_offset,
2956
1.32k
                t_lin_first,
2957
1.32k
                first_half_start,
2958
1.32k
                first_half_end,
2959
1.32k
                first_trailer_size,
2960
1.32k
                hint_length + second_xref_offset,
2961
1.32k
                hint_id,
2962
1.32k
                hint_offset,
2963
1.32k
                hint_length,
2964
1.32k
                (pass == 1),
2965
1.32k
                pass);
2966
1.32k
            qpdf_offset_t endpos = pipeline->getCount();
2967
1.32k
            if (pass == 1) {
2968
                // Pad so we have enough room for the real xref stream.
2969
742
                write(calculateXrefStreamPadding(endpos - pos), ' ');
2970
742
                first_xref_end = pipeline->getCount();
2971
742
            } else {
2972
                // Pad so that the next object starts at the same place as in pass 1.
2973
586
                write(QIntC::to_size(first_xref_end - endpos), ' ');
2974
2975
586
                if (pipeline->getCount() != first_xref_end) {
2976
0
                    throw std::logic_error(
2977
0
                        "insufficient padding for first pass xref stream; first_xref_end=" +
2978
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2979
0
                }
2980
586
            }
2981
1.32k
            write("\n");
2982
14.0k
        } else {
2983
14.0k
            writeXRefTable(
2984
14.0k
                t_lin_first,
2985
14.0k
                first_half_start,
2986
14.0k
                first_half_end,
2987
14.0k
                first_trailer_size,
2988
14.0k
                hint_length + second_xref_offset,
2989
14.0k
                (pass == 1),
2990
14.0k
                hint_id,
2991
14.0k
                hint_offset,
2992
14.0k
                hint_length,
2993
14.0k
                pass);
2994
14.0k
            write("startxref\n0\n%%EOF\n");
2995
14.0k
        }
2996
2997
        // Parts 4 through 9
2998
2999
230k
        for (auto const& cur_object: object_queue) {
3000
230k
            if (cur_object.getObjectID() == part6_end_marker) {
3001
15.2k
                first_half_max_obj_offset = pipeline->getCount();
3002
15.2k
            }
3003
230k
            writeObject(cur_object);
3004
230k
            if (cur_object.getObjectID() == part4_end_marker) {
3005
15.3k
                if (encryption) {
3006
15.3k
                    writeEncryptionDictionary();
3007
15.3k
                }
3008
15.3k
                if (pass == 1) {
3009
7.85k
                    new_obj[hint_id].xref = QPDFXRefEntry(pipeline->getCount());
3010
7.85k
                } else {
3011
                    // Part 5: hint stream
3012
7.50k
                    write(hint_buffer);
3013
7.50k
                }
3014
15.3k
            }
3015
230k
            if (cur_object.getObjectID() == part6_end_marker) {
3016
15.1k
                part6_end_offset = pipeline->getCount();
3017
15.1k
            }
3018
230k
        }
3019
3020
        // Part 10: overflow hint stream -- not used
3021
3022
        // Part 11: main cross reference table and trailer
3023
3024
15.4k
        second_xref_offset = pipeline->getCount();
3025
15.4k
        if (need_xref_stream) {
3026
1.16k
            pos = pipeline->getCount();
3027
1.16k
            space_before_zero = writeXRefStream(
3028
1.16k
                second_half_xref,
3029
1.16k
                second_half_end,
3030
1.16k
                second_xref_offset,
3031
1.16k
                t_lin_second,
3032
1.16k
                0,
3033
1.16k
                second_half_end,
3034
1.16k
                second_trailer_size,
3035
1.16k
                0,
3036
1.16k
                0,
3037
1.16k
                0,
3038
1.16k
                0,
3039
1.16k
                (pass == 1),
3040
1.16k
                pass);
3041
1.16k
            qpdf_offset_t endpos = pipeline->getCount();
3042
3043
1.16k
            if (pass == 1) {
3044
                // Pad so we have enough room for the real xref stream.  See comments for previous
3045
                // xref stream on how we calculate the padding.
3046
585
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
3047
585
                second_xref_end = pipeline->getCount();
3048
585
            } else {
3049
                // Make the file size the same.
3050
584
                auto padding =
3051
584
                    QIntC::to_size(second_xref_end + hint_length - 1 - pipeline->getCount());
3052
584
                write(padding, ' ').write("\n");
3053
3054
                // If this assertion fails, maybe we didn't have enough padding above.
3055
584
                if (pipeline->getCount() != second_xref_end + hint_length) {
3056
0
                    throw std::logic_error(
3057
0
                        "count mismatch after xref stream; possible insufficient padding?");
3058
0
                }
3059
584
            }
3060
14.2k
        } else {
3061
14.2k
            space_before_zero = writeXRefTable(
3062
14.2k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
3063
14.2k
        }
3064
15.4k
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");
3065
3066
15.4k
        if (pass == 1) {
3067
7.50k
            if (deterministic_id) {
3068
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
3069
0
                computeDeterministicIDData();
3070
0
                pp_md5.pop();
3071
0
            }
3072
3073
            // Close first pass pipeline
3074
7.50k
            file_size = pipeline->getCount();
3075
7.50k
            pp_pass1.pop();
3076
3077
            // Save hint offset since it will be set to zero by calling openObject.
3078
7.50k
            qpdf_offset_t hint_offset1 = new_obj[hint_id].xref.getOffset();
3079
3080
            // Write hint stream to a buffer
3081
7.50k
            {
3082
7.50k
                auto pp_hint = pipeline_stack.activate(hint_buffer);
3083
7.50k
                writeHintStream(hint_id);
3084
7.50k
            }
3085
7.50k
            hint_length = QIntC::to_offset(hint_buffer.size());
3086
3087
            // Restore hint offset
3088
7.50k
            new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
3089
7.50k
            if (lin_pass1_file) {
3090
                // Write some debugging information
3091
0
                fprintf(
3092
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
3093
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
3094
0
                fprintf(
3095
0
                    lin_pass1_file,
3096
0
                    "%% second_xref_offset=%s\n",
3097
0
                    std::to_string(second_xref_offset).c_str());
3098
0
                fprintf(
3099
0
                    lin_pass1_file,
3100
0
                    "%% second_xref_end=%s\n",
3101
0
                    std::to_string(second_xref_end).c_str());
3102
0
                fclose(lin_pass1_file);
3103
0
                lin_pass1_file = nullptr;
3104
0
            }
3105
7.50k
        }
3106
15.4k
    }
3107
8.32k
}
3108
3109
// Enqueue the objects to be written for a regular (non-PCLm) file; writeStandard() calls this when
// pclm is not set. Order of enqueueObject() calls:
//   1. every object returned by pdf.getAllObjects(), only if preserve_unreferenced_objects is set;
//   2. the /Root entry of the trimmed trailer;
//   3. every non-null value in the trimmed trailer.
void
3110
QPDFWriter::Members::enqueueObjectsStandard()
3111
0
{
3112
0
    if (preserve_unreferenced_objects) {
3113
0
        for (auto const& oh: pdf.getAllObjects()) {
3114
0
            enqueueObject(oh);
3115
0
        }
3116
0
    }
3117
3118
    // Put root on the queue ahead of the other trailer entries. (When preserve_unreferenced_objects
    // is set, the objects enqueued above have already been passed to enqueueObject.)
3119
0
    auto trailer = trimmed_trailer();
3120
0
    enqueueObject(trailer["/Root"]);
3121
3122
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
3123
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
3124
0
    for (auto& item: trailer) {
3125
0
        if (!item.second.null()) {  // null-valued trailer entries are skipped
3126
0
            enqueueObject(item.second);
3127
0
        }
3128
0
    }
3129
0
}
3130
3131
// Enqueue the objects to be written for PCLm output; writeStandard() calls this when pclm is set.
// For each page returned by pdf.getAllPages(), in order: the page itself, its /Contents, and then
// for every non-null entry of the page's /Resources /XObject dictionary the entry followed by a
// newly created transform stream. The /Root entry of the trimmed trailer is enqueued last.
void
3132
QPDFWriter::Members::enqueueObjectsPCLm()
3133
0
{
3134
    // Image transform stream content for page strip images. One such new stream has to come
3135
    // directly after each page image strip written in the PCLm file.
3136
0
    std::string image_transform_content = "q /image Do Q\n";
3137
3138
    // enqueue all pages first
3139
0
    std::vector<QPDFObjectHandle> all = pdf.getAllPages();
3140
0
    for (auto& page: all) {
3141
        // enqueue page
3142
0
        enqueueObject(page);
3143
3144
        // enqueue page contents stream
3145
0
        enqueueObject(page.getKey("/Contents"));
3146
3147
        // enqueue all the strips for each page
3148
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
3149
0
        for (auto& image: strips.as_dictionary()) {
3150
0
            if (!image.second.null()) {
3151
0
                enqueueObject(image.second);
3152
0
                enqueueObject(QPDFObjectHandle::newStream(&pdf, image_transform_content));  // a fresh stream per strip
3153
0
            }
3154
0
        }
3155
0
    }
3156
3157
    // The document root is enqueued after all pages, contents and strips.
0
    enqueueObject(trimmed_trailer()["/Root"]);
3158
0
}
3159
3160
// Update the progress event counter and, if a progress reporter is registered, report a percentage.
//
// decrement: when true, events_seen is decreased by one and nothing else happens.
// finished:  when true (and decrement is false), 100 is reported unconditionally.
//
// Otherwise events_seen is increased by one. A report is made once events_seen has reached
// next_progress_report; the value is 0 while next_progress_report is still 0, and afterwards
// 1 + (100 * events_seen) / events_expected, capped at 99 so that 100 is only reported when
// finished is true. next_progress_report is then moved past events_seen in steps of
// max(1, events_expected / 100).
void
3161
QPDFWriter::Members::indicateProgress(bool decrement, bool finished)
3162
371k
{
3163
371k
    if (decrement) {
3164
69.0k
        --events_seen;
3165
69.0k
        return;
3166
69.0k
    }
3167
3168
302k
    ++events_seen;
3169
3170
302k
    if (!progress_reporter.get()) {  // no reporter registered: only the counter is maintained
3171
302k
        return;
3172
302k
    }
3173
3174
0
    if (finished || events_seen >= next_progress_report) {
3175
        // NOTE(review): the last branch divides by events_expected; it appears to rely on
        // events_expected being non-zero whenever next_progress_report != 0 -- confirm with caller.
0
        int percentage =
3176
0
            (finished ? 100
3177
0
                 : next_progress_report == 0
3178
0
                 ? 0
3179
0
                 : std::min(99, 1 + ((100 * events_seen) / events_expected)));
3180
0
        progress_reporter->reportProgress(percentage);
3181
0
    }
3182
    // Schedule the next report: advance the threshold until it is strictly above events_seen.
0
    int increment = std::max(1, (events_expected / 100));
3183
0
    while (events_seen >= next_progress_report) {
3184
0
        next_progress_report += increment;
3185
0
    }
3186
0
}
3187
3188
// Store pr as the progress reporter in the Members object, replacing any previously stored one.
// Members::indicateProgress() only calls reportProgress() while this pointer is non-empty.
void
3189
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3190
0
{
3191
0
    m->progress_reporter = pr;
3192
0
}
3193
3194
// Write the file in a single pass: header and extra header text, every enqueued object, the
// encryption dictionary (if encryption is set), then either a cross-reference table or a
// cross-reference stream, and finally the "startxref" / "%%EOF" trailer lines.
void
3195
QPDFWriter::Members::writeStandard()
3196
0
{
3197
    // pp_md5 only has an MD5 pipeline activated on it when deterministic_id is set. Per the Popper
    // notes on Pl_stack, the pipeline stack is popped when the popper goes out of scope.
0
    auto pp_md5 = pipeline_stack.popper();
3198
0
    if (deterministic_id) {
3199
0
        pipeline_stack.activate_md5(pp_md5);
3200
0
    }
3201
3202
    // Start writing
3203
3204
0
    writeHeader();
3205
0
    write(extra_header_text);
3206
3207
0
    if (pclm) {
3208
0
        enqueueObjectsPCLm();
3209
0
    } else {
3210
0
        enqueueObjectsStandard();
3211
0
    }
3212
3213
    // Now start walking queue, outputting each object. The size is re-read on every iteration, so
    // entries appended to object_queue while writing are processed too.
3214
0
    while (object_queue_front < object_queue.size()) {
3215
        // The handle is copied by value rather than referenced in place -- presumably because
        // writeObject can grow object_queue; verify before changing this to a reference.
0
        QPDFObjectHandle cur_object = object_queue.at(object_queue_front);
3216
0
        ++object_queue_front;
3217
0
        writeObject(cur_object);
3218
0
    }
3219
3220
    // Write out the encryption dictionary, if any
3221
0
    if (encryption) {
3222
0
        writeEncryptionDictionary();
3223
0
    }
3224
3225
    // Now write out xref.  next_objid is now the number of objects.
3226
    // The offset is captured before the xref is written; it is what follows "startxref" below.
0
    qpdf_offset_t xref_offset = pipeline->getCount();
3227
0
    if (object_stream_to_objects.empty()) {
3228
        // Write regular cross-reference table
3229
0
        writeXRefTable(t_normal, 0, next_objid - 1, next_objid);
3230
0
    } else {
3231
        // Write cross-reference stream. It takes the next object id, so next_objid is incremented
        // before being passed as the last two arguments.
3232
0
        int xref_id = next_objid++;
3233
0
        writeXRefStream(xref_id, xref_id, xref_offset, t_normal, 0, next_objid - 1, next_objid);
3234
0
    }
3235
0
    write("startxref\n").write(xref_offset).write("\n%%EOF\n");
3236
3237
    // The only deterministic-ID work left here is the QTC::TC call, keyed on whether object
    // streams were used.
0
    if (deterministic_id) {
3238
0
        QTC::TC(
3239
0
            "qpdf",
3240
0
            "QPDFWriter standard deterministic ID",
3241
0
            object_stream_to_objects.empty() ? 0 : 1);
3242
0
    }
3243
0
}