Coverage Report

Created: 2025-07-12 06:29

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // Deliberately an explicit, out-of-line empty body rather than `= default` in the header --
    // see QPDF_DLL_CLASS in README-maintainer.
}
35
36
// Builds a progress reporter around a callable; the callable is copied into the `handler` member
// and invoked later by reportProgress().
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(handler)
{
}
40
41
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
    // Deliberately an explicit, out-of-line empty body rather than `= default` in the header --
    // see QPDF_DLL_CLASS in README-maintainer.
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    this->handler(progress);
51
0
}
52
53
class QPDFWriter::Members
54
{
55
    friend class QPDFWriter;
56
57
  public:
58
    ~Members();
59
60
  private:
61
    Members(QPDF& pdf);
62
    Members(Members const&) = delete;
63
64
    QPDF& pdf;
65
    QPDFObjGen root_og{-1, 0};
66
    char const* filename{"unspecified"};
67
    FILE* file{nullptr};
68
    bool close_file{false};
69
    Pl_Buffer* buffer_pipeline{nullptr};
70
    Buffer* output_buffer{nullptr};
71
    bool normalize_content_set{false};
72
    bool normalize_content{false};
73
    bool compress_streams{true};
74
    bool compress_streams_set{false};
75
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
76
    bool stream_decode_level_set{false};
77
    bool recompress_flate{false};
78
    bool qdf_mode{false};
79
    bool preserve_unreferenced_objects{false};
80
    bool newline_before_endstream{false};
81
    bool static_id{false};
82
    bool suppress_original_object_ids{false};
83
    bool direct_stream_lengths{true};
84
    bool preserve_encryption{true};
85
    bool linearized{false};
86
    bool pclm{false};
87
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
88
89
    std::unique_ptr<QPDF::EncryptionData> encryption;
90
    std::string encryption_key;
91
    bool encrypt_use_aes{false};
92
93
    std::string id1; // for /ID key of
94
    std::string id2; // trailer dictionary
95
    std::string final_pdf_version;
96
    int final_extension_level{0};
97
    std::string min_pdf_version;
98
    int min_extension_level{0};
99
    std::string forced_pdf_version;
100
    int forced_extension_level{0};
101
    std::string extra_header_text;
102
    int encryption_dict_objid{0};
103
    std::string cur_data_key;
104
    std::list<std::shared_ptr<Pipeline>> to_delete;
105
    qpdf::pl::Count* pipeline{nullptr};
106
    std::vector<QPDFObjectHandle> object_queue;
107
    size_t object_queue_front{0};
108
    QPDFWriter::ObjTable obj;
109
    QPDFWriter::NewObjTable new_obj;
110
    int next_objid{1};
111
    int cur_stream_length_id{0};
112
    size_t cur_stream_length{0};
113
    bool added_newline{false};
114
    size_t max_ostream_index{0};
115
    std::set<QPDFObjGen> normalized_streams;
116
    std::map<QPDFObjGen, int> page_object_to_seq;
117
    std::map<QPDFObjGen, int> contents_to_page_seq;
118
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
119
    std::vector<Pipeline*> pipeline_stack;
120
    unsigned long next_stack_id{2};
121
    std::string count_buffer;
122
    bool deterministic_id{false};
123
    Pl_MD5* md5_pipeline{nullptr};
124
    std::string deterministic_id_data;
125
    bool did_write_setup{false};
126
127
    // For linearization only
128
    std::string lin_pass1_filename;
129
130
    // For progress reporting
131
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
132
    int events_expected{0};
133
    int events_seen{0};
134
    int next_progress_report{0};
135
};
136
137
// Binds the writer state to `pdf` and records the root's object/generation. When the root's
// QPDFObjGen is not indirect, the placeholder {-1, 0} is stored instead.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0))
{
}
142
143
// Releases what the writer owns: the output FILE (only if we were asked to close it) and any
// output buffer that was never handed out.
QPDFWriter::Members::~Members()
{
    if (close_file && file) {
        fclose(file);
    }
    delete output_buffer;
}
150
151
// Creates a writer for `pdf` with no output destination configured yet.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
155
156
// Creates a writer for `pdf` and immediately directs output to `filename` via
// setOutputFilename() (a null filename selects standard output there).
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    setOutputFilename(filename);
}
161
162
// Creates a writer for `pdf` and immediately directs output to an already-open FILE via
// setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    setOutputFile(description, file, close_file);
}
167
168
void
169
QPDFWriter::setOutputFilename(char const* filename)
170
0
{
171
0
    char const* description = filename;
172
0
    FILE* f = nullptr;
173
0
    bool close_file = false;
174
0
    if (filename == nullptr) {
175
0
        description = "standard output";
176
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
177
0
        f = stdout;
178
0
        QUtil::binary_stdout();
179
0
    } else {
180
0
        QTC::TC("qpdf", "QPDFWriter write to file");
181
0
        f = QUtil::safe_fopen(filename, "wb+");
182
0
        close_file = true;
183
0
    }
184
0
    setOutputFile(description, f, close_file);
185
0
}
186
187
void
188
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
189
0
{
190
0
    m->filename = description;
191
0
    m->file = file;
192
0
    m->close_file = close_file;
193
0
    std::shared_ptr<Pipeline> p = std::make_shared<Pl_StdioFile>("qpdf output", file);
194
0
    m->to_delete.push_back(p);
195
0
    initializePipelineStack(p.get());
196
0
}
197
198
void
199
QPDFWriter::setOutputMemory()
200
0
{
201
0
    m->filename = "memory buffer";
202
0
    m->buffer_pipeline = new Pl_Buffer("qpdf output");
203
0
    m->to_delete.push_back(std::shared_ptr<Pipeline>(m->buffer_pipeline));
204
0
    initializePipelineStack(m->buffer_pipeline);
205
0
}
206
207
// Hands the output buffer to the caller. The writer forgets the pointer, so the destructor will
// not delete it; a second call returns nullptr unless a new buffer has been stored since.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
214
215
std::shared_ptr<Buffer>
216
QPDFWriter::getBufferSharedPointer()
217
0
{
218
0
    return std::shared_ptr<Buffer>(getBuffer());
219
0
}
220
221
void
222
QPDFWriter::setOutputPipeline(Pipeline* p)
223
8.12k
{
224
8.12k
    m->filename = "custom pipeline";
225
8.12k
    initializePipelineStack(p);
226
8.12k
}
227
228
void
229
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
230
0
{
231
0
    m->object_stream_mode = mode;
232
0
}
233
234
void
235
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
236
0
{
237
0
    switch (mode) {
238
0
    case qpdf_s_uncompress:
239
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
240
0
        m->compress_streams = false;
241
0
        break;
242
243
0
    case qpdf_s_preserve:
244
0
        m->stream_decode_level = qpdf_dl_none;
245
0
        m->compress_streams = false;
246
0
        break;
247
248
0
    case qpdf_s_compress:
249
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
250
0
        m->compress_streams = true;
251
0
        break;
252
0
    }
253
0
    m->stream_decode_level_set = true;
254
0
    m->compress_streams_set = true;
255
0
}
256
257
void
258
QPDFWriter::setCompressStreams(bool val)
259
0
{
260
0
    m->compress_streams = val;
261
0
    m->compress_streams_set = true;
262
0
}
263
264
void
265
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
266
8.12k
{
267
8.12k
    m->stream_decode_level = val;
268
8.12k
    m->stream_decode_level_set = true;
269
8.12k
}
270
271
void
272
QPDFWriter::setRecompressFlate(bool val)
273
0
{
274
0
    m->recompress_flate = val;
275
0
}
276
277
void
278
QPDFWriter::setContentNormalization(bool val)
279
0
{
280
0
    m->normalize_content_set = true;
281
0
    m->normalize_content = val;
282
0
}
283
284
void
285
QPDFWriter::setQDFMode(bool val)
286
0
{
287
0
    m->qdf_mode = val;
288
0
}
289
290
void
291
QPDFWriter::setPreserveUnreferencedObjects(bool val)
292
0
{
293
0
    m->preserve_unreferenced_objects = val;
294
0
}
295
296
void
297
QPDFWriter::setNewlineBeforeEndstream(bool val)
298
0
{
299
0
    m->newline_before_endstream = val;
300
0
}
301
302
void
303
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
304
17.1k
{
305
17.1k
    bool set_version = false;
306
17.1k
    bool set_extension_level = false;
307
17.1k
    if (m->min_pdf_version.empty()) {
308
8.10k
        set_version = true;
309
8.10k
        set_extension_level = true;
310
9.07k
    } else {
311
9.07k
        int old_major = 0;
312
9.07k
        int old_minor = 0;
313
9.07k
        int min_major = 0;
314
9.07k
        int min_minor = 0;
315
9.07k
        parseVersion(version, old_major, old_minor);
316
9.07k
        parseVersion(m->min_pdf_version, min_major, min_minor);
317
9.07k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
318
9.07k
        if (compare > 0) {
319
387
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
320
387
            set_version = true;
321
387
            set_extension_level = true;
322
8.69k
        } else if (compare == 0) {
323
1.68k
            if (extension_level > m->min_extension_level) {
324
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
325
1
                set_extension_level = true;
326
1
            }
327
1.68k
        }
328
9.07k
    }
329
330
17.1k
    if (set_version) {
331
8.49k
        m->min_pdf_version = version;
332
8.49k
    }
333
17.1k
    if (set_extension_level) {
334
8.49k
        m->min_extension_level = extension_level;
335
8.49k
    }
336
17.1k
}
337
338
void
339
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
340
0
{
341
0
    std::string version;
342
0
    int extension_level;
343
0
    v.getVersion(version, extension_level);
344
0
    setMinimumPDFVersion(version, extension_level);
345
0
}
346
347
void
348
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
349
0
{
350
0
    m->forced_pdf_version = version;
351
0
    m->forced_extension_level = extension_level;
352
0
}
353
354
void
355
QPDFWriter::setExtraHeaderText(std::string const& text)
356
0
{
357
0
    m->extra_header_text = text;
358
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
359
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
360
0
        m->extra_header_text += "\n";
361
0
    } else {
362
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
363
0
    }
364
0
}
365
366
void
367
QPDFWriter::setStaticID(bool val)
368
8.12k
{
369
8.12k
    m->static_id = val;
370
8.12k
}
371
372
void
373
QPDFWriter::setDeterministicID(bool val)
374
0
{
375
0
    m->deterministic_id = val;
376
0
}
377
378
void
379
QPDFWriter::setStaticAesIV(bool val)
380
0
{
381
0
    if (val) {
382
0
        Pl_AES_PDF::useStaticIV();
383
0
    }
384
0
}
385
386
void
387
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
388
0
{
389
0
    m->suppress_original_object_ids = val;
390
0
}
391
392
void
393
QPDFWriter::setPreserveEncryption(bool val)
394
0
{
395
0
    m->preserve_encryption = val;
396
0
}
397
398
void
399
QPDFWriter::setLinearization(bool val)
400
8.12k
{
401
8.12k
    m->linearized = val;
402
8.12k
    if (val) {
403
8.12k
        m->pclm = false;
404
8.12k
    }
405
8.12k
}
406
407
void
408
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
409
0
{
410
0
    m->lin_pass1_filename = filename;
411
0
}
412
413
void
414
QPDFWriter::setPCLm(bool val)
415
0
{
416
0
    m->pclm = val;
417
0
    if (val) {
418
0
        m->linearized = false;
419
0
    }
420
0
}
421
422
void
423
QPDFWriter::setR2EncryptionParametersInsecure(
424
    char const* user_password,
425
    char const* owner_password,
426
    bool allow_print,
427
    bool allow_modify,
428
    bool allow_extract,
429
    bool allow_annotate)
430
0
{
431
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
432
0
    if (!allow_print) {
433
0
        m->encryption->setP(3, false);
434
0
    }
435
0
    if (!allow_modify) {
436
0
        m->encryption->setP(4, false);
437
0
    }
438
0
    if (!allow_extract) {
439
0
        m->encryption->setP(5, false);
440
0
    }
441
0
    if (!allow_annotate) {
442
0
        m->encryption->setP(6, false);
443
0
    }
444
0
    setEncryptionParameters(user_password, owner_password);
445
0
}
446
447
void
448
QPDFWriter::setR3EncryptionParametersInsecure(
449
    char const* user_password,
450
    char const* owner_password,
451
    bool allow_accessibility,
452
    bool allow_extract,
453
    bool allow_assemble,
454
    bool allow_annotate_and_form,
455
    bool allow_form_filling,
456
    bool allow_modify_other,
457
    qpdf_r3_print_e print)
458
0
{
459
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
460
0
    interpretR3EncryptionParameters(
461
0
        allow_accessibility,
462
0
        allow_extract,
463
0
        allow_assemble,
464
0
        allow_annotate_and_form,
465
0
        allow_form_filling,
466
0
        allow_modify_other,
467
0
        print,
468
0
        qpdf_r3m_all);
469
0
    setEncryptionParameters(user_password, owner_password);
470
0
}
471
472
void
473
QPDFWriter::setR4EncryptionParametersInsecure(
474
    char const* user_password,
475
    char const* owner_password,
476
    bool allow_accessibility,
477
    bool allow_extract,
478
    bool allow_assemble,
479
    bool allow_annotate_and_form,
480
    bool allow_form_filling,
481
    bool allow_modify_other,
482
    qpdf_r3_print_e print,
483
    bool encrypt_metadata,
484
    bool use_aes)
485
0
{
486
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
487
0
    m->encrypt_use_aes = use_aes;
488
0
    interpretR3EncryptionParameters(
489
0
        allow_accessibility,
490
0
        allow_extract,
491
0
        allow_assemble,
492
0
        allow_annotate_and_form,
493
0
        allow_form_filling,
494
0
        allow_modify_other,
495
0
        print,
496
0
        qpdf_r3m_all);
497
0
    setEncryptionParameters(user_password, owner_password);
498
0
}
499
500
void
501
QPDFWriter::setR5EncryptionParameters(
502
    char const* user_password,
503
    char const* owner_password,
504
    bool allow_accessibility,
505
    bool allow_extract,
506
    bool allow_assemble,
507
    bool allow_annotate_and_form,
508
    bool allow_form_filling,
509
    bool allow_modify_other,
510
    qpdf_r3_print_e print,
511
    bool encrypt_metadata)
512
0
{
513
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
514
0
    m->encrypt_use_aes = true;
515
0
    interpretR3EncryptionParameters(
516
0
        allow_accessibility,
517
0
        allow_extract,
518
0
        allow_assemble,
519
0
        allow_annotate_and_form,
520
0
        allow_form_filling,
521
0
        allow_modify_other,
522
0
        print,
523
0
        qpdf_r3m_all);
524
0
    setEncryptionParameters(user_password, owner_password);
525
0
}
526
527
void
528
QPDFWriter::setR6EncryptionParameters(
529
    char const* user_password,
530
    char const* owner_password,
531
    bool allow_accessibility,
532
    bool allow_extract,
533
    bool allow_assemble,
534
    bool allow_annotate_and_form,
535
    bool allow_form_filling,
536
    bool allow_modify_other,
537
    qpdf_r3_print_e print,
538
    bool encrypt_metadata)
539
8.12k
{
540
8.12k
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
541
8.12k
    interpretR3EncryptionParameters(
542
8.12k
        allow_accessibility,
543
8.12k
        allow_extract,
544
8.12k
        allow_assemble,
545
8.12k
        allow_annotate_and_form,
546
8.12k
        allow_form_filling,
547
8.12k
        allow_modify_other,
548
8.12k
        print,
549
8.12k
        qpdf_r3m_all);
550
8.12k
    m->encrypt_use_aes = true;
551
8.12k
    setEncryptionParameters(user_password, owner_password);
552
8.12k
}
553
554
void
555
QPDFWriter::interpretR3EncryptionParameters(
556
    bool allow_accessibility,
557
    bool allow_extract,
558
    bool allow_assemble,
559
    bool allow_annotate_and_form,
560
    bool allow_form_filling,
561
    bool allow_modify_other,
562
    qpdf_r3_print_e print,
563
    qpdf_r3_modify_e modify)
564
8.12k
{
565
    // Acrobat 5 security options:
566
567
    // Checkboxes:
568
    //   Enable Content Access for the Visually Impaired
569
    //   Allow Content Copying and Extraction
570
571
    // Allowed changes menu:
572
    //   None
573
    //   Only Document Assembly
574
    //   Only Form Field Fill-in or Signing
575
    //   Comment Authoring, Form Field Fill-in or Signing
576
    //   General Editing, Comment and Form Field Authoring
577
578
    // Allowed printing menu:
579
    //   None
580
    //   Low Resolution
581
    //   Full printing
582
583
    // Meanings of bits in P when R >= 3
584
    //
585
    //  3: low-resolution printing
586
    //  4: document modification except as controlled by 6, 9, and 11
587
    //  5: extraction
588
    //  6: add/modify annotations (comment), fill in forms
589
    //     if 4+6 are set, also allows modification of form fields
590
    //  9: fill in forms even if 6 is clear
591
    // 10: accessibility; ignored by readers, should always be set
592
    // 11: document assembly even if 4 is clear
593
    // 12: high-resolution printing
594
8.12k
    if (!allow_accessibility && m->encryption->getR() <= 3) {
595
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
596
        // is no way to disable accessibility with R > 3.
597
0
        m->encryption->setP(10, false);
598
0
    }
599
8.12k
    if (!allow_extract) {
600
0
        m->encryption->setP(5, false);
601
0
    }
602
603
8.12k
    switch (print) {
604
0
    case qpdf_r3p_none:
605
0
        m->encryption->setP(3, false); // any printing
606
0
        [[fallthrough]];
607
0
    case qpdf_r3p_low:
608
0
        m->encryption->setP(12, false); // high resolution printing
609
0
        [[fallthrough]];
610
8.12k
    case qpdf_r3p_full:
611
8.12k
        break;
612
        // no default so gcc warns for missing cases
613
8.12k
    }
614
615
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
616
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
617
    // stuck with it. See also allow checks below to control the bits individually.
618
619
    // NOT EXERCISED IN TEST SUITE
620
8.12k
    switch (modify) {
621
0
    case qpdf_r3m_none:
622
0
        m->encryption->setP(11, false); // document assembly
623
0
        [[fallthrough]];
624
0
    case qpdf_r3m_assembly:
625
0
        m->encryption->setP(9, false); // filling in form fields
626
0
        [[fallthrough]];
627
0
    case qpdf_r3m_form:
628
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
629
0
        [[fallthrough]];
630
0
    case qpdf_r3m_annotate:
631
0
        m->encryption->setP(4, false); // other modifications
632
0
        [[fallthrough]];
633
8.12k
    case qpdf_r3m_all:
634
8.12k
        break;
635
        // no default so gcc warns for missing cases
636
8.12k
    }
637
    // END NOT EXERCISED IN TEST SUITE
638
639
8.12k
    if (!allow_assemble) {
640
0
        m->encryption->setP(11, false);
641
0
    }
642
8.12k
    if (!allow_annotate_and_form) {
643
0
        m->encryption->setP(6, false);
644
0
    }
645
8.12k
    if (!allow_form_filling) {
646
0
        m->encryption->setP(9, false);
647
0
    }
648
8.12k
    if (!allow_modify_other) {
649
0
        m->encryption->setP(4, false);
650
0
    }
651
8.12k
}
652
653
void
654
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
655
8.12k
{
656
8.12k
    generateID();
657
8.12k
    m->encryption->setId1(m->id1);
658
8.12k
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
659
8.12k
    setEncryptionMinimumVersion();
660
8.12k
}
661
662
void
663
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
664
0
{
665
0
    m->preserve_encryption = false;
666
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
667
0
    if (trailer.hasKey("/Encrypt")) {
668
0
        generateID();
669
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
670
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
671
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
672
0
        int key_len = 5;
673
0
        if (V > 1) {
674
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
675
0
        }
676
0
        const bool encrypt_metadata =
677
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
678
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
679
0
            : true;
680
0
        if (V >= 4) {
681
            // When copying encryption parameters, use AES even if the original file did not.
682
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
683
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
684
            // all potentially having different values.
685
0
            m->encrypt_use_aes = true;
686
0
        }
687
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
688
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
689
690
0
        m->encryption = std::make_unique<QPDF::EncryptionData>(
691
0
            V,
692
0
            encrypt.getKey("/R").getIntValueAsInt(),
693
0
            key_len,
694
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
695
0
            encrypt.getKey("/O").getStringValue(),
696
0
            encrypt.getKey("/U").getStringValue(),
697
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
698
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
699
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
700
0
            m->id1, // m->id1 == the other file's id1
701
0
            encrypt_metadata);
702
0
        m->encryption_key = V >= 5
703
0
            ? qpdf.getEncryptionKey()
704
0
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
705
0
        setEncryptionMinimumVersion();
706
0
    }
707
0
}
708
709
void
710
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
711
0
{
712
0
    if (!m->encryption) {
713
0
        return;
714
0
    }
715
0
    if (compareVersions(major, minor, 1, 3) < 0) {
716
0
        m->encryption = nullptr;
717
0
        return;
718
0
    }
719
0
    int V = m->encryption->getV();
720
0
    int R = m->encryption->getR();
721
0
    if (compareVersions(major, minor, 1, 4) < 0) {
722
0
        if (V > 1 || R > 2) {
723
0
            m->encryption = nullptr;
724
0
        }
725
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
726
0
        if (V > 2 || R > 3) {
727
0
            m->encryption = nullptr;
728
0
        }
729
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
730
0
        if (m->encrypt_use_aes) {
731
0
            m->encryption = nullptr;
732
0
        }
733
0
    } else if (
734
0
        (compareVersions(major, minor, 1, 7) < 0) ||
735
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
736
0
        if (V >= 5 || R >= 5) {
737
0
            m->encryption = nullptr;
738
0
        }
739
0
    }
740
741
0
    if (!m->encryption) {
742
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
743
0
    }
744
0
}
745
746
void
747
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
748
18.1k
{
749
18.1k
    major = QUtil::string_to_int(version.c_str());
750
18.1k
    minor = 0;
751
18.1k
    size_t p = version.find('.');
752
18.1k
    if ((p != std::string::npos) && (version.length() > p)) {
753
18.1k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
754
18.1k
    }
755
18.1k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
756
18.1k
    if (tmp != version) {
757
        // The version number in the input is probably invalid. This happens with some files that
758
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
759
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
760
24
    }
761
18.1k
}
762
763
int
764
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
765
9.07k
{
766
9.07k
    if (major1 < major2) {
767
216
        return -1;
768
8.85k
    } else if (major1 > major2) {
769
214
        return 1;
770
8.64k
    } else if (minor1 < minor2) {
771
6.78k
        return -1;
772
6.78k
    } else if (minor1 > minor2) {
773
173
        return 1;
774
1.68k
    } else {
775
1.68k
        return 0;
776
1.68k
    }
777
9.07k
}
778
779
void
780
QPDFWriter::setEncryptionMinimumVersion()
781
8.10k
{
782
8.10k
    auto const R = m->encryption->getR();
783
8.10k
    if (R >= 6) {
784
8.10k
        setMinimumPDFVersion("1.7", 8);
785
8.10k
    } else if (R == 5) {
786
0
        setMinimumPDFVersion("1.7", 3);
787
0
    } else if (R == 4) {
788
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
789
0
    } else if (R == 3) {
790
0
        setMinimumPDFVersion("1.4");
791
0
    } else {
792
0
        setMinimumPDFVersion("1.3");
793
0
    }
794
8.10k
}
795
796
void
797
QPDFWriter::setDataKey(int objid)
798
252k
{
799
252k
    if (m->encryption) {
800
252k
        m->cur_data_key = QPDF::compute_data_key(
801
252k
            m->encryption_key,
802
252k
            objid,
803
252k
            0,
804
252k
            m->encrypt_use_aes,
805
252k
            m->encryption->getV(),
806
252k
            m->encryption->getR());
807
252k
    }
808
252k
}
809
810
unsigned int
811
QPDFWriter::bytesNeeded(long long n)
812
7.32k
{
813
7.32k
    unsigned int bytes = 0;
814
18.1k
    while (n) {
815
10.7k
        ++bytes;
816
10.7k
        n >>= 8;
817
10.7k
    }
818
7.32k
    return bytes;
819
7.32k
}
820
821
void
822
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
823
641k
{
824
641k
    if (bytes > sizeof(unsigned long long)) {
825
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
826
0
    }
827
641k
    unsigned char data[sizeof(unsigned long long)];
828
1.60M
    for (unsigned int i = 0; i < bytes; ++i) {
829
966k
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
830
966k
        val >>= 8;
831
966k
    }
832
641k
    m->pipeline->write(data, bytes);
833
641k
}
834
835
void
836
QPDFWriter::writeString(std::string_view str)
837
11.6M
{
838
11.6M
    m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
839
11.6M
}
840
841
void
842
QPDFWriter::writeStringQDF(std::string_view str)
843
2.96M
{
844
2.96M
    if (m->qdf_mode) {
845
0
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
846
0
    }
847
2.96M
}
848
849
void
850
QPDFWriter::writeStringNoQDF(std::string_view str)
851
302k
{
852
302k
    if (!m->qdf_mode) {
853
302k
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
854
302k
    }
855
302k
}
856
857
void
858
QPDFWriter::writePad(size_t nspaces)
859
21.3k
{
860
21.3k
    writeString(std::string(nspaces, ' '));
861
21.3k
}
862
863
// Appends p to the pipeline stack and returns it. pl::Count pipelines must not be pushed through
// this function; in debug builds this is asserted.
Pipeline*
QPDFWriter::pushPipeline(Pipeline* p)
{
    qpdf_assert_debug(dynamic_cast<pl::Count*>(p) == nullptr);
    m->pipeline_stack.emplace_back(p);
    return p;
}
870
871
void
872
QPDFWriter::initializePipelineStack(Pipeline* p)
873
8.12k
{
874
8.12k
    m->pipeline = new pl::Count(1, p);
875
8.12k
    m->to_delete.emplace_back(std::shared_ptr<Pipeline>(m->pipeline));
876
8.12k
    m->pipeline_stack.emplace_back(m->pipeline);
877
8.12k
}
878
879
void
880
QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str)
881
53.9k
{
882
53.9k
    activatePipelineStack(pp, false, &str, nullptr);
883
53.9k
}
884
885
void
886
QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link)
887
7.38k
{
888
7.38k
    m->count_buffer.clear();
889
7.38k
    activatePipelineStack(pp, false, &m->count_buffer, std::move(link));
890
7.38k
}
891
892
void
893
QPDFWriter::activatePipelineStack(
894
    PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link)
895
156k
{
896
156k
    pl::Count* c;
897
156k
    if (link) {
898
7.38k
        c = new pl::Count(m->next_stack_id, m->count_buffer, std::move(link));
899
148k
    } else if (discard) {
900
42.4k
        c = new pl::Count(m->next_stack_id, nullptr);
901
106k
    } else if (!str) {
902
52.4k
        c = new pl::Count(m->next_stack_id, m->pipeline_stack.back());
903
53.9k
    } else {
904
53.9k
        c = new pl::Count(m->next_stack_id, *str);
905
53.9k
    }
906
156k
    pp.stack_id = m->next_stack_id;
907
156k
    m->pipeline_stack.emplace_back(c);
908
156k
    m->pipeline = c;
909
156k
    ++m->next_stack_id;
910
156k
}
911
912
// Pops the pipeline-stack level that was activated for this popper. A popper whose stack_id is
// zero was never activated and does nothing. Otherwise the current pipeline is finished, the
// counter on top of the stack is deleted, every non-counter pipeline below it is deleted until
// the next counter is reached, and that counter becomes the current pipeline again.
QPDFWriter::PipelinePopper::~PipelinePopper()
{
    if (!stack_id) {
        return;
    }
    auto& stack = qw->m->pipeline_stack;
    qpdf_assert_debug(stack.size() >= 2);
    qw->m->pipeline->finish();
    qpdf_assert_debug(dynamic_cast<pl::Count*>(stack.back()) == qw->m->pipeline);
    // It might be possible for this assertion to fail if writeLinearized exits by exception when
    // a deterministic ID is being used, but I don't think so. As of this writing, this is the
    // only case in which two dynamically allocated PipelinePopper objects ever exist at the same
    // time, so the assertion will fail if they get popped out of order from automatic
    // destruction.
    qpdf_assert_debug(qw->m->pipeline->id() == stack_id);
    delete stack.back();
    stack.pop_back();
    // Remove the pipelines that were pushed underneath the counter for this level.
    for (Pipeline* top = stack.back(); dynamic_cast<pl::Count*>(top) == nullptr;
         top = stack.back()) {
        if (dynamic_cast<Pl_MD5*>(top) == qw->m->md5_pipeline) {
            // The MD5 pipeline is about to be deleted; forget the cached pointer.
            qw->m->md5_pipeline = nullptr;
        }
        stack.pop_back();
        delete top;
    }
    qw->m->pipeline = dynamic_cast<pl::Count*>(stack.back());
}
937
938
void
939
QPDFWriter::adjustAESStreamLength(size_t& length)
940
52.8k
{
941
52.8k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
942
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
943
        // also be prepended by 16 bits of random data.
944
52.8k
        length += 32 - (length & 0xf);
945
52.8k
    }
946
52.8k
}
947
948
void
949
QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
950
52.4k
{
951
52.4k
    if (m->encryption && !m->cur_data_key.empty()) {
952
52.4k
        Pipeline* p = nullptr;
953
52.4k
        if (m->encrypt_use_aes) {
954
52.4k
            p = new Pl_AES_PDF(
955
52.4k
                "aes stream encryption",
956
52.4k
                m->pipeline,
957
52.4k
                true,
958
52.4k
                QUtil::unsigned_char_pointer(m->cur_data_key),
959
52.4k
                m->cur_data_key.length());
960
52.4k
        } else {
961
0
            p = new Pl_RC4(
962
0
                "rc4 stream encryption",
963
0
                m->pipeline,
964
0
                QUtil::unsigned_char_pointer(m->cur_data_key),
965
0
                QIntC::to_int(m->cur_data_key.length()));
966
0
        }
967
52.4k
        pushPipeline(p);
968
52.4k
    }
969
    // Must call this unconditionally so we can call popPipelineStack to balance
970
    // pushEncryptionFilter().
971
52.4k
    activatePipelineStack(pp);
972
52.4k
}
973
974
void
975
QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
976
0
{
977
0
    if (!m->id2.empty()) {
978
        // Can't happen in the code
979
0
        throw std::logic_error(
980
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
981
0
    }
982
0
    qpdf_assert_debug(m->deterministic_id);
983
0
    qpdf_assert_debug(m->md5_pipeline == nullptr);
984
0
    qpdf_assert_debug(m->pipeline->getCount() == 0);
985
0
    m->md5_pipeline = new Pl_MD5("qpdf md5", m->pipeline);
986
0
    m->md5_pipeline->persistAcrossFinish(true);
987
    // Special case code in popPipelineStack clears m->md5_pipeline upon deletion.
988
0
    pushPipeline(m->md5_pipeline);
989
0
    activatePipelineStack(pp);
990
0
}
991
992
void
993
QPDFWriter::computeDeterministicIDData()
994
0
{
995
0
    qpdf_assert_debug(m->md5_pipeline != nullptr);
996
0
    qpdf_assert_debug(m->deterministic_id_data.empty());
997
0
    m->deterministic_id_data = m->md5_pipeline->getHexDigest();
998
0
    m->md5_pipeline->enable(false);
999
0
}
1000
1001
int
1002
QPDFWriter::openObject(int objid)
1003
282k
{
1004
282k
    if (objid == 0) {
1005
0
        objid = m->next_objid++;
1006
0
    }
1007
282k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1008
282k
    writeString(std::to_string(objid));
1009
282k
    writeString(" 0 obj\n");
1010
282k
    return objid;
1011
282k
}
1012
1013
void
1014
QPDFWriter::closeObject(int objid)
1015
282k
{
1016
    // Write a newline before endobj as it makes the file easier to repair.
1017
282k
    writeString("\nendobj\n");
1018
282k
    writeStringQDF("\n");
1019
282k
    auto& new_obj = m->new_obj[objid];
1020
282k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1021
282k
}
1022
1023
void
1024
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1025
143k
{
1026
143k
    int objid = og.getObj();
1027
143k
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1028
        // This is not an object stream.
1029
140k
        return;
1030
140k
    }
1031
1032
    // Reserve numbers for the objects that belong to this object stream.
1033
76.4k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1034
76.4k
        m->obj[iter].renumber = m->next_objid++;
1035
76.4k
    }
1036
3.41k
}
1037
1038
void
1039
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1040
143k
{
1041
143k
    if (object.isIndirect()) {
1042
        // This owner check can only be done for indirect objects. It is possible for a direct
1043
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1044
        // one file was insert into another file without copying. Doing that is safe even if the
1045
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1046
143k
        if (object.getOwningQPDF() != &(m->pdf)) {
1047
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1048
0
            throw std::logic_error(
1049
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1050
0
                "QPDF::copyForeignObject to add objects from another file.");
1051
0
        }
1052
1053
143k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1054
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1055
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1056
            // file. This case can occur when creating a QDF from a file with object streams when
1057
            // preserving unreferenced objects since the old cross reference streams are not
1058
            // actually referenced by object number.
1059
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1060
0
            return;
1061
0
        }
1062
1063
143k
        QPDFObjGen og = object.getObjGen();
1064
143k
        auto& obj = m->obj[og];
1065
1066
143k
        if (obj.renumber == 0) {
1067
142k
            if (obj.object_stream > 0) {
1068
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1069
                // stream.  Object streams always have generation 0.
1070
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1071
17
                obj.renumber = -1;
1072
17
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1073
142k
            } else {
1074
142k
                m->object_queue.push_back(object);
1075
142k
                obj.renumber = m->next_objid++;
1076
1077
142k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1078
                    // For linearized files, uncompressed objects go at end, and we take care of
1079
                    // assigning numbers to them elsewhere.
1080
3.35k
                    if (!m->linearized) {
1081
0
                        assignCompressedObjectNumbers(og);
1082
0
                    }
1083
139k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1084
                    // reserve next object ID for length
1085
0
                    ++m->next_objid;
1086
0
                }
1087
142k
            }
1088
142k
        } else if (obj.renumber == -1) {
1089
            // This can happen if a specially constructed file indicates that an object stream is
1090
            // inside itself.
1091
0
        }
1092
143k
        return;
1093
143k
    } else if (!m->linearized) {
1094
0
        if (object.isArray()) {
1095
0
            for (auto& item: object.as_array()) {
1096
0
                enqueueObject(item);
1097
0
            }
1098
0
        } else if (auto d = object.as_dictionary()) {
1099
0
            for (auto const& item: d) {
1100
0
                if (!item.second.null()) {
1101
0
                    enqueueObject(item.second);
1102
0
                }
1103
0
            }
1104
0
        }
1105
153
    } else {
1106
        // ignore
1107
153
    }
1108
143k
}
1109
1110
void
1111
QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1112
2.21M
{
1113
2.21M
    if (!m->linearized) {
1114
0
        enqueueObject(child);
1115
0
    }
1116
2.21M
    if (child.isIndirect()) {
1117
546k
        writeString(std::to_string(m->obj[child].renumber));
1118
546k
        writeString(" 0 R");
1119
1.66M
    } else {
1120
1.66M
        unparseObject(child, level, flags);
1121
1.66M
    }
1122
2.21M
}
1123
1124
void
1125
QPDFWriter::writeTrailer(
1126
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1127
27.2k
{
1128
27.2k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1129
27.2k
    if (xref_stream) {
1130
2.44k
        m->cur_data_key.clear();
1131
24.8k
    } else {
1132
24.8k
        writeString("trailer <<");
1133
24.8k
    }
1134
27.2k
    writeStringQDF("\n");
1135
27.2k
    if (which == t_lin_second) {
1136
13.4k
        writeString(" /Size ");
1137
13.4k
        writeString(std::to_string(size));
1138
13.8k
    } else {
1139
43.8k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1140
43.8k
            if (value.null()) {
1141
11.7k
                continue;
1142
11.7k
            }
1143
32.1k
            writeStringQDF("  ");
1144
32.1k
            writeStringNoQDF(" ");
1145
32.1k
            writeString(Name::normalize(key));
1146
32.1k
            writeString(" ");
1147
32.1k
            if (key == "/Size") {
1148
5.03k
                writeString(std::to_string(size));
1149
5.03k
                if (which == t_lin_first) {
1150
5.03k
                    writeString(" /Prev ");
1151
5.03k
                    qpdf_offset_t pos = m->pipeline->getCount();
1152
5.03k
                    writeString(std::to_string(prev));
1153
5.03k
                    writePad(QIntC::to_size(pos - m->pipeline->getCount() + 21));
1154
5.03k
                }
1155
27.1k
            } else {
1156
27.1k
                unparseChild(value, 1, 0);
1157
27.1k
            }
1158
32.1k
            writeStringQDF("\n");
1159
32.1k
        }
1160
13.8k
    }
1161
1162
    // Write ID
1163
27.2k
    writeStringQDF(" ");
1164
27.2k
    writeString(" /ID [");
1165
27.2k
    if (linearization_pass == 1) {
1166
13.8k
        std::string original_id1 = getOriginalID1();
1167
13.8k
        if (original_id1.empty()) {
1168
12.0k
            writeString("<00000000000000000000000000000000>");
1169
12.0k
        } else {
1170
            // Write a string of zeroes equal in length to the representation of the original ID.
1171
            // While writing the original ID would have the same number of bytes, it would cause a
1172
            // change to the deterministic ID generated by older versions of the software that
1173
            // hard-coded the length of the ID to 16 bytes.
1174
1.81k
            writeString("<");
1175
1.81k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1176
61.4k
            for (size_t i = 0; i < len; ++i) {
1177
59.6k
                writeString("0");
1178
59.6k
            }
1179
1.81k
            writeString(">");
1180
1.81k
        }
1181
13.8k
        writeString("<00000000000000000000000000000000>");
1182
13.8k
    } else {
1183
13.4k
        if ((linearization_pass == 0) && (m->deterministic_id)) {
1184
0
            computeDeterministicIDData();
1185
0
        }
1186
13.4k
        generateID();
1187
13.4k
        writeString(QPDF_String(m->id1).unparse(true));
1188
13.4k
        writeString(QPDF_String(m->id2).unparse(true));
1189
13.4k
    }
1190
27.2k
    writeString("]");
1191
1192
27.2k
    if (which != t_lin_second) {
1193
        // Write reference to encryption dictionary
1194
13.8k
        if (m->encryption) {
1195
13.8k
            writeString(" /Encrypt ");
1196
13.8k
            writeString(std::to_string(m->encryption_dict_objid));
1197
13.8k
            writeString(" 0 R");
1198
13.8k
        }
1199
13.8k
    }
1200
1201
27.2k
    writeStringQDF("\n");
1202
27.2k
    writeStringNoQDF(" ");
1203
27.2k
    writeString(">>");
1204
27.2k
}
1205
1206
bool
1207
QPDFWriter::willFilterStream(
1208
    QPDFObjectHandle stream,
1209
    bool& compress_stream,  // out only
1210
    bool& is_root_metadata, // out only
1211
    std::string* stream_data)
1212
64.4k
{
1213
64.4k
    compress_stream = false;
1214
64.4k
    is_root_metadata = false;
1215
1216
64.4k
    QPDFObjGen old_og = stream.getObjGen();
1217
64.4k
    QPDFObjectHandle stream_dict = stream.getDict();
1218
1219
64.4k
    if (stream.isRootMetadata()) {
1220
834
        is_root_metadata = true;
1221
834
    }
1222
64.4k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1223
64.4k
    bool filter_on_write = stream.getFilterOnWrite();
1224
64.4k
    if (!filter_on_write) {
1225
13.2k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1226
13.2k
        filter = false;
1227
13.2k
    }
1228
64.4k
    if (filter_on_write && m->compress_streams) {
1229
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1230
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1231
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1232
        // setRecompressFlate(true).
1233
51.2k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1234
51.2k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1235
51.2k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1236
13.5k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1237
13.5k
            filter = false;
1238
13.5k
        }
1239
51.2k
    }
1240
64.4k
    bool normalize = false;
1241
64.4k
    bool uncompress = false;
1242
64.4k
    if (filter_on_write && is_root_metadata &&
1243
64.4k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1244
0
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1245
0
        filter = true;
1246
0
        compress_stream = false;
1247
0
        uncompress = true;
1248
64.4k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1249
0
        normalize = true;
1250
0
        filter = true;
1251
64.4k
    } else if (filter_on_write && filter && m->compress_streams) {
1252
37.6k
        compress_stream = true;
1253
37.6k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1254
37.6k
    }
1255
1256
    // Disable compression for empty streams to improve compatibility
1257
64.4k
    if (stream_dict.getKey("/Length").isInteger() &&
1258
64.4k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1259
1.83k
        filter = true;
1260
1.83k
        compress_stream = false;
1261
1.83k
    }
1262
1263
64.4k
    bool filtered = false;
1264
72.6k
    for (bool first_attempt: {true, false}) {
1265
72.6k
        PipelinePopper pp_stream_data(this);
1266
72.6k
        if (stream_data != nullptr) {
1267
42.2k
            activatePipelineStack(pp_stream_data, *stream_data);
1268
42.2k
        } else {
1269
30.3k
            activatePipelineStack(pp_stream_data, true);
1270
30.3k
        }
1271
72.6k
        try {
1272
72.6k
            filtered = stream.pipeStreamData(
1273
72.6k
                m->pipeline,
1274
72.6k
                !filter ? 0
1275
72.6k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1276
38.7k
                           (compress_stream ? qpdf_ef_compress : 0)),
1277
72.6k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1278
72.6k
                false,
1279
72.6k
                first_attempt);
1280
72.6k
            if (filter && !filtered) {
1281
                // Try again
1282
8.19k
                filter = false;
1283
8.19k
                stream.setFilterOnWrite(false);
1284
64.4k
            } else {
1285
64.4k
                break;
1286
64.4k
            }
1287
72.6k
        } catch (std::runtime_error& e) {
1288
91
            if (filter && first_attempt) {
1289
81
                stream.warnIfPossible("error while getting stream data: "s + e.what());
1290
81
                stream.warnIfPossible("qpdf will attempt to write the damaged stream unchanged");
1291
81
                filter = false;
1292
81
                stream.setFilterOnWrite(false);
1293
81
                continue;
1294
81
            }
1295
10
            throw std::runtime_error(
1296
10
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1297
91
        }
1298
8.19k
        if (stream_data) {
1299
1.02k
            stream_data->clear();
1300
1.02k
        }
1301
8.19k
    }
1302
64.5k
    if (!filtered) {
1303
33.6k
        compress_stream = false;
1304
33.6k
    }
1305
64.5k
    return filtered;
1306
64.4k
}
1307
1308
void
1309
QPDFWriter::unparseObject(
1310
    QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress)
1311
2.07M
{
1312
2.07M
    QPDFObjGen old_og = object.getObjGen();
1313
2.07M
    int child_flags = flags & ~f_stream;
1314
2.07M
    if (level < 0) {
1315
0
        throw std::logic_error("invalid level in QPDFWriter::unparseObject");
1316
0
    }
1317
    // For non-qdf, "indent" is a single space between tokens. For qdf, indent includes the
1318
    // preceding newline.
1319
2.07M
    std::string indent = " ";
1320
2.07M
    if (m->qdf_mode) {
1321
0
        indent.append(static_cast<size_t>(2 * level), ' ');
1322
0
        indent[0] = '\n';
1323
0
    }
1324
1325
2.07M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1326
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1327
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1328
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1329
133k
        writeString("[");
1330
1.21M
        for (auto const& item: object.as_array()) {
1331
1.21M
            writeString(indent);
1332
1.21M
            writeStringQDF("  ");
1333
1.21M
            unparseChild(item, level + 1, child_flags);
1334
1.21M
        }
1335
133k
        writeString(indent);
1336
133k
        writeString("]");
1337
1.94M
    } else if (tc == ::ot_dictionary) {
1338
        // Handle special cases for specific dictionaries.
1339
1340
324k
        if (old_og == m->root_og) {
1341
            // Extensions dictionaries.
1342
1343
            // We have one of several cases:
1344
            //
1345
            // * We need ADBE
1346
            //    - We already have Extensions
1347
            //       - If it has the right ADBE, preserve it
1348
            //       - Otherwise, replace ADBE
1349
            //    - We don't have Extensions: create one from scratch
1350
            // * We don't want ADBE
1351
            //    - We already have Extensions
1352
            //       - If it only has ADBE, remove it
1353
            //       - If it has other things, keep those and remove ADBE
1354
            //    - We have no extensions: no action required
1355
            //
1356
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1357
            // dictionary, so we can modify in place.
1358
1359
13.8k
            auto extensions = object.getKey("/Extensions");
1360
13.8k
            const bool has_extensions = extensions.isDictionary();
1361
13.8k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1362
1363
13.8k
            if (has_extensions || need_extensions_adbe) {
1364
                // Make a shallow copy of this object so we can modify it safely without affecting
1365
                // the original. This code has logic to skip certain keys in agreement with
1366
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1367
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1368
                // here because all we are doing is removing or replacing top-level keys.
1369
13.2k
                object = object.unsafeShallowCopy();
1370
13.2k
                if (!has_extensions) {
1371
12.9k
                    extensions = QPDFObjectHandle();
1372
12.9k
                }
1373
1374
13.2k
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1375
13.2k
                const bool have_extensions_other =
1376
13.2k
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1377
1378
13.2k
                if (need_extensions_adbe) {
1379
13.2k
                    if (!(have_extensions_other || have_extensions_adbe)) {
1380
                        // We need Extensions and don't have it.  Create it here.
1381
12.9k
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1382
12.9k
                        extensions = object.replaceKeyAndGetNew(
1383
12.9k
                            "/Extensions", QPDFObjectHandle::newDictionary());
1384
12.9k
                    }
1385
13.2k
                } else if (!have_extensions_other) {
1386
                    // We have Extensions dictionary and don't want one.
1387
5
                    if (have_extensions_adbe) {
1388
3
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1389
3
                        object.removeKey("/Extensions");
1390
3
                        extensions = QPDFObjectHandle(); // uninitialized
1391
3
                    }
1392
5
                }
1393
1394
13.2k
                if (extensions) {
1395
13.2k
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1396
13.2k
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1397
13.2k
                    if (adbe.isDictionary() &&
1398
13.2k
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1399
13.2k
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1400
13.2k
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1401
163
                         m->final_extension_level)) {
1402
161
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1403
13.1k
                    } else {
1404
13.1k
                        if (need_extensions_adbe) {
1405
13.0k
                            extensions.replaceKey(
1406
13.0k
                                "/ADBE",
1407
13.0k
                                QPDFObjectHandle::parse(
1408
13.0k
                                    "<< /BaseVersion /" + m->final_pdf_version +
1409
13.0k
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1410
13.0k
                                    " >>"));
1411
13.0k
                        } else {
1412
59
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1413
59
                            extensions.removeKey("/ADBE");
1414
59
                        }
1415
13.1k
                    }
1416
13.2k
                }
1417
13.2k
            }
1418
13.8k
        }
1419
1420
        // Stream dictionaries.
1421
1422
324k
        if (flags & f_stream) {
1423
            // Suppress /Length since we will write it manually
1424
1425
            // Make a shallow copy of this object so we can modify it safely without affecting the
1426
            // original. This code has logic to skip certain keys in agreement with
1427
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1428
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1429
            // all we are doing is removing or replacing top-level keys.
1430
41.2k
            object = object.unsafeShallowCopy();
1431
1432
41.2k
            object.removeKey("/Length");
1433
1434
            // If /DecodeParms is an empty list, remove it.
1435
41.2k
            if (object.getKey("/DecodeParms").isArray() &&
1436
41.2k
                (0 == object.getKey("/DecodeParms").getArrayNItems())) {
1437
9
                QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1438
9
                object.removeKey("/DecodeParms");
1439
9
            }
1440
1441
41.2k
            if (flags & f_filtered) {
1442
                // We will supply our own filter and decode parameters.
1443
19.8k
                object.removeKey("/Filter");
1444
19.8k
                object.removeKey("/DecodeParms");
1445
21.4k
            } else {
1446
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1447
                // filters.
1448
21.4k
                QPDFObjectHandle filter = object.getKey("/Filter");
1449
21.4k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1450
21.4k
                if (filter.isOrHasName("/Crypt")) {
1451
276
                    if (filter.isName()) {
1452
15
                        object.removeKey("/Filter");
1453
15
                        object.removeKey("/DecodeParms");
1454
261
                    } else {
1455
261
                        int idx = -1;
1456
26.6k
                        for (int i = 0; i < filter.getArrayNItems(); ++i) {
1457
26.6k
                            QPDFObjectHandle item = filter.getArrayItem(i);
1458
26.6k
                            if (item.isNameAndEquals("/Crypt")) {
1459
261
                                idx = i;
1460
261
                                break;
1461
261
                            }
1462
26.6k
                        }
1463
261
                        if (idx >= 0) {
1464
                            // If filter is an array, then the code in QPDF_Stream has already
1465
                            // verified that DecodeParms and Filters are arrays of the same length,
1466
                            // but if they weren't for some reason, eraseItem does type and bounds
1467
                            // checking.
1468
261
                            QTC::TC("qpdf", "QPDFWriter remove Crypt");
1469
261
                            filter.eraseItem(idx);
1470
261
                            decode_parms.eraseItem(idx);
1471
261
                        }
1472
261
                    }
1473
276
                }
1474
21.4k
            }
1475
41.2k
        }
1476
1477
324k
        writeString("<<");
1478
1479
1.15M
        for (auto const& [key, value]: object.as_dictionary()) {
1480
1.15M
            if (!value.null()) {
1481
967k
                writeString(indent);
1482
967k
                writeStringQDF("  ");
1483
967k
                writeString(Name::normalize(key));
1484
967k
                writeString(" ");
1485
967k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1486
967k
                    object.hasKey("/ByteRange")) {
1487
13
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1488
13
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1489
967k
                } else {
1490
967k
                    unparseChild(value, level + 1, child_flags);
1491
967k
                }
1492
967k
            }
1493
1.15M
        }
1494
1495
324k
        if (flags & f_stream) {
1496
40.9k
            writeString(indent);
1497
40.9k
            writeStringQDF("  ");
1498
40.9k
            writeString("/Length ");
1499
1500
40.9k
            if (m->direct_stream_lengths) {
1501
40.9k
                writeString(std::to_string(stream_length));
1502
40.9k
            } else {
1503
0
                writeString(std::to_string(m->cur_stream_length_id));
1504
0
                writeString(" 0 R");
1505
0
            }
1506
40.9k
            if (compress && (flags & f_filtered)) {
1507
19.5k
                writeString(indent);
1508
19.5k
                writeStringQDF("  ");
1509
19.5k
                writeString("/Filter /FlateDecode");
1510
19.5k
            }
1511
40.9k
        }
1512
1513
324k
        writeString(indent);
1514
324k
        writeString(">>");
1515
1.61M
    } else if (tc == ::ot_stream) {
1516
        // Write stream data to a buffer.
1517
41.2k
        if (!m->direct_stream_lengths) {
1518
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1519
0
        }
1520
1521
41.2k
        flags |= f_stream;
1522
41.2k
        bool compress_stream = false;
1523
41.2k
        bool is_metadata = false;
1524
41.2k
        std::string stream_data;
1525
41.2k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1526
19.8k
            flags |= f_filtered;
1527
19.8k
        }
1528
41.2k
        QPDFObjectHandle stream_dict = object.getDict();
1529
1530
41.2k
        m->cur_stream_length = stream_data.size();
1531
41.2k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1532
            // Don't encrypt stream data for the metadata stream
1533
0
            m->cur_data_key.clear();
1534
0
        }
1535
41.2k
        adjustAESStreamLength(m->cur_stream_length);
1536
41.2k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1537
41.2k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1538
41.2k
        writeString("\nstream\n");
1539
41.2k
        {
1540
41.2k
            PipelinePopper pp_enc(this);
1541
41.2k
            pushEncryptionFilter(pp_enc);
1542
41.2k
            writeString(stream_data);
1543
41.2k
        }
1544
1545
41.2k
        if ((m->added_newline =
1546
41.2k
                 m->newline_before_endstream || (m->qdf_mode && last_char != '\n'))) {
1547
0
            writeString("\nendstream");
1548
41.2k
        } else {
1549
41.2k
            writeString("endstream");
1550
41.2k
        }
1551
1.57M
    } else if (tc == ::ot_string) {
1552
54.6k
        std::string val;
1553
54.6k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1554
54.6k
            !m->cur_data_key.empty()) {
1555
42.0k
            val = object.getStringValue();
1556
42.0k
            if (m->encrypt_use_aes) {
1557
42.0k
                Pl_Buffer bufpl("encrypted string");
1558
42.0k
                Pl_AES_PDF pl(
1559
42.0k
                    "aes encrypt string",
1560
42.0k
                    &bufpl,
1561
42.0k
                    true,
1562
42.0k
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1563
42.0k
                    m->cur_data_key.length());
1564
42.0k
                pl.writeString(val);
1565
42.0k
                pl.finish();
1566
42.0k
                val = QPDF_String(bufpl.getString()).unparse(true);
1567
42.0k
            } else {
1568
0
                auto tmp_ph = QUtil::make_unique_cstr(val);
1569
0
                char* tmp = tmp_ph.get();
1570
0
                size_t vlen = val.length();
1571
0
                RC4 rc4(
1572
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1573
0
                    QIntC::to_int(m->cur_data_key.length()));
1574
0
                auto data = QUtil::unsigned_char_pointer(tmp);
1575
0
                rc4.process(data, vlen, data);
1576
0
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1577
0
            }
1578
42.0k
        } else if (flags & f_hex_string) {
1579
13
            val = QPDF_String(object.getStringValue()).unparse(true);
1580
12.5k
        } else {
1581
12.5k
            val = object.unparseResolved();
1582
12.5k
        }
1583
54.6k
        writeString(val);
1584
1.51M
    } else {
1585
1.51M
        writeString(object.unparseResolved());
1586
1.51M
    }
1587
2.07M
}
1588
1589
void
1590
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1591
9.88k
{
1592
9.88k
    qpdf_assert_debug(first_obj > 0);
1593
9.88k
    bool is_first = true;
1594
9.88k
    auto id = std::to_string(first_obj) + ' ';
1595
247k
    for (auto& offset: offsets) {
1596
247k
        if (is_first) {
1597
9.88k
            is_first = false;
1598
237k
        } else {
1599
237k
            writeStringQDF("\n");
1600
237k
            writeStringNoQDF(" ");
1601
237k
        }
1602
247k
        writeString(id);
1603
247k
        util::increment(id, 1);
1604
247k
        writeString(std::to_string(offset));
1605
247k
    }
1606
9.88k
    writeString("\n");
1607
9.88k
}
1608
1609
void
1610
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1611
4.94k
{
1612
    // Note: object might be null if this is a place-holder for an object stream that we are
1613
    // generating from scratch.
1614
1615
4.94k
    QPDFObjGen old_og = object.getObjGen();
1616
4.94k
    qpdf_assert_debug(old_og.getGen() == 0);
1617
4.94k
    int old_id = old_og.getObj();
1618
4.94k
    int new_stream_id = m->obj[old_og].renumber;
1619
1620
4.94k
    std::vector<qpdf_offset_t> offsets;
1621
4.94k
    qpdf_offset_t first = 0;
1622
1623
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1624
    // first pass.
1625
4.94k
    std::string stream_buffer_pass1;
1626
4.94k
    std::string stream_buffer_pass2;
1627
4.94k
    int first_obj = -1;
1628
4.94k
    const bool compressed = m->compress_streams && !m->qdf_mode;
1629
4.94k
    {
1630
        // Pass 1
1631
4.94k
        PipelinePopper pp_ostream_pass1(this);
1632
4.94k
        activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1);
1633
1634
4.94k
        int count = -1;
1635
123k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1636
123k
            ++count;
1637
123k
            int new_obj = m->obj[obj].renumber;
1638
123k
            if (first_obj == -1) {
1639
4.94k
                first_obj = new_obj;
1640
4.94k
            }
1641
123k
            if (m->qdf_mode) {
1642
0
                writeString(
1643
0
                    "%% Object stream: object " + std::to_string(new_obj) + ", index " +
1644
0
                    std::to_string(count));
1645
0
                if (!m->suppress_original_object_ids) {
1646
0
                    writeString("; original object ID: " + std::to_string(obj.getObj()));
1647
                    // For compatibility, only write the generation if non-zero.  While object
1648
                    // streams only allow objects with generation 0, if we are generating object
1649
                    // streams, the old object could have a non-zero generation.
1650
0
                    if (obj.getGen() != 0) {
1651
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1652
0
                        writeString(" " + std::to_string(obj.getGen()));
1653
0
                    }
1654
0
                }
1655
0
                writeString("\n");
1656
0
            }
1657
1658
123k
            offsets.push_back(m->pipeline->getCount());
1659
            // To avoid double-counting objects being written in object streams for progress
1660
            // reporting, decrement in pass 1.
1661
123k
            indicateProgress(true, false);
1662
1663
123k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1664
123k
            if (obj_to_write.isStream()) {
1665
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1666
                // time, but it's not clear to me how to construct a case for this.
1667
0
                obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1668
0
                obj_to_write = QPDFObjectHandle::newNull();
1669
0
            }
1670
123k
            writeObject(obj_to_write, count);
1671
1672
123k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1673
123k
        }
1674
4.94k
    }
1675
4.94k
    {
1676
4.94k
        PipelinePopper pp_ostream(this);
1677
        // Adjust offsets to skip over comment before first object
1678
4.94k
        first = offsets.at(0);
1679
123k
        for (auto& iter: offsets) {
1680
123k
            iter -= first;
1681
123k
        }
1682
1683
        // Take one pass at writing pairs of numbers so we can get their size information
1684
4.94k
        {
1685
4.94k
            PipelinePopper pp_discard(this);
1686
4.94k
            activatePipelineStack(pp_discard, true);
1687
4.94k
            writeObjectStreamOffsets(offsets, first_obj);
1688
4.94k
            first += m->pipeline->getCount();
1689
4.94k
        }
1690
1691
        // Set up a stream to write the stream data into a buffer.
1692
4.94k
        if (compressed) {
1693
4.94k
            activatePipelineStack(
1694
4.94k
                pp_ostream,
1695
4.94k
                pl::create<Pl_Flate>(
1696
4.94k
                    pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate));
1697
4.94k
        } else {
1698
3
            activatePipelineStack(pp_ostream, stream_buffer_pass2);
1699
3
        }
1700
4.94k
        writeObjectStreamOffsets(offsets, first_obj);
1701
4.94k
        writeString(stream_buffer_pass1);
1702
4.94k
        stream_buffer_pass1.clear();
1703
4.94k
        stream_buffer_pass1.shrink_to_fit();
1704
4.94k
    }
1705
1706
    // Write the object
1707
4.94k
    openObject(new_stream_id);
1708
4.94k
    setDataKey(new_stream_id);
1709
4.94k
    writeString("<<");
1710
4.94k
    writeStringQDF("\n ");
1711
4.94k
    writeString(" /Type /ObjStm");
1712
4.94k
    writeStringQDF("\n ");
1713
4.94k
    size_t length = stream_buffer_pass2.size();
1714
4.94k
    adjustAESStreamLength(length);
1715
4.94k
    writeString(" /Length " + std::to_string(length));
1716
4.94k
    writeStringQDF("\n ");
1717
4.94k
    if (compressed) {
1718
4.94k
        writeString(" /Filter /FlateDecode");
1719
4.94k
    }
1720
4.94k
    writeString(" /N " + std::to_string(offsets.size()));
1721
4.94k
    writeStringQDF("\n ");
1722
4.94k
    writeString(" /First " + std::to_string(first));
1723
4.94k
    if (!object.isNull()) {
1724
        // If the original object has an /Extends key, preserve it.
1725
2.12k
        QPDFObjectHandle dict = object.getDict();
1726
2.12k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1727
2.12k
        if (extends.isIndirect()) {
1728
772
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1729
772
            writeStringQDF("\n ");
1730
772
            writeString(" /Extends ");
1731
772
            unparseChild(extends, 1, f_in_ostream);
1732
772
        }
1733
2.12k
    }
1734
4.94k
    writeStringQDF("\n");
1735
4.94k
    writeStringNoQDF(" ");
1736
4.94k
    writeString(">>\nstream\n");
1737
4.94k
    if (m->encryption) {
1738
4.78k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1739
4.78k
    }
1740
4.94k
    {
1741
4.94k
        PipelinePopper pp_enc(this);
1742
4.94k
        pushEncryptionFilter(pp_enc);
1743
4.94k
        writeString(stream_buffer_pass2);
1744
4.94k
    }
1745
4.94k
    if (m->newline_before_endstream) {
1746
0
        writeString("\n");
1747
0
    }
1748
4.94k
    writeString("endstream");
1749
4.94k
    m->cur_data_key.clear();
1750
4.94k
    closeObject(new_stream_id);
1751
4.94k
}
1752
1753
void
1754
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1755
369k
{
1756
369k
    QPDFObjGen old_og = object.getObjGen();
1757
1758
369k
    if ((object_stream_index == -1) && (old_og.getGen() == 0) &&
1759
369k
        (m->object_stream_to_objects.count(old_og.getObj()))) {
1760
4.94k
        writeObjectStream(object);
1761
4.94k
        return;
1762
4.94k
    }
1763
1764
365k
    indicateProgress(false, false);
1765
365k
    auto new_id = m->obj[old_og].renumber;
1766
365k
    if (m->qdf_mode) {
1767
0
        if (m->page_object_to_seq.contains(old_og)) {
1768
0
            writeString("%% Page ");
1769
0
            writeString(std::to_string(m->page_object_to_seq[old_og]));
1770
0
            writeString("\n");
1771
0
        }
1772
0
        if (m->contents_to_page_seq.contains(old_og)) {
1773
0
            writeString("%% Contents for page ");
1774
0
            writeString(std::to_string(m->contents_to_page_seq[old_og]));
1775
0
            writeString("\n");
1776
0
        }
1777
0
    }
1778
365k
    if (object_stream_index == -1) {
1779
241k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1780
0
            writeString("%% Original object ID: " + object.getObjGen().unparse(' ') + "\n");
1781
0
        }
1782
241k
        openObject(new_id);
1783
241k
        setDataKey(new_id);
1784
241k
        unparseObject(object, 0, 0);
1785
241k
        m->cur_data_key.clear();
1786
241k
        closeObject(new_id);
1787
241k
    } else {
1788
123k
        unparseObject(object, 0, f_in_ostream);
1789
123k
        writeString("\n");
1790
123k
    }
1791
1792
365k
    if ((!m->direct_stream_lengths) && object.isStream()) {
1793
0
        if (m->qdf_mode) {
1794
0
            if (m->added_newline) {
1795
0
                writeString("%QDF: ignore_newline\n");
1796
0
            }
1797
0
        }
1798
0
        openObject(new_id + 1);
1799
0
        writeString(std::to_string(m->cur_stream_length));
1800
0
        closeObject(new_id + 1);
1801
0
    }
1802
365k
}
1803
1804
std::string
1805
QPDFWriter::getOriginalID1()
1806
21.9k
{
1807
21.9k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1808
21.9k
    if (trailer.hasKey("/ID")) {
1809
3.11k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1810
18.8k
    } else {
1811
18.8k
        return "";
1812
18.8k
    }
1813
21.9k
}
1814
1815
void
1816
QPDFWriter::generateID()
1817
21.5k
{
1818
    // Generate the ID lazily so that we can handle the user's preference to use static or
1819
    // deterministic ID generation.
1820
1821
21.5k
    if (!m->id2.empty()) {
1822
13.4k
        return;
1823
13.4k
    }
1824
1825
8.12k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1826
1827
8.12k
    std::string result;
1828
1829
8.12k
    if (m->static_id) {
1830
        // For test suite use only...
1831
8.12k
        static unsigned char tmp[] = {
1832
8.12k
            0x31,
1833
8.12k
            0x41,
1834
8.12k
            0x59,
1835
8.12k
            0x26,
1836
8.12k
            0x53,
1837
8.12k
            0x58,
1838
8.12k
            0x97,
1839
8.12k
            0x93,
1840
8.12k
            0x23,
1841
8.12k
            0x84,
1842
8.12k
            0x62,
1843
8.12k
            0x64,
1844
8.12k
            0x33,
1845
8.12k
            0x83,
1846
8.12k
            0x27,
1847
8.12k
            0x95,
1848
8.12k
            0x00};
1849
8.12k
        result = reinterpret_cast<char*>(tmp);
1850
8.12k
    } else {
1851
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1852
        // only thing that's really important is that it is very likely to be unique.  We can't
1853
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1854
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1855
        // sufficient portion of the file's contents such no two non-matching files would match in
1856
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1857
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1858
        // that case, would have the same ID regardless of the output file's name.
1859
1860
0
        std::string seed;
1861
0
        if (m->deterministic_id) {
1862
0
            if (m->deterministic_id_data.empty()) {
1863
0
                QTC::TC("qpdf", "QPDFWriter deterministic with no data");
1864
0
                throw std::runtime_error(
1865
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for "
1866
0
                    "deterministic ID.  This may happen if deterministic ID "
1867
0
                    "and file encryption are requested together.");
1868
0
            }
1869
0
            seed += m->deterministic_id_data;
1870
0
        } else {
1871
0
            seed += std::to_string(QUtil::get_current_time());
1872
0
            seed += m->filename;
1873
0
            seed += " ";
1874
0
        }
1875
0
        seed += " QPDF ";
1876
0
        if (trailer.hasKey("/Info")) {
1877
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1878
0
                if (item.second.isString()) {
1879
0
                    seed += " ";
1880
0
                    seed += item.second.getStringValue();
1881
0
                }
1882
0
            }
1883
0
        }
1884
1885
0
        MD5 m;
1886
0
        m.encodeString(seed.c_str());
1887
0
        MD5::Digest digest;
1888
0
        m.digest(digest);
1889
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1890
0
    }
1891
1892
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1893
    // word.  Otherwise, we'll use the generated ID for both.
1894
1895
8.12k
    m->id2 = result;
1896
    // Note: keep /ID from old file even if --static-id was given.
1897
8.12k
    m->id1 = getOriginalID1();
1898
8.12k
    if (m->id1.empty()) {
1899
6.88k
        m->id1 = m->id2;
1900
6.88k
    }
1901
8.12k
}
1902
1903
void
1904
QPDFWriter::initializeSpecialStreams()
1905
8.10k
{
1906
    // Mark all page content streams in case we are filtering or normalizing.
1907
8.10k
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1908
8.10k
    int num = 0;
1909
12.7k
    for (auto& page: pages) {
1910
12.7k
        m->page_object_to_seq[page.getObjGen()] = ++num;
1911
12.7k
        QPDFObjectHandle contents = page.getKey("/Contents");
1912
12.7k
        std::vector<QPDFObjGen> contents_objects;
1913
12.7k
        if (contents.isArray()) {
1914
540
            int n = contents.getArrayNItems();
1915
5.12k
            for (int i = 0; i < n; ++i) {
1916
4.58k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1917
4.58k
            }
1918
12.2k
        } else if (contents.isStream()) {
1919
3.01k
            contents_objects.push_back(contents.getObjGen());
1920
3.01k
        }
1921
1922
12.7k
        for (auto const& c: contents_objects) {
1923
7.59k
            m->contents_to_page_seq[c] = num;
1924
7.59k
            m->normalized_streams.insert(c);
1925
7.59k
        }
1926
12.7k
    }
1927
8.10k
}
1928
1929
void
1930
QPDFWriter::preserveObjectStreams()
1931
8.09k
{
1932
8.09k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1933
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1934
    // streams out of old objects that have generation numbers greater than zero. However in an
1935
    // existing PDF, all object stream objects and all objects in them must have generation 0
1936
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1937
    // that are not allowed to be in object streams. In addition to removing objects that were
1938
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1939
    // objects from being included.
1940
8.09k
    auto end = xref.cend();
1941
8.09k
    m->obj.streams_empty = true;
1942
8.09k
    if (m->preserve_unreferenced_objects) {
1943
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1944
0
            if (iter->second.getType() == 2) {
1945
                // Pdf contains object streams.
1946
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1947
0
                m->obj.streams_empty = false;
1948
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1949
0
            }
1950
0
        }
1951
8.09k
    } else {
1952
        // Start by scanning for first compressed object in case we don't have any object streams to
1953
        // process.
1954
102k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1955
95.3k
            if (iter->second.getType() == 2) {
1956
                // Pdf contains object streams.
1957
1.47k
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
1958
1.47k
                m->obj.streams_empty = false;
1959
1.47k
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1960
                // The object pointed to by iter may be a previous generation, in which case it is
1961
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
1962
                // table may contain multiple generations of an object).
1963
458k
                for (iter = xref.cbegin(); iter != end; ++iter) {
1964
456k
                    if (iter->second.getType() == 2) {
1965
423k
                        auto id = static_cast<size_t>(iter->first.getObj());
1966
423k
                        if (id < eligible.size() && eligible[id]) {
1967
80.2k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1968
343k
                        } else {
1969
343k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1970
343k
                        }
1971
423k
                    }
1972
456k
                }
1973
1.47k
                return;
1974
1.47k
            }
1975
95.3k
        }
1976
8.09k
    }
1977
8.09k
}
1978
1979
void
1980
QPDFWriter::generateObjectStreams()
1981
0
{
1982
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
1983
    // how many object streams are needed so that we can distribute objects approximately evenly
1984
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
1985
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
1986
    // here later.
1987
1988
    // This code doesn't do anything with /Extends.
1989
1990
0
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
1991
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
1992
1993
0
    initializeTables(2U * n_object_streams);
1994
0
    if (n_object_streams == 0) {
1995
0
        m->obj.streams_empty = true;
1996
0
        return;
1997
0
    }
1998
0
    size_t n_per = eligible.size() / n_object_streams;
1999
0
    if (n_per * n_object_streams < eligible.size()) {
2000
0
        ++n_per;
2001
0
    }
2002
0
    unsigned int n = 0;
2003
0
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2004
0
    for (auto const& item: eligible) {
2005
0
        if (n == n_per) {
2006
0
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2007
0
            n = 0;
2008
            // Construct a new null object as the "original" object stream.  The rest of the code
2009
            // knows that this means we're creating the object stream from scratch.
2010
0
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2011
0
        }
2012
0
        auto& obj = m->obj[item];
2013
0
        obj.object_stream = cur_ostream;
2014
0
        obj.gen = item.getGen();
2015
0
        ++n;
2016
0
    }
2017
0
}
2018
2019
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    // Return a shallow copy of the trailer with the keys removed that necessarily have to be
    // replaced when writing the file.

    static char const* const keys_to_drop[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Trailer keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };

    QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy();
    for (char const* key: keys_to_drop) {
        trailer.removeKey(key);
    }
    return trailer;
}
2044
2045
// Make document extension level information direct as required by the spec.
2046
void
2047
QPDFWriter::prepareFileForWrite()
2048
8.03k
{
2049
8.03k
    m->pdf.fixDanglingReferences();
2050
8.03k
    auto root = m->pdf.getRoot();
2051
8.03k
    auto oh = root.getKey("/Extensions");
2052
8.03k
    if (oh.isDictionary()) {
2053
246
        const bool extensions_indirect = oh.isIndirect();
2054
246
        if (extensions_indirect) {
2055
88
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2056
88
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2057
88
        }
2058
246
        if (oh.hasKey("/ADBE")) {
2059
136
            auto adbe = oh.getKey("/ADBE");
2060
136
            if (adbe.isIndirect()) {
2061
76
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2062
76
                adbe.makeDirect();
2063
76
                oh.replaceKey("/ADBE", adbe);
2064
76
            }
2065
136
        }
2066
246
    }
2067
8.03k
}
2068
2069
void
2070
QPDFWriter::initializeTables(size_t extra)
2071
8.09k
{
2072
8.09k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2073
8.09k
    m->obj.resize(size);
2074
8.09k
    m->new_obj.resize(size);
2075
8.09k
}
2076
2077
void
2078
QPDFWriter::doWriteSetup()
2079
8.10k
{
2080
8.10k
    if (m->did_write_setup) {
2081
0
        return;
2082
0
    }
2083
8.10k
    m->did_write_setup = true;
2084
2085
    // Do preliminary setup
2086
2087
8.10k
    if (m->linearized) {
2088
8.10k
        m->qdf_mode = false;
2089
8.10k
    }
2090
2091
8.10k
    if (m->pclm) {
2092
0
        m->stream_decode_level = qpdf_dl_none;
2093
0
        m->compress_streams = false;
2094
0
        m->encryption = nullptr;
2095
0
    }
2096
2097
8.10k
    if (m->qdf_mode) {
2098
0
        if (!m->normalize_content_set) {
2099
0
            m->normalize_content = true;
2100
0
        }
2101
0
        if (!m->compress_streams_set) {
2102
0
            m->compress_streams = false;
2103
0
        }
2104
0
        if (!m->stream_decode_level_set) {
2105
0
            m->stream_decode_level = qpdf_dl_generalized;
2106
0
        }
2107
0
    }
2108
2109
8.10k
    if (m->encryption) {
2110
        // Encryption has been explicitly set
2111
8.10k
        m->preserve_encryption = false;
2112
8.10k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2113
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2114
        // though, we still obey that.
2115
0
        m->preserve_encryption = false;
2116
0
    }
2117
2118
8.10k
    if (m->preserve_encryption) {
2119
0
        copyEncryptionParameters(m->pdf);
2120
0
    }
2121
2122
8.10k
    if (!m->forced_pdf_version.empty()) {
2123
0
        int major = 0;
2124
0
        int minor = 0;
2125
0
        parseVersion(m->forced_pdf_version, major, minor);
2126
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2127
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2128
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2129
0
            m->object_stream_mode = qpdf_o_disable;
2130
0
        }
2131
0
    }
2132
2133
8.10k
    if (m->qdf_mode || m->normalize_content || m->stream_decode_level) {
2134
8.10k
        initializeSpecialStreams();
2135
8.10k
    }
2136
2137
8.10k
    if (m->qdf_mode) {
2138
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2139
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2140
        // hint streams always get direct stream lengths.
2141
0
        m->direct_stream_lengths = false;
2142
0
    }
2143
2144
8.10k
    switch (m->object_stream_mode) {
2145
0
    case qpdf_o_disable:
2146
0
        initializeTables();
2147
0
        m->obj.streams_empty = true;
2148
0
        break;
2149
2150
8.09k
    case qpdf_o_preserve:
2151
8.09k
        initializeTables();
2152
8.09k
        preserveObjectStreams();
2153
8.09k
        break;
2154
2155
0
    case qpdf_o_generate:
2156
0
        generateObjectStreams();
2157
0
        break;
2158
2159
        // no default so gcc will warn for missing case tag
2160
8.10k
    }
2161
2162
8.07k
    if (!m->obj.streams_empty) {
2163
1.45k
        if (m->linearized) {
2164
            // Page dictionaries are not allowed to be compressed objects.
2165
2.38k
            for (auto& page: m->pdf.getAllPages()) {
2166
2.38k
                if (m->obj[page].object_stream > 0) {
2167
187
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2168
187
                    m->obj[page].object_stream = 0;
2169
187
                }
2170
2.38k
            }
2171
1.45k
        }
2172
2173
1.45k
        if (m->linearized || m->encryption) {
2174
            // The document catalog is not allowed to be compressed in linearized files either.  It
2175
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2176
            // handle encrypted files with compressed document catalogs, so we disable them in that
2177
            // case as well.
2178
1.45k
            if (m->obj[m->root_og].object_stream > 0) {
2179
2
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2180
2
                m->obj[m->root_og].object_stream = 0;
2181
2
            }
2182
1.45k
        }
2183
2184
        // Generate reverse mapping from object stream to objects
2185
1.87M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2186
1.87M
            if (item.object_stream > 0) {
2187
79.6k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2188
79.6k
                vec.emplace_back(id, item.gen);
2189
79.6k
                if (m->max_ostream_index < vec.size()) {
2190
22.3k
                    ++m->max_ostream_index;
2191
22.3k
                }
2192
79.6k
            }
2193
1.87M
        });
2194
1.45k
        --m->max_ostream_index;
2195
2196
1.45k
        if (m->object_stream_to_objects.empty()) {
2197
436
            m->obj.streams_empty = true;
2198
1.02k
        } else {
2199
1.02k
            setMinimumPDFVersion("1.5");
2200
1.02k
        }
2201
1.45k
    }
2202
2203
8.07k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2204
8.07k
    m->final_pdf_version = m->min_pdf_version;
2205
8.07k
    m->final_extension_level = m->min_extension_level;
2206
8.07k
    if (!m->forced_pdf_version.empty()) {
2207
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2208
0
        m->final_pdf_version = m->forced_pdf_version;
2209
0
        m->final_extension_level = m->forced_extension_level;
2210
0
    }
2211
8.07k
}
2212
2213
void
2214
QPDFWriter::write()
2215
8.10k
{
2216
8.10k
    doWriteSetup();
2217
2218
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2219
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2220
8.10k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2221
2222
8.10k
    prepareFileForWrite();
2223
2224
8.10k
    if (m->linearized) {
2225
8.00k
        writeLinearized();
2226
8.00k
    } else {
2227
108
        writeStandard();
2228
108
    }
2229
2230
8.10k
    m->pipeline->finish();
2231
8.10k
    if (m->close_file) {
2232
0
        fclose(m->file);
2233
0
    }
2234
8.10k
    m->file = nullptr;
2235
8.10k
    if (m->buffer_pipeline) {
2236
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2237
0
        m->buffer_pipeline = nullptr;
2238
0
    }
2239
8.10k
    indicateProgress(false, true);
2240
8.10k
}
2241
2242
// Return the object/generation pair under which `og` is tracked in the output: the renumbered
// object id recorded for `og`, paired with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    int const renumbered_id = m->obj[og].renumber;
    return QPDFObjGen(renumbered_id, 0);
}
2247
2248
std::map<QPDFObjGen, QPDFXRefEntry>
2249
QPDFWriter::getWrittenXRefTable()
2250
0
{
2251
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2252
2253
0
    auto it = result.begin();
2254
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2255
0
        if (item.xref.getType() != 0) {
2256
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2257
0
        }
2258
0
    });
2259
0
    return result;
2260
0
}
2261
2262
void
2263
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2264
36.8k
{
2265
143k
    for (auto const& oh: part) {
2266
143k
        enqueueObject(oh);
2267
143k
    }
2268
36.8k
}
2269
2270
void
2271
QPDFWriter::writeEncryptionDictionary()
2272
13.8k
{
2273
13.8k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2274
13.8k
    auto& enc = *m->encryption;
2275
13.8k
    auto const V = enc.getV();
2276
2277
13.8k
    writeString("<<");
2278
13.8k
    if (V >= 4) {
2279
13.8k
        writeString(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2280
13.8k
        writeString(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2281
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2282
        // MacOS won't open encrypted files without it.
2283
13.8k
        writeString((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2284
13.8k
        if (!m->encryption->getEncryptMetadata()) {
2285
0
            writeString(" /EncryptMetadata false");
2286
0
        }
2287
13.8k
    }
2288
13.8k
    writeString(" /Filter /Standard /Length ");
2289
13.8k
    writeString(std::to_string(enc.getLengthBytes() * 8));
2290
13.8k
    writeString(" /O ");
2291
13.8k
    writeString(QPDF_String(enc.getO()).unparse(true));
2292
13.8k
    if (V >= 4) {
2293
13.8k
        writeString(" /OE ");
2294
13.8k
        writeString(QPDF_String(enc.getOE()).unparse(true));
2295
13.8k
    }
2296
13.8k
    writeString(" /P ");
2297
13.8k
    writeString(std::to_string(enc.getP()));
2298
13.8k
    if (V >= 5) {
2299
13.8k
        writeString(" /Perms ");
2300
13.8k
        writeString(QPDF_String(enc.getPerms()).unparse(true));
2301
13.8k
    }
2302
13.8k
    writeString(" /R ");
2303
13.8k
    writeString(std::to_string(enc.getR()));
2304
2305
13.8k
    if (V >= 4) {
2306
13.8k
        writeString(" /StmF /StdCF /StrF /StdCF");
2307
13.8k
    }
2308
13.8k
    writeString(" /U ");
2309
13.8k
    writeString(QPDF_String(enc.getU()).unparse(true));
2310
13.8k
    if (V >= 4) {
2311
13.8k
        writeString(" /UE ");
2312
13.8k
        writeString(QPDF_String(enc.getUE()).unparse(true));
2313
13.8k
    }
2314
13.8k
    writeString(" /V ");
2315
13.8k
    writeString(std::to_string(enc.getV()));
2316
13.8k
    writeString(" >>");
2317
13.8k
    closeObject(m->encryption_dict_objid);
2318
13.8k
}
2319
2320
std::string
2321
QPDFWriter::getFinalVersion()
2322
0
{
2323
0
    doWriteSetup();
2324
0
    return m->final_pdf_version;
2325
0
}
2326
2327
void
2328
QPDFWriter::writeHeader()
2329
13.8k
{
2330
13.8k
    writeString("%PDF-");
2331
13.8k
    writeString(m->final_pdf_version);
2332
13.8k
    if (m->pclm) {
2333
        // PCLm version
2334
0
        writeString("\n%PCLm 1.0\n");
2335
13.8k
    } else {
2336
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2337
        // as binary.
2338
13.8k
        writeString("\n%\xbf\xf7\xa2\xfe\n");
2339
13.8k
    }
2340
13.8k
    writeStringQDF("%QDF-1.0\n\n");
2341
2342
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2343
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2344
    // linearized files, we have to write extra header text after the linearization parameter
2345
    // dictionary.
2346
13.8k
}
2347
2348
void
2349
QPDFWriter::writeHintStream(int hint_id)
2350
6.71k
{
2351
6.71k
    std::string hint_buffer;
2352
6.71k
    int S = 0;
2353
6.71k
    int O = 0;
2354
6.71k
    bool compressed = (m->compress_streams && !m->qdf_mode);
2355
6.71k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2356
2357
6.71k
    openObject(hint_id);
2358
6.71k
    setDataKey(hint_id);
2359
2360
6.71k
    size_t hlen = hint_buffer.size();
2361
2362
6.71k
    writeString("<< ");
2363
6.71k
    if (compressed) {
2364
6.71k
        writeString("/Filter /FlateDecode ");
2365
6.71k
    }
2366
6.71k
    writeString("/S ");
2367
6.71k
    writeString(std::to_string(S));
2368
6.71k
    if (O) {
2369
321
        writeString(" /O ");
2370
321
        writeString(std::to_string(O));
2371
321
    }
2372
6.71k
    writeString(" /Length ");
2373
6.71k
    adjustAESStreamLength(hlen);
2374
6.71k
    writeString(std::to_string(hlen));
2375
6.71k
    writeString(" >>\nstream\n");
2376
2377
6.71k
    if (m->encryption) {
2378
6.71k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2379
6.71k
    }
2380
6.71k
    char last_char = hint_buffer.empty() ? '\0' : hint_buffer.back();
2381
6.71k
    {
2382
6.71k
        PipelinePopper pp_enc(this);
2383
6.71k
        pushEncryptionFilter(pp_enc);
2384
6.71k
        writeString(hint_buffer);
2385
6.71k
    }
2386
2387
6.71k
    if (last_char != '\n') {
2388
6.68k
        writeString("\n");
2389
6.68k
    }
2390
6.71k
    writeString("endstream");
2391
6.71k
    closeObject(hint_id);
2392
6.71k
}
2393
2394
// Convenience overload: write a cross-reference table with no previous-xref offset, offsets not
// suppressed, no hint stream adjustment, and linearization pass 0.
//
// There are too many extra arguments to replace the overloaded function with defaults in the
// header file...too much risk of leaving something off.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    constexpr qpdf_offset_t prev = 0;
    constexpr bool suppress_offsets = false;
    constexpr int hint_id = 0;
    constexpr qpdf_offset_t hint_offset = 0;
    constexpr qpdf_offset_t hint_length = 0;
    constexpr int linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2401
2402
// Write a classic cross-reference table covering object ids `first` through `last`, followed by
// the trailer.
//
// Object 0 is written as the free entry. Every other entry is written as an in-use entry whose
// offset is 0 when `suppress_offsets` is set, and otherwise the recorded offset of the object.
// When `hint_id` is non-zero, recorded offsets at or beyond `hint_offset` (other than the hint
// object's own) are increased by `hint_length`.
//
// Returns the pipeline byte count captured right after the subsection header numbers and before
// the newline that follows them.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    // Resolve the offset to record for a non-zero object id.
    auto entry_offset = [&](int id) -> qpdf_offset_t {
        if (suppress_offsets) {
            return 0;
        }
        qpdf_offset_t recorded = m->new_obj[id].xref.getOffset();
        bool const shifted_by_hint = (hint_id != 0) && (id != hint_id) && (recorded >= hint_offset);
        return shifted_by_hint ? recorded + hint_length : recorded;
    };

    writeString("xref\n");
    writeString(std::to_string(first));
    writeString(" ");
    writeString(std::to_string(last - first + 1));
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    writeString("\n");

    for (int id = first; id <= last; ++id) {
        if (id == 0) {
            writeString("0000000000 65535 f \n");
            continue;
        }
        writeString(QUtil::int_to_string(entry_offset(id), 10));
        writeString(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    writeString("\n");
    return space_before_zero;
}
2440
2441
// Convenience overload: write a cross-reference stream with no previous-xref offset, no hint
// stream adjustment, compression not skipped, and linearization pass 0.
//
// There are too many extra arguments to replace the overloaded function with defaults in the
// header file...too much risk of leaving something off.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    constexpr qpdf_offset_t prev = 0;
    constexpr int hint_id = 0;
    constexpr qpdf_offset_t hint_offset = 0;
    constexpr qpdf_offset_t hint_length = 0;
    constexpr bool skip_compression = false;
    constexpr int linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2450
2451
// Write a cross-reference stream as object `xref_id`, covering object ids `first` through `last`.
//
// The binary entry data is built into a string first (through a PNG "up" predictor filter and,
// unless `skip_compression` is set, a deflate filter) so that its length is known before the
// stream dictionary is written. When `hint_id` is non-zero, type 1 offsets at or beyond
// `hint_offset` (other than the hint object's own) are increased by `hint_length`.
//
// Returns the pipeline byte count at entry minus one.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();

    // Field 1 holds offsets and object stream identifiers, so it must be wide enough for both.
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 holds object stream indices.
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // One byte for the entry type plus the two fields.
    unsigned int const esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        PipelinePopper pp_xref(this);
        if (!compressed) {
            activatePipelineStack(pp_xref, xref_data);
        } else {
            m->count_buffer.clear();
            auto link = pl::create<pl::String>(xref_data);
            // With skip_compression, the stream dictionary below still declares compression but
            // the data is not actually deflated.  This helps us with computation of padding for
            // pass 1 of linearization.
            if (!skip_compression) {
                link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate);
            }
            activatePipelineStack(
                pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize));
        }

        for (int id = first; id <= last; ++id) {
            QPDFXRefEntry& entry = m->new_obj[id].xref;
            auto const type = entry.getType();
            if (type == 0) {
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
            } else if (type == 1) {
                qpdf_offset_t offset = entry.getOffset();
                if ((hint_id != 0) && (id != hint_id) && (offset >= hint_offset)) {
                    offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(offset), f1_size);
                writeBinary(0, f2_size);
            } else if (type == 2) {
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    // Stream dictionary.
    openObject(xref_id);
    writeString("<<");
    writeStringQDF("\n ");
    writeString(" /Type /XRef");
    writeStringQDF("\n ");
    writeString(" /Length " + std::to_string(xref_data.size()));
    if (compressed) {
        writeStringQDF("\n ");
        writeString(" /Filter /FlateDecode");
        writeStringQDF("\n ");
        writeString(" /DecodeParms << /Columns " + std::to_string(esize) + " /Predictor 12 >>");
    }
    writeStringQDF("\n ");
    writeString(" /W [ 1 " + std::to_string(f1_size) + " " + std::to_string(f2_size) + " ]");

    // /Index is written only when the stream does not cover exactly ids 0 through size - 1.
    if ((first != 0) || (last != size - 1)) {
        writeString(
            " /Index [ " + std::to_string(first) + " " + std::to_string(last - first + 1) + " ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);

    // Stream data.
    writeString("\nstream\n");
    writeString(xref_data);
    writeString("\nendstream");
    closeObject(xref_id);

    return xref_offset - 1;
}
2559
2560
size_t
2561
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2562
1.30k
{
2563
    // This routine is called right after a linearization first pass xref stream has been written
2564
    // without compression.  Calculate the amount of padding that would be required in the worst
2565
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2566
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2567
    // 10 extra bytes for number length increases.
2568
2569
1.30k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2570
1.30k
}
2571
2572
void
2573
QPDFWriter::writeLinearized()
2574
8.00k
{
2575
    // Optimize file and enqueue objects in order
2576
2577
8.00k
    std::map<int, int> stream_cache;
2578
2579
41.9k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2580
41.9k
        auto& result = stream_cache[stream.getObjectID()];
2581
41.9k
        if (result == 0) {
2582
23.2k
            bool compress_stream;
2583
23.2k
            bool is_metadata;
2584
23.2k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2585
10.9k
                result = 2;
2586
12.2k
            } else {
2587
12.2k
                result = 1;
2588
12.2k
            }
2589
23.2k
        }
2590
41.9k
        return result;
2591
41.9k
    };
2592
2593
8.00k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2594
2595
8.00k
    std::vector<QPDFObjectHandle> part4;
2596
8.00k
    std::vector<QPDFObjectHandle> part6;
2597
8.00k
    std::vector<QPDFObjectHandle> part7;
2598
8.00k
    std::vector<QPDFObjectHandle> part8;
2599
8.00k
    std::vector<QPDFObjectHandle> part9;
2600
8.00k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2601
2602
    // Object number sequence:
2603
    //
2604
    //  second half
2605
    //    second half uncompressed objects
2606
    //    second half xref stream, if any
2607
    //    second half compressed objects
2608
    //  first half
2609
    //    linearization dictionary
2610
    //    first half xref stream, if any
2611
    //    part 4 uncompresesd objects
2612
    //    encryption dictionary, if any
2613
    //    hint stream
2614
    //    part 6 uncompressed objects
2615
    //    first half compressed objects
2616
    //
2617
2618
    // Second half objects
2619
8.00k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2620
8.00k
    int second_half_first_obj = 1;
2621
8.00k
    int after_second_half = 1 + second_half_uncompressed;
2622
8.00k
    m->next_objid = after_second_half;
2623
8.00k
    int second_half_xref = 0;
2624
8.00k
    bool need_xref_stream = !m->obj.streams_empty;
2625
8.00k
    if (need_xref_stream) {
2626
834
        second_half_xref = m->next_objid++;
2627
834
    }
2628
    // Assign numbers to all compressed objects in the second half.
2629
8.00k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2630
30.5k
    for (int i = 0; i < 3; ++i) {
2631
51.4k
        for (auto const& oh: *vecs2[i]) {
2632
51.4k
            assignCompressedObjectNumbers(oh.getObjGen());
2633
51.4k
        }
2634
22.5k
    }
2635
8.00k
    int second_half_end = m->next_objid - 1;
2636
8.00k
    int second_trailer_size = m->next_objid;
2637
2638
    // First half objects
2639
8.00k
    int first_half_start = m->next_objid;
2640
8.00k
    int lindict_id = m->next_objid++;
2641
8.00k
    int first_half_xref = 0;
2642
8.00k
    if (need_xref_stream) {
2643
834
        first_half_xref = m->next_objid++;
2644
834
    }
2645
8.00k
    int part4_first_obj = m->next_objid;
2646
8.00k
    m->next_objid += QIntC::to_int(part4.size());
2647
8.00k
    int after_part4 = m->next_objid;
2648
8.00k
    if (m->encryption) {
2649
7.50k
        m->encryption_dict_objid = m->next_objid++;
2650
7.50k
    }
2651
8.00k
    int hint_id = m->next_objid++;
2652
8.00k
    int part6_first_obj = m->next_objid;
2653
8.00k
    m->next_objid += QIntC::to_int(part6.size());
2654
8.00k
    int after_part6 = m->next_objid;
2655
    // Assign numbers to all compressed objects in the first half
2656
8.00k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2657
23.0k
    for (int i = 0; i < 2; ++i) {
2658
92.3k
        for (auto const& oh: *vecs1[i]) {
2659
92.3k
            assignCompressedObjectNumbers(oh.getObjGen());
2660
92.3k
        }
2661
15.0k
    }
2662
8.00k
    int first_half_end = m->next_objid - 1;
2663
8.00k
    int first_trailer_size = m->next_objid;
2664
2665
8.00k
    int part4_end_marker = part4.back().getObjectID();
2666
8.00k
    int part6_end_marker = part6.back().getObjectID();
2667
8.00k
    qpdf_offset_t space_before_zero = 0;
2668
8.00k
    qpdf_offset_t file_size = 0;
2669
8.00k
    qpdf_offset_t part6_end_offset = 0;
2670
8.00k
    qpdf_offset_t first_half_max_obj_offset = 0;
2671
8.00k
    qpdf_offset_t second_xref_offset = 0;
2672
8.00k
    qpdf_offset_t first_xref_end = 0;
2673
8.00k
    qpdf_offset_t second_xref_end = 0;
2674
2675
8.00k
    m->next_objid = part4_first_obj;
2676
8.00k
    enqueuePart(part4);
2677
8.00k
    if (m->next_objid != after_part4) {
2678
        // This can happen with very botched files as in the fuzzer test. There are likely some
2679
        // faulty assumptions in calculateLinearizationData
2680
3
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2681
3
    }
2682
7.99k
    m->next_objid = part6_first_obj;
2683
7.99k
    enqueuePart(part6);
2684
7.99k
    if (m->next_objid != after_part6) {
2685
208
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2686
208
    }
2687
7.79k
    m->next_objid = second_half_first_obj;
2688
7.79k
    enqueuePart(part7);
2689
7.79k
    enqueuePart(part8);
2690
7.79k
    enqueuePart(part9);
2691
7.79k
    if (m->next_objid != after_second_half) {
2692
127
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2693
127
    }
2694
2695
7.66k
    qpdf_offset_t hint_length = 0;
2696
7.66k
    std::string hint_buffer;
2697
2698
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2699
2700
7.66k
    FILE* lin_pass1_file = nullptr;
2701
7.66k
    auto pp_pass1 = std::make_unique<PipelinePopper>(this);
2702
7.66k
    auto pp_md5 = std::make_unique<PipelinePopper>(this);
2703
13.8k
    for (int pass: {1, 2}) {
2704
13.8k
        if (pass == 1) {
2705
7.15k
            if (!m->lin_pass1_filename.empty()) {
2706
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2707
0
                pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file));
2708
0
                activatePipelineStack(*pp_pass1);
2709
7.15k
            } else {
2710
7.15k
                activatePipelineStack(*pp_pass1, true);
2711
7.15k
            }
2712
7.15k
            if (m->deterministic_id) {
2713
0
                pushMD5Pipeline(*pp_md5);
2714
0
            }
2715
7.15k
        }
2716
2717
        // Part 1: header
2718
2719
13.8k
        writeHeader();
2720
2721
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2722
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2723
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2724
        // linearization parameter dictionary must appear within the first 1024 characters of the
2725
        // file.
2726
2727
13.8k
        qpdf_offset_t pos = m->pipeline->getCount();
2728
13.8k
        openObject(lindict_id);
2729
13.8k
        writeString("<<");
2730
13.8k
        if (pass == 2) {
2731
6.71k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2732
6.71k
            int first_page_object = m->obj[pages.at(0)].renumber;
2733
6.71k
            int npages = QIntC::to_int(pages.size());
2734
2735
6.71k
            writeString(" /Linearized 1 /L ");
2736
6.71k
            writeString(std::to_string(file_size + hint_length));
2737
            // Implementation note 121 states that a space is mandatory after this open bracket.
2738
6.71k
            writeString(" /H [ ");
2739
6.71k
            writeString(std::to_string(m->new_obj[hint_id].xref.getOffset()));
2740
6.71k
            writeString(" ");
2741
6.71k
            writeString(std::to_string(hint_length));
2742
6.71k
            writeString(" ] /O ");
2743
6.71k
            writeString(std::to_string(first_page_object));
2744
6.71k
            writeString(" /E ");
2745
6.71k
            writeString(std::to_string(part6_end_offset + hint_length));
2746
6.71k
            writeString(" /N ");
2747
6.71k
            writeString(std::to_string(npages));
2748
6.71k
            writeString(" /T ");
2749
6.71k
            writeString(std::to_string(space_before_zero + hint_length));
2750
6.71k
        }
2751
13.8k
        writeString(" >>");
2752
13.8k
        closeObject(lindict_id);
2753
13.8k
        static int const pad = 200;
2754
13.8k
        writePad(QIntC::to_size(pos - m->pipeline->getCount() + pad));
2755
13.8k
        writeString("\n");
2756
2757
        // If the user supplied any additional header text, write it here after the linearization
2758
        // parameter dictionary.
2759
13.8k
        writeString(m->extra_header_text);
2760
2761
        // Part 3: first page cross reference table and trailer.
2762
2763
13.8k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2764
13.8k
        qpdf_offset_t hint_offset = 0;
2765
13.8k
        if (pass == 2) {
2766
6.71k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2767
6.71k
        }
2768
13.8k
        if (need_xref_stream) {
2769
            // Must pad here too.
2770
1.30k
            if (pass == 1) {
2771
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2772
                // reserved for each file offset.  This would provide adequate space for the xref
2773
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2774
                // file, which is extremely likely.  In the second pass, we will know the actual
2775
                // value for this, but it's okay if it's smaller.
2776
741
                first_half_max_obj_offset = 1 << 25;
2777
741
            }
2778
1.30k
            pos = m->pipeline->getCount();
2779
1.30k
            writeXRefStream(
2780
1.30k
                first_half_xref,
2781
1.30k
                first_half_end,
2782
1.30k
                first_half_max_obj_offset,
2783
1.30k
                t_lin_first,
2784
1.30k
                first_half_start,
2785
1.30k
                first_half_end,
2786
1.30k
                first_trailer_size,
2787
1.30k
                hint_length + second_xref_offset,
2788
1.30k
                hint_id,
2789
1.30k
                hint_offset,
2790
1.30k
                hint_length,
2791
1.30k
                (pass == 1),
2792
1.30k
                pass);
2793
1.30k
            qpdf_offset_t endpos = m->pipeline->getCount();
2794
1.30k
            if (pass == 1) {
2795
                // Pad so we have enough room for the real xref stream.
2796
740
                writePad(calculateXrefStreamPadding(endpos - pos));
2797
740
                first_xref_end = m->pipeline->getCount();
2798
740
            } else {
2799
                // Pad so that the next object starts at the same place as in pass 1.
2800
569
                writePad(QIntC::to_size(first_xref_end - endpos));
2801
2802
569
                if (m->pipeline->getCount() != first_xref_end) {
2803
0
                    throw std::logic_error(
2804
0
                        "insufficient padding for first pass xref stream; "
2805
0
                        "first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
569
            }
2809
1.30k
            writeString("\n");
2810
12.5k
        } else {
2811
12.5k
            writeXRefTable(
2812
12.5k
                t_lin_first,
2813
12.5k
                first_half_start,
2814
12.5k
                first_half_end,
2815
12.5k
                first_trailer_size,
2816
12.5k
                hint_length + second_xref_offset,
2817
12.5k
                (pass == 1),
2818
12.5k
                hint_id,
2819
12.5k
                hint_offset,
2820
12.5k
                hint_length,
2821
12.5k
                pass);
2822
12.5k
            writeString("startxref\n0\n%%EOF\n");
2823
12.5k
        }
2824
2825
        // Parts 4 through 9
2826
2827
246k
        for (auto const& cur_object: m->object_queue) {
2828
246k
            if (cur_object.getObjectID() == part6_end_marker) {
2829
13.7k
                first_half_max_obj_offset = m->pipeline->getCount();
2830
13.7k
            }
2831
246k
            writeObject(cur_object);
2832
246k
            if (cur_object.getObjectID() == part4_end_marker) {
2833
13.8k
                if (m->encryption) {
2834
13.8k
                    writeEncryptionDictionary();
2835
13.8k
                }
2836
13.8k
                if (pass == 1) {
2837
7.09k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
7.09k
                } else {
2839
                    // Part 5: hint stream
2840
6.70k
                    writeString(hint_buffer);
2841
6.70k
                }
2842
13.8k
            }
2843
246k
            if (cur_object.getObjectID() == part6_end_marker) {
2844
13.5k
                part6_end_offset = m->pipeline->getCount();
2845
13.5k
            }
2846
246k
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
13.8k
        second_xref_offset = m->pipeline->getCount();
2853
13.8k
        if (need_xref_stream) {
2854
1.13k
            pos = m->pipeline->getCount();
2855
1.13k
            space_before_zero = writeXRefStream(
2856
1.13k
                second_half_xref,
2857
1.13k
                second_half_end,
2858
1.13k
                second_xref_offset,
2859
1.13k
                t_lin_second,
2860
1.13k
                0,
2861
1.13k
                second_half_end,
2862
1.13k
                second_trailer_size,
2863
1.13k
                0,
2864
1.13k
                0,
2865
1.13k
                0,
2866
1.13k
                0,
2867
1.13k
                (pass == 1),
2868
1.13k
                pass);
2869
1.13k
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
1.13k
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
568
                writePad(calculateXrefStreamPadding(endpos - pos));
2875
568
                writeString("\n");
2876
568
                second_xref_end = m->pipeline->getCount();
2877
568
            } else {
2878
                // Make the file size the same.
2879
565
                writePad(
2880
565
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount()));
2881
565
                writeString("\n");
2882
2883
                // If this assertion fails, maybe we didn't have enough padding above.
2884
565
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2885
0
                    throw std::logic_error(
2886
0
                        "count mismatch after xref stream; possible insufficient padding?");
2887
0
                }
2888
565
            }
2889
12.7k
        } else {
2890
12.7k
            space_before_zero = writeXRefTable(
2891
12.7k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2892
12.7k
        }
2893
13.8k
        writeString("startxref\n");
2894
13.8k
        writeString(std::to_string(first_xref_offset));
2895
13.8k
        writeString("\n%%EOF\n");
2896
2897
13.8k
        if (pass == 1) {
2898
6.71k
            if (m->deterministic_id) {
2899
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2900
0
                computeDeterministicIDData();
2901
0
                pp_md5 = nullptr;
2902
0
                qpdf_assert_debug(m->md5_pipeline == nullptr);
2903
0
            }
2904
2905
            // Close first pass pipeline
2906
6.71k
            file_size = m->pipeline->getCount();
2907
6.71k
            pp_pass1 = nullptr;
2908
2909
            // Save hint offset since it will be set to zero by calling openObject.
2910
6.71k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2911
2912
            // Write hint stream to a buffer
2913
6.71k
            {
2914
6.71k
                PipelinePopper pp_hint(this);
2915
6.71k
                activatePipelineStack(pp_hint, hint_buffer);
2916
6.71k
                writeHintStream(hint_id);
2917
6.71k
            }
2918
6.71k
            hint_length = QIntC::to_offset(hint_buffer.size());
2919
2920
            // Restore hint offset
2921
6.71k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2922
6.71k
            if (lin_pass1_file) {
2923
                // Write some debugging information
2924
0
                fprintf(
2925
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2926
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2927
0
                fprintf(
2928
0
                    lin_pass1_file,
2929
0
                    "%% second_xref_offset=%s\n",
2930
0
                    std::to_string(second_xref_offset).c_str());
2931
0
                fprintf(
2932
0
                    lin_pass1_file,
2933
0
                    "%% second_xref_end=%s\n",
2934
0
                    std::to_string(second_xref_end).c_str());
2935
0
                fclose(lin_pass1_file);
2936
0
                lin_pass1_file = nullptr;
2937
0
            }
2938
6.71k
        }
2939
13.8k
    }
2940
7.66k
}
2941
2942
void
2943
QPDFWriter::enqueueObjectsStandard()
2944
0
{
2945
0
    if (m->preserve_unreferenced_objects) {
2946
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2947
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2948
0
            enqueueObject(oh);
2949
0
        }
2950
0
    }
2951
2952
    // Put root first on queue.
2953
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2954
0
    enqueueObject(trailer.getKey("/Root"));
2955
2956
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2957
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2958
0
    for (auto& item: trailer.as_dictionary()) {
2959
0
        if (!item.second.null()) {
2960
0
            enqueueObject(item.second);
2961
0
        }
2962
0
    }
2963
0
}
2964
2965
void
2966
QPDFWriter::enqueueObjectsPCLm()
2967
0
{
2968
    // Image transform stream content for page strip images. Each of this new stream has to come
2969
    // after every page image strip written in the pclm file.
2970
0
    std::string image_transform_content = "q /image Do Q\n";
2971
2972
    // enqueue all pages first
2973
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2974
0
    for (auto& page: all) {
2975
        // enqueue page
2976
0
        enqueueObject(page);
2977
2978
        // enqueue page contents stream
2979
0
        enqueueObject(page.getKey("/Contents"));
2980
2981
        // enqueue all the strips for each page
2982
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2983
0
        for (auto& image: strips.as_dictionary()) {
2984
0
            if (!image.second.null()) {
2985
0
                enqueueObject(image.second);
2986
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2987
0
            }
2988
0
        }
2989
0
    }
2990
2991
    // Put root in queue.
2992
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2993
0
    enqueueObject(trailer.getKey("/Root"));
2994
0
}
2995
2996
void
2997
QPDFWriter::indicateProgress(bool decrement, bool finished)
2998
495k
{
2999
495k
    if (decrement) {
3000
123k
        --m->events_seen;
3001
123k
        return;
3002
123k
    }
3003
3004
371k
    ++m->events_seen;
3005
3006
371k
    if (!m->progress_reporter.get()) {
3007
371k
        return;
3008
371k
    }
3009
3010
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3011
0
        int percentage =
3012
0
            (finished ? 100
3013
0
                 : m->next_progress_report == 0
3014
0
                 ? 0
3015
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3016
0
        m->progress_reporter->reportProgress(percentage);
3017
0
    }
3018
0
    int increment = std::max(1, (m->events_expected / 100));
3019
0
    while (m->events_seen >= m->next_progress_report) {
3020
0
        m->next_progress_report += increment;
3021
0
    }
3022
0
}
3023
3024
void
3025
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3026
0
{
3027
0
    m->progress_reporter = pr;
3028
0
}
3029
3030
void
3031
QPDFWriter::writeStandard()
3032
0
{
3033
0
    auto pp_md5 = PipelinePopper(this);
3034
0
    if (m->deterministic_id) {
3035
0
        pushMD5Pipeline(pp_md5);
3036
0
    }
3037
3038
    // Start writing
3039
3040
0
    writeHeader();
3041
0
    writeString(m->extra_header_text);
3042
3043
0
    if (m->pclm) {
3044
0
        enqueueObjectsPCLm();
3045
0
    } else {
3046
0
        enqueueObjectsStandard();
3047
0
    }
3048
3049
    // Now start walking queue, outputting each object.
3050
0
    while (m->object_queue_front < m->object_queue.size()) {
3051
0
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3052
0
        ++m->object_queue_front;
3053
0
        writeObject(cur_object);
3054
0
    }
3055
3056
    // Write out the encryption dictionary, if any
3057
0
    if (m->encryption) {
3058
0
        writeEncryptionDictionary();
3059
0
    }
3060
3061
    // Now write out xref.  next_objid is now the number of objects.
3062
0
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3063
0
    if (m->object_stream_to_objects.empty()) {
3064
        // Write regular cross-reference table
3065
0
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3066
0
    } else {
3067
        // Write cross-reference stream.
3068
0
        int xref_id = m->next_objid++;
3069
0
        writeXRefStream(
3070
0
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3071
0
    }
3072
0
    writeString("startxref\n");
3073
0
    writeString(std::to_string(xref_offset));
3074
0
    writeString("\n%%EOF\n");
3075
3076
0
    if (m->deterministic_id) {
3077
0
        QTC::TC(
3078
0
            "qpdf",
3079
0
            "QPDFWriter standard deterministic ID",
3080
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3081
0
    }
3082
0
}