Coverage Report

Created: 2024-09-08 06:06

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Count.hh>
10
#include <qpdf/Pl_Discard.hh>
11
#include <qpdf/Pl_Flate.hh>
12
#include <qpdf/Pl_MD5.hh>
13
#include <qpdf/Pl_PNGFilter.hh>
14
#include <qpdf/Pl_RC4.hh>
15
#include <qpdf/Pl_StdioFile.hh>
16
#include <qpdf/QIntC.hh>
17
#include <qpdf/QPDF.hh>
18
#include <qpdf/QPDFObjectHandle.hh>
19
#include <qpdf/QPDF_Name.hh>
20
#include <qpdf/QPDF_String.hh>
21
#include <qpdf/QTC.hh>
22
#include <qpdf/QUtil.hh>
23
#include <qpdf/RC4.hh>
24
25
#include <algorithm>
26
#include <cstdlib>
27
#include <stdexcept>
28
29
// Out-of-line destructor with a deliberately empty body.
// Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
}
33
34
// Keeps a copy of the supplied callback in the `handler` member; the constructor body itself has
// nothing else to do.
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
    handler(handler)
{
}
38
39
// Out-of-line destructor with a deliberately empty body.
// Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT
                                                                  // (modernize-use-equals-default)
{
}
44
45
void
46
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
47
0
{
48
0
    this->handler(progress);
49
0
}
50
51
// Binds the writer state to `pdf` and records the object/generation of the document root.
// When the root is not an indirect object, QPDFObjGen(-1, 0) is stored instead.
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(
        pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen()
                                               : QPDFObjGen(-1, 0))
{
}
56
57
// Releases what the writer state owns: the output file (only if one is set and close_file was
// requested) and any output buffer that has not been handed out.
QPDFWriter::Members::~Members()
{
    bool const must_close = (file != nullptr) && close_file;
    if (must_close) {
        fclose(file);
    }
    delete output_buffer;
}
64
65
// Creates a writer for `pdf` without selecting any output destination.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
69
70
// Creates a writer for `pdf` and immediately directs output to `filename` via
// setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    setOutputFilename(filename);
}
75
76
// Creates a writer for `pdf` and immediately directs output to an already-open FILE via
// setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    setOutputFile(description, file, close_file);
}
81
82
void
83
QPDFWriter::setOutputFilename(char const* filename)
84
0
{
85
0
    char const* description = filename;
86
0
    FILE* f = nullptr;
87
0
    bool close_file = false;
88
0
    if (filename == nullptr) {
89
0
        description = "standard output";
90
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
91
0
        f = stdout;
92
0
        QUtil::binary_stdout();
93
0
    } else {
94
0
        QTC::TC("qpdf", "QPDFWriter write to file");
95
0
        f = QUtil::safe_fopen(filename, "wb+");
96
0
        close_file = true;
97
0
    }
98
0
    setOutputFile(description, f, close_file);
99
0
}
100
101
void
102
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
103
0
{
104
0
    m->filename = description;
105
0
    m->file = file;
106
0
    m->close_file = close_file;
107
0
    std::shared_ptr<Pipeline> p = std::make_shared<Pl_StdioFile>("qpdf output", file);
108
0
    m->to_delete.push_back(p);
109
0
    initializePipelineStack(p.get());
110
0
}
111
112
void
113
QPDFWriter::setOutputMemory()
114
0
{
115
0
    m->filename = "memory buffer";
116
0
    m->buffer_pipeline = new Pl_Buffer("qpdf output");
117
0
    m->to_delete.push_back(std::shared_ptr<Pipeline>(m->buffer_pipeline));
118
0
    initializePipelineStack(m->buffer_pipeline);
119
0
}
120
121
// Hands the stored output buffer pointer to the caller and clears the writer's copy, so
// Members' destructor will no longer delete it.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
128
129
std::shared_ptr<Buffer>
130
QPDFWriter::getBufferSharedPointer()
131
0
{
132
0
    return std::shared_ptr<Buffer>(getBuffer());
133
0
}
134
135
void
136
QPDFWriter::setOutputPipeline(Pipeline* p)
137
34.3k
{
138
34.3k
    m->filename = "custom pipeline";
139
34.3k
    initializePipelineStack(p);
140
34.3k
}
141
142
void
143
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
144
11.2k
{
145
11.2k
    m->object_stream_mode = mode;
146
11.2k
}
147
148
void
149
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
150
0
{
151
0
    switch (mode) {
152
0
    case qpdf_s_uncompress:
153
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
154
0
        m->compress_streams = false;
155
0
        break;
156
157
0
    case qpdf_s_preserve:
158
0
        m->stream_decode_level = qpdf_dl_none;
159
0
        m->compress_streams = false;
160
0
        break;
161
162
0
    case qpdf_s_compress:
163
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
164
0
        m->compress_streams = true;
165
0
        break;
166
0
    }
167
0
    m->stream_decode_level_set = true;
168
0
    m->compress_streams_set = true;
169
0
}
170
171
void
172
QPDFWriter::setCompressStreams(bool val)
173
0
{
174
0
    m->compress_streams = val;
175
0
    m->compress_streams_set = true;
176
0
}
177
178
void
179
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
180
34.3k
{
181
34.3k
    m->stream_decode_level = val;
182
34.3k
    m->stream_decode_level_set = true;
183
34.3k
}
184
185
void
186
QPDFWriter::setRecompressFlate(bool val)
187
0
{
188
0
    m->recompress_flate = val;
189
0
}
190
191
void
192
QPDFWriter::setContentNormalization(bool val)
193
0
{
194
0
    m->normalize_content_set = true;
195
0
    m->normalize_content = val;
196
0
}
197
198
void
199
QPDFWriter::setQDFMode(bool val)
200
8.61k
{
201
8.61k
    m->qdf_mode = val;
202
8.61k
}
203
204
void
205
QPDFWriter::setPreserveUnreferencedObjects(bool val)
206
0
{
207
0
    m->preserve_unreferenced_objects = val;
208
0
}
209
210
void
211
QPDFWriter::setNewlineBeforeEndstream(bool val)
212
0
{
213
0
    m->newline_before_endstream = val;
214
0
}
215
216
void
217
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
218
60.1k
{
219
60.1k
    bool set_version = false;
220
60.1k
    bool set_extension_level = false;
221
60.1k
    if (m->min_pdf_version.empty()) {
222
33.2k
        set_version = true;
223
33.2k
        set_extension_level = true;
224
33.2k
    } else {
225
26.8k
        int old_major = 0;
226
26.8k
        int old_minor = 0;
227
26.8k
        int min_major = 0;
228
26.8k
        int min_minor = 0;
229
26.8k
        parseVersion(version, old_major, old_minor);
230
26.8k
        parseVersion(m->min_pdf_version, min_major, min_minor);
231
26.8k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
232
26.8k
        if (compare > 0) {
233
2.41k
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
234
2.41k
            set_version = true;
235
2.41k
            set_extension_level = true;
236
24.4k
        } else if (compare == 0) {
237
2.38k
            if (extension_level > m->min_extension_level) {
238
8
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
239
8
                set_extension_level = true;
240
8
            }
241
2.38k
        }
242
26.8k
    }
243
244
60.1k
    if (set_version) {
245
35.6k
        m->min_pdf_version = version;
246
35.6k
    }
247
60.1k
    if (set_extension_level) {
248
35.6k
        m->min_extension_level = extension_level;
249
35.6k
    }
250
60.1k
}
251
252
void
253
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
254
0
{
255
0
    std::string version;
256
0
    int extension_level;
257
0
    v.getVersion(version, extension_level);
258
0
    setMinimumPDFVersion(version, extension_level);
259
0
}
260
261
void
262
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
263
0
{
264
0
    m->forced_pdf_version = version;
265
0
    m->forced_extension_level = extension_level;
266
0
}
267
268
void
269
QPDFWriter::setExtraHeaderText(std::string const& text)
270
0
{
271
0
    m->extra_header_text = text;
272
0
    if ((m->extra_header_text.length() > 0) && (*(m->extra_header_text.rbegin()) != '\n')) {
273
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
274
0
        m->extra_header_text += "\n";
275
0
    } else {
276
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
277
0
    }
278
0
}
279
280
void
281
QPDFWriter::setStaticID(bool val)
282
20.4k
{
283
20.4k
    m->static_id = val;
284
20.4k
}
285
286
void
287
QPDFWriter::setDeterministicID(bool val)
288
13.8k
{
289
13.8k
    m->deterministic_id = val;
290
13.8k
}
291
292
void
293
QPDFWriter::setStaticAesIV(bool val)
294
0
{
295
0
    if (val) {
296
0
        Pl_AES_PDF::useStaticIV();
297
0
    }
298
0
}
299
300
void
301
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
302
0
{
303
0
    m->suppress_original_object_ids = val;
304
0
}
305
306
void
307
QPDFWriter::setPreserveEncryption(bool val)
308
0
{
309
0
    m->preserve_encryption = val;
310
0
}
311
312
void
313
QPDFWriter::setLinearization(bool val)
314
19.7k
{
315
19.7k
    m->linearized = val;
316
19.7k
    if (val) {
317
19.7k
        m->pclm = false;
318
19.7k
    }
319
19.7k
}
320
321
void
322
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
323
0
{
324
0
    m->lin_pass1_filename = filename;
325
0
}
326
327
void
328
QPDFWriter::setPCLm(bool val)
329
0
{
330
0
    m->pclm = val;
331
0
    if (val) {
332
0
        m->linearized = false;
333
0
    }
334
0
}
335
336
void
337
QPDFWriter::setR2EncryptionParametersInsecure(
338
    char const* user_password,
339
    char const* owner_password,
340
    bool allow_print,
341
    bool allow_modify,
342
    bool allow_extract,
343
    bool allow_annotate)
344
0
{
345
0
    std::set<int> clear;
346
0
    if (!allow_print) {
347
0
        clear.insert(3);
348
0
    }
349
0
    if (!allow_modify) {
350
0
        clear.insert(4);
351
0
    }
352
0
    if (!allow_extract) {
353
0
        clear.insert(5);
354
0
    }
355
0
    if (!allow_annotate) {
356
0
        clear.insert(6);
357
0
    }
358
359
0
    setEncryptionParameters(user_password, owner_password, 1, 2, 5, clear);
360
0
}
361
362
void
363
QPDFWriter::setR3EncryptionParametersInsecure(
364
    char const* user_password,
365
    char const* owner_password,
366
    bool allow_accessibility,
367
    bool allow_extract,
368
    bool allow_assemble,
369
    bool allow_annotate_and_form,
370
    bool allow_form_filling,
371
    bool allow_modify_other,
372
    qpdf_r3_print_e print)
373
5.98k
{
374
5.98k
    std::set<int> clear;
375
5.98k
    interpretR3EncryptionParameters(
376
5.98k
        clear,
377
5.98k
        user_password,
378
5.98k
        owner_password,
379
5.98k
        allow_accessibility,
380
5.98k
        allow_extract,
381
5.98k
        allow_assemble,
382
5.98k
        allow_annotate_and_form,
383
5.98k
        allow_form_filling,
384
5.98k
        allow_modify_other,
385
5.98k
        print,
386
5.98k
        qpdf_r3m_all);
387
5.98k
    setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
388
5.98k
}
389
390
void
391
QPDFWriter::setR4EncryptionParametersInsecure(
392
    char const* user_password,
393
    char const* owner_password,
394
    bool allow_accessibility,
395
    bool allow_extract,
396
    bool allow_assemble,
397
    bool allow_annotate_and_form,
398
    bool allow_form_filling,
399
    bool allow_modify_other,
400
    qpdf_r3_print_e print,
401
    bool encrypt_metadata,
402
    bool use_aes)
403
0
{
404
0
    std::set<int> clear;
405
0
    interpretR3EncryptionParameters(
406
0
        clear,
407
0
        user_password,
408
0
        owner_password,
409
0
        allow_accessibility,
410
0
        allow_extract,
411
0
        allow_assemble,
412
0
        allow_annotate_and_form,
413
0
        allow_form_filling,
414
0
        allow_modify_other,
415
0
        print,
416
0
        qpdf_r3m_all);
417
0
    m->encrypt_use_aes = use_aes;
418
0
    m->encrypt_metadata = encrypt_metadata;
419
0
    setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear);
420
0
}
421
422
void
423
QPDFWriter::setR5EncryptionParameters(
424
    char const* user_password,
425
    char const* owner_password,
426
    bool allow_accessibility,
427
    bool allow_extract,
428
    bool allow_assemble,
429
    bool allow_annotate_and_form,
430
    bool allow_form_filling,
431
    bool allow_modify_other,
432
    qpdf_r3_print_e print,
433
    bool encrypt_metadata)
434
0
{
435
0
    std::set<int> clear;
436
0
    interpretR3EncryptionParameters(
437
0
        clear,
438
0
        user_password,
439
0
        owner_password,
440
0
        allow_accessibility,
441
0
        allow_extract,
442
0
        allow_assemble,
443
0
        allow_annotate_and_form,
444
0
        allow_form_filling,
445
0
        allow_modify_other,
446
0
        print,
447
0
        qpdf_r3m_all);
448
0
    m->encrypt_use_aes = true;
449
0
    m->encrypt_metadata = encrypt_metadata;
450
0
    setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear);
451
0
}
452
453
void
454
QPDFWriter::setR6EncryptionParameters(
455
    char const* user_password,
456
    char const* owner_password,
457
    bool allow_accessibility,
458
    bool allow_extract,
459
    bool allow_assemble,
460
    bool allow_annotate_and_form,
461
    bool allow_form_filling,
462
    bool allow_modify_other,
463
    qpdf_r3_print_e print,
464
    bool encrypt_metadata)
465
14.5k
{
466
14.5k
    std::set<int> clear;
467
14.5k
    interpretR3EncryptionParameters(
468
14.5k
        clear,
469
14.5k
        user_password,
470
14.5k
        owner_password,
471
14.5k
        allow_accessibility,
472
14.5k
        allow_extract,
473
14.5k
        allow_assemble,
474
14.5k
        allow_annotate_and_form,
475
14.5k
        allow_form_filling,
476
14.5k
        allow_modify_other,
477
14.5k
        print,
478
14.5k
        qpdf_r3m_all);
479
14.5k
    m->encrypt_use_aes = true;
480
14.5k
    m->encrypt_metadata = encrypt_metadata;
481
14.5k
    setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear);
482
14.5k
}
483
484
void
485
QPDFWriter::interpretR3EncryptionParameters(
486
    std::set<int>& clear,
487
    char const* user_password,
488
    char const* owner_password,
489
    bool allow_accessibility,
490
    bool allow_extract,
491
    bool allow_assemble,
492
    bool allow_annotate_and_form,
493
    bool allow_form_filling,
494
    bool allow_modify_other,
495
    qpdf_r3_print_e print,
496
    qpdf_r3_modify_e modify)
497
20.4k
{
498
    // Acrobat 5 security options:
499
500
    // Checkboxes:
501
    //   Enable Content Access for the Visually Impaired
502
    //   Allow Content Copying and Extraction
503
504
    // Allowed changes menu:
505
    //   None
506
    //   Only Document Assembly
507
    //   Only Form Field Fill-in or Signing
508
    //   Comment Authoring, Form Field Fill-in or Signing
509
    //   General Editing, Comment and Form Field Authoring
510
511
    // Allowed printing menu:
512
    //   None
513
    //   Low Resolution
514
    //   Full printing
515
516
    // Meanings of bits in P when R >= 3
517
    //
518
    //  3: low-resolution printing
519
    //  4: document modification except as controlled by 6, 9, and 11
520
    //  5: extraction
521
    //  6: add/modify annotations (comment), fill in forms
522
    //     if 4+6 are set, also allows modification of form fields
523
    //  9: fill in forms even if 6 is clear
524
    // 10: accessibility; ignored by readers, should always be set
525
    // 11: document assembly even if 4 is clear
526
    // 12: high-resolution printing
527
528
20.4k
    if (!allow_accessibility) {
529
        // setEncryptionParameters sets this if R > 3
530
0
        clear.insert(10);
531
0
    }
532
20.4k
    if (!allow_extract) {
533
0
        clear.insert(5);
534
0
    }
535
536
    // Note: these switch statements all "fall through" (no break statements).  Each option clears
537
    // successively more access bits.
538
20.4k
    switch (print) {
539
0
    case qpdf_r3p_none:
540
0
        clear.insert(3); // any printing
541
542
0
    case qpdf_r3p_low:
543
0
        clear.insert(12); // high resolution printing
544
545
20.4k
    case qpdf_r3p_full:
546
20.4k
        break;
547
548
        // no default so gcc warns for missing cases
549
20.4k
    }
550
551
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
552
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
553
    // stuck with it. See also allow checks below to control the bits individually.
554
555
    // NOT EXERCISED IN TEST SUITE
556
20.4k
    switch (modify) {
557
0
    case qpdf_r3m_none:
558
0
        clear.insert(11); // document assembly
559
560
0
    case qpdf_r3m_assembly:
561
0
        clear.insert(9); // filling in form fields
562
563
0
    case qpdf_r3m_form:
564
0
        clear.insert(6); // modify annotations, fill in form fields
565
566
0
    case qpdf_r3m_annotate:
567
0
        clear.insert(4); // other modifications
568
569
20.4k
    case qpdf_r3m_all:
570
20.4k
        break;
571
572
        // no default so gcc warns for missing cases
573
20.4k
    }
574
    // END NOT EXERCISED IN TEST SUITE
575
576
20.4k
    if (!allow_assemble) {
577
0
        clear.insert(11);
578
0
    }
579
20.4k
    if (!allow_annotate_and_form) {
580
0
        clear.insert(6);
581
0
    }
582
20.4k
    if (!allow_form_filling) {
583
0
        clear.insert(9);
584
0
    }
585
20.4k
    if (!allow_modify_other) {
586
0
        clear.insert(4);
587
0
    }
588
20.4k
}
589
590
void
591
QPDFWriter::setEncryptionParameters(
592
    char const* user_password,
593
    char const* owner_password,
594
    int V,
595
    int R,
596
    int key_len,
597
    std::set<int>& bits_to_clear)
598
20.4k
{
599
    // PDF specification refers to bits with the low bit numbered 1.
600
    // We have to convert this into a bit field.
601
602
    // Specification always requires bits 1 and 2 to be cleared.
603
20.4k
    bits_to_clear.insert(1);
604
20.4k
    bits_to_clear.insert(2);
605
606
20.4k
    if (R > 3) {
607
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
608
        // is no way to disable accessibility with R > 3.
609
14.5k
        bits_to_clear.erase(10);
610
14.5k
    }
611
612
20.4k
    int P = 0;
613
    // Create the complement of P, then invert.
614
40.9k
    for (int b: bits_to_clear) {
615
40.9k
        P |= (1 << (b - 1));
616
40.9k
    }
617
20.4k
    P = ~P;
618
619
20.4k
    generateID();
620
20.4k
    std::string O;
621
20.4k
    std::string U;
622
20.4k
    std::string OE;
623
20.4k
    std::string UE;
624
20.4k
    std::string Perms;
625
20.4k
    std::string encryption_key;
626
20.4k
    if (V < 5) {
627
5.97k
        QPDF::compute_encryption_O_U(
628
5.97k
            user_password, owner_password, V, R, key_len, P, m->encrypt_metadata, m->id1, O, U);
629
14.5k
    } else {
630
14.5k
        QPDF::compute_encryption_parameters_V5(
631
14.5k
            user_password,
632
14.5k
            owner_password,
633
14.5k
            V,
634
14.5k
            R,
635
14.5k
            key_len,
636
14.5k
            P,
637
14.5k
            m->encrypt_metadata,
638
14.5k
            m->id1,
639
14.5k
            encryption_key,
640
14.5k
            O,
641
14.5k
            U,
642
14.5k
            OE,
643
14.5k
            UE,
644
14.5k
            Perms);
645
14.5k
    }
646
20.4k
    setEncryptionParametersInternal(
647
20.4k
        V, R, key_len, P, O, U, OE, UE, Perms, m->id1, user_password, encryption_key);
648
20.4k
}
649
650
void
651
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
652
0
{
653
0
    m->preserve_encryption = false;
654
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
655
0
    if (trailer.hasKey("/Encrypt")) {
656
0
        generateID();
657
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
658
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
659
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
660
0
        int key_len = 5;
661
0
        if (V > 1) {
662
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
663
0
        }
664
0
        if (encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()) {
665
0
            m->encrypt_metadata = encrypt.getKey("/EncryptMetadata").getBoolValue();
666
0
        }
667
0
        if (V >= 4) {
668
            // When copying encryption parameters, use AES even if the original file did not.
669
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
670
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
671
            // all potentially having different values.
672
0
            m->encrypt_use_aes = true;
673
0
        }
674
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", m->encrypt_metadata ? 0 : 1);
675
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
676
0
        std::string OE;
677
0
        std::string UE;
678
0
        std::string Perms;
679
0
        std::string encryption_key;
680
0
        if (V >= 5) {
681
0
            QTC::TC("qpdf", "QPDFWriter copy V5");
682
0
            OE = encrypt.getKey("/OE").getStringValue();
683
0
            UE = encrypt.getKey("/UE").getStringValue();
684
0
            Perms = encrypt.getKey("/Perms").getStringValue();
685
0
            encryption_key = qpdf.getEncryptionKey();
686
0
        }
687
688
0
        setEncryptionParametersInternal(
689
0
            V,
690
0
            encrypt.getKey("/R").getIntValueAsInt(),
691
0
            key_len,
692
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
693
0
            encrypt.getKey("/O").getStringValue(),
694
0
            encrypt.getKey("/U").getStringValue(),
695
0
            OE,
696
0
            UE,
697
0
            Perms,
698
0
            m->id1, // m->id1 == the other file's id1
699
0
            qpdf.getPaddedUserPassword(),
700
0
            encryption_key);
701
0
    }
702
0
}
703
704
void
705
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
706
0
{
707
0
    if (!m->encrypted) {
708
0
        return;
709
0
    }
710
711
0
    bool disable = false;
712
0
    if (compareVersions(major, minor, 1, 3) < 0) {
713
0
        disable = true;
714
0
    } else {
715
0
        int V = QUtil::string_to_int(m->encryption_dictionary["/V"].c_str());
716
0
        int R = QUtil::string_to_int(m->encryption_dictionary["/R"].c_str());
717
0
        if (compareVersions(major, minor, 1, 4) < 0) {
718
0
            if ((V > 1) || (R > 2)) {
719
0
                disable = true;
720
0
            }
721
0
        } else if (compareVersions(major, minor, 1, 5) < 0) {
722
0
            if ((V > 2) || (R > 3)) {
723
0
                disable = true;
724
0
            }
725
0
        } else if (compareVersions(major, minor, 1, 6) < 0) {
726
0
            if (m->encrypt_use_aes) {
727
0
                disable = true;
728
0
            }
729
0
        } else if (
730
0
            (compareVersions(major, minor, 1, 7) < 0) ||
731
0
            ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
732
0
            if ((V >= 5) || (R >= 5)) {
733
0
                disable = true;
734
0
            }
735
0
        }
736
0
    }
737
0
    if (disable) {
738
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
739
0
        m->encrypted = false;
740
0
    }
741
0
}
742
743
void
744
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
745
53.4k
{
746
53.4k
    major = QUtil::string_to_int(version.c_str());
747
53.4k
    minor = 0;
748
53.4k
    size_t p = version.find('.');
749
53.4k
    if ((p != std::string::npos) && (version.length() > p)) {
750
53.3k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
751
53.3k
    }
752
53.4k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
753
53.4k
    if (tmp != version) {
754
        // The version number in the input is probably invalid. This happens with some files that
755
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
756
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
757
33
    }
758
53.4k
}
759
760
int
761
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
762
26.5k
{
763
26.5k
    if (major1 < major2) {
764
91
        return -1;
765
26.4k
    } else if (major1 > major2) {
766
303
        return 1;
767
26.1k
    } else if (minor1 < minor2) {
768
21.6k
        return -1;
769
21.6k
    } else if (minor1 > minor2) {
770
2.10k
        return 1;
771
2.38k
    } else {
772
2.38k
        return 0;
773
2.38k
    }
774
26.5k
}
775
776
void
777
QPDFWriter::setEncryptionParametersInternal(
778
    int V,
779
    int R,
780
    int key_len,
781
    int P,
782
    std::string const& O,
783
    std::string const& U,
784
    std::string const& OE,
785
    std::string const& UE,
786
    std::string const& Perms,
787
    std::string const& id1,
788
    std::string const& user_password,
789
    std::string const& encryption_key)
790
19.5k
{
791
19.5k
    m->encryption_V = V;
792
19.5k
    m->encryption_R = R;
793
19.5k
    m->encryption_dictionary["/Filter"] = "/Standard";
794
19.5k
    m->encryption_dictionary["/V"] = std::to_string(V);
795
19.5k
    m->encryption_dictionary["/Length"] = std::to_string(key_len * 8);
796
19.5k
    m->encryption_dictionary["/R"] = std::to_string(R);
797
19.5k
    m->encryption_dictionary["/P"] = std::to_string(P);
798
19.5k
    m->encryption_dictionary["/O"] = QPDF_String(O).unparse(true);
799
19.5k
    m->encryption_dictionary["/U"] = QPDF_String(U).unparse(true);
800
19.5k
    if (V >= 5) {
801
13.6k
        m->encryption_dictionary["/OE"] = QPDF_String(OE).unparse(true);
802
13.6k
        m->encryption_dictionary["/UE"] = QPDF_String(UE).unparse(true);
803
13.6k
        m->encryption_dictionary["/Perms"] = QPDF_String(Perms).unparse(true);
804
13.6k
    }
805
19.5k
    if (R >= 6) {
806
13.6k
        setMinimumPDFVersion("1.7", 8);
807
13.6k
    } else if (R == 5) {
808
0
        setMinimumPDFVersion("1.7", 3);
809
5.97k
    } else if (R == 4) {
810
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
811
5.97k
    } else if (R == 3) {
812
5.97k
        setMinimumPDFVersion("1.4");
813
5.97k
    } else {
814
0
        setMinimumPDFVersion("1.3");
815
0
    }
816
817
19.5k
    if ((R >= 4) && (!m->encrypt_metadata)) {
818
0
        m->encryption_dictionary["/EncryptMetadata"] = "false";
819
0
    }
820
19.5k
    if ((V == 4) || (V == 5)) {
821
        // The spec says the value for the crypt filter key can be anything, and xpdf seems to
822
        // agree.  However, Adobe Reader won't open our files unless we use /StdCF.
823
13.6k
        m->encryption_dictionary["/StmF"] = "/StdCF";
824
13.6k
        m->encryption_dictionary["/StrF"] = "/StdCF";
825
13.6k
        std::string method = (m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
826
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
827
        // MacOS won't open encrypted files without it.
828
13.6k
        m->encryption_dictionary["/CF"] = "<< /StdCF << /AuthEvent /DocOpen /CFM " + method +
829
13.6k
            " /Length " + std::string((V < 5) ? "16" : "32") + " >> >>";
830
13.6k
    }
831
832
19.5k
    m->encrypted = true;
833
19.5k
    QPDF::EncryptionData encryption_data(
834
19.5k
        V, R, key_len, P, O, U, OE, UE, Perms, id1, m->encrypt_metadata);
835
19.5k
    if (V < 5) {
836
5.97k
        m->encryption_key = QPDF::compute_encryption_key(user_password, encryption_data);
837
13.6k
    } else {
838
13.6k
        m->encryption_key = encryption_key;
839
13.6k
    }
840
19.5k
}
841
842
void
843
QPDFWriter::setDataKey(int objid)
844
1.24M
{
845
1.24M
    m->cur_data_key = QPDF::compute_data_key(
846
1.24M
        m->encryption_key, objid, 0, m->encrypt_use_aes, m->encryption_V, m->encryption_R);
847
1.24M
}
848
849
unsigned int
850
QPDFWriter::bytesNeeded(long long n)
851
66.2k
{
852
66.2k
    unsigned int bytes = 0;
853
164k
    while (n) {
854
98.4k
        ++bytes;
855
98.4k
        n >>= 8;
856
98.4k
    }
857
66.2k
    return bytes;
858
66.2k
}
859
860
void
861
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
862
1.97M
{
863
1.97M
    if (bytes > sizeof(unsigned long long)) {
864
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
865
0
    }
866
1.97M
    unsigned char data[sizeof(unsigned long long)];
867
4.96M
    for (unsigned int i = 0; i < bytes; ++i) {
868
2.98M
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
869
2.98M
        val >>= 8;
870
2.98M
    }
871
1.97M
    m->pipeline->write(data, bytes);
872
1.97M
}
873
874
void
875
QPDFWriter::writeString(std::string_view str)
876
131M
{
877
131M
    m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
878
131M
}
879
880
void
881
QPDFWriter::writeBuffer(std::shared_ptr<Buffer>& b)
882
518k
{
883
518k
    m->pipeline->write(b->getBuffer(), b->getSize());
884
518k
}
885
886
void
887
QPDFWriter::writeStringQDF(std::string_view str)
888
42.4M
{
889
42.4M
    if (m->qdf_mode) {
890
7.06M
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
891
7.06M
    }
892
42.4M
}
893
894
void
895
QPDFWriter::writeStringNoQDF(std::string_view str)
896
855k
{
897
855k
    if (!m->qdf_mode) {
898
744k
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
899
744k
    }
900
855k
}
901
902
void
903
QPDFWriter::writePad(size_t nspaces)
904
77.1k
{
905
77.1k
    writeString(std::string(nspaces, ' '));
906
77.1k
}
907
908
// Append `p` to the pipeline stack and return it. In debug builds this asserts that `p` is not
// a Pl_Count; Pl_Count entries are pushed directly by initializePipelineStack and
// activatePipelineStack.
Pipeline*
QPDFWriter::pushPipeline(Pipeline* p)
{
    qpdf_assert_debug(!dynamic_cast<Pl_Count*>(p));
    m->pipeline_stack.emplace_back(p);
    return p;
}
915
916
void
917
QPDFWriter::initializePipelineStack(Pipeline* p)
918
34.3k
{
919
34.3k
    m->pipeline = new Pl_Count("pipeline stack base", p);
920
34.3k
    m->to_delete.push_back(std::shared_ptr<Pipeline>(m->pipeline));
921
34.3k
    m->pipeline_stack.push_back(m->pipeline);
922
34.3k
}
923
924
void
925
QPDFWriter::activatePipelineStack(PipelinePopper& pp)
926
1.27M
{
927
1.27M
    std::string stack_id("stack " + std::to_string(m->next_stack_id));
928
1.27M
    auto* c = new Pl_Count(stack_id.c_str(), m->pipeline_stack.back());
929
1.27M
    ++m->next_stack_id;
930
1.27M
    m->pipeline_stack.push_back(c);
931
1.27M
    m->pipeline = c;
932
1.27M
    pp.stack_id = stack_id;
933
1.27M
}
934
935
// Pop the pipeline stack frame that was activated for this popper. Does nothing if the popper
// was never activated (empty stack_id). Otherwise: finish the current pipeline, delete the
// Pl_Count on top of the stack, then delete every pipeline below it down to (but not
// including) the next Pl_Count, which becomes the current pipeline again. If a buffer pointer
// `bp` was supplied, it receives the contents of any Pl_Buffer that is popped.
QPDFWriter::PipelinePopper::~PipelinePopper()
{
    if (stack_id.empty()) {
        return;
    }
    auto& members = *qw->m;
    auto& stack = members.pipeline_stack;

    qpdf_assert_debug(stack.size() >= 2);
    members.pipeline->finish();
    qpdf_assert_debug(dynamic_cast<Pl_Count*>(stack.back()) == members.pipeline);
    // It might be possible for the next assertion to fail if writeLinearized exits by exception
    // while a deterministic ID is being computed, but I don't think so. As of this writing,
    // that is the only case in which two dynamically allocated PipelinePopper objects ever
    // exist at the same time, so the assertion will fail if they get popped out of order from
    // automatic destruction.
    qpdf_assert_debug(members.pipeline->getIdentifier() == stack_id);
    delete stack.back();
    stack.pop_back();

    while (dynamic_cast<Pl_Count*>(stack.back()) == nullptr) {
        Pipeline* top = stack.back();
        // Don't leave m->md5_pipeline pointing at a pipeline that is about to be deleted.
        if (dynamic_cast<Pl_MD5*>(top) == members.md5_pipeline) {
            members.md5_pipeline = nullptr;
        }
        stack.pop_back();
        if (bp) {
            if (auto* buf = dynamic_cast<Pl_Buffer*>(top)) {
                *bp = buf->getBufferSharedPointer();
            }
        }
        delete top;
    }
    members.pipeline = dynamic_cast<Pl_Count*>(stack.back());
}
964
965
void
966
QPDFWriter::adjustAESStreamLength(size_t& length)
967
482k
{
968
482k
    if (m->encrypted && (!m->cur_data_key.empty()) && m->encrypt_use_aes) {
969
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
970
        // also be prepended by 16 bits of random data.
971
242k
        length += 32 - (length & 0xf);
972
242k
    }
973
482k
}
974
975
void
976
QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
977
481k
{
978
481k
    if (m->encrypted && (!m->cur_data_key.empty())) {
979
291k
        Pipeline* p = nullptr;
980
291k
        if (m->encrypt_use_aes) {
981
242k
            p = new Pl_AES_PDF(
982
242k
                "aes stream encryption",
983
242k
                m->pipeline,
984
242k
                true,
985
242k
                QUtil::unsigned_char_pointer(m->cur_data_key),
986
242k
                m->cur_data_key.length());
987
242k
        } else {
988
49.1k
            p = new Pl_RC4(
989
49.1k
                "rc4 stream encryption",
990
49.1k
                m->pipeline,
991
49.1k
                QUtil::unsigned_char_pointer(m->cur_data_key),
992
49.1k
                QIntC::to_int(m->cur_data_key.length()));
993
49.1k
        }
994
291k
        pushPipeline(p);
995
291k
    }
996
    // Must call this unconditionally so we can call popPipelineStack to balance
997
    // pushEncryptionFilter().
998
481k
    activatePipelineStack(pp);
999
481k
}
1000
1001
void
1002
QPDFWriter::pushDiscardFilter(PipelinePopper& pp)
1003
53.8k
{
1004
53.8k
    pushPipeline(new Pl_Discard());
1005
53.8k
    activatePipelineStack(pp);
1006
53.8k
}
1007
1008
void
1009
QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
1010
12.9k
{
1011
12.9k
    if (!m->id2.empty()) {
1012
        // Can't happen in the code
1013
0
        throw std::logic_error("Deterministic ID computation enabled after ID"
1014
0
                               " generation has already occurred.");
1015
0
    }
1016
12.9k
    qpdf_assert_debug(m->deterministic_id);
1017
12.9k
    qpdf_assert_debug(m->md5_pipeline == nullptr);
1018
12.9k
    qpdf_assert_debug(m->pipeline->getCount() == 0);
1019
12.9k
    m->md5_pipeline = new Pl_MD5("qpdf md5", m->pipeline);
1020
12.9k
    m->md5_pipeline->persistAcrossFinish(true);
1021
    // Special case code in popPipelineStack clears m->md5_pipeline upon deletion.
1022
12.9k
    pushPipeline(m->md5_pipeline);
1023
12.9k
    activatePipelineStack(pp);
1024
12.9k
}
1025
1026
void
1027
QPDFWriter::computeDeterministicIDData()
1028
11.9k
{
1029
11.9k
    qpdf_assert_debug(m->md5_pipeline != nullptr);
1030
11.9k
    qpdf_assert_debug(m->deterministic_id_data.empty());
1031
11.9k
    m->deterministic_id_data = m->md5_pipeline->getHexDigest();
1032
11.9k
    m->md5_pipeline->enable(false);
1033
11.9k
}
1034
1035
int
1036
QPDFWriter::openObject(int objid)
1037
1.40M
{
1038
1.40M
    if (objid == 0) {
1039
5.75k
        objid = m->next_objid++;
1040
5.75k
    }
1041
1.40M
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1042
1.40M
    writeString(std::to_string(objid));
1043
1.40M
    writeString(" 0 obj\n");
1044
1.40M
    return objid;
1045
1.40M
}
1046
1047
void
1048
QPDFWriter::closeObject(int objid)
1049
1.40M
{
1050
    // Write a newline before endobj as it makes the file easier to repair.
1051
1.40M
    writeString("\nendobj\n");
1052
1.40M
    writeStringQDF("\n");
1053
1.40M
    auto& new_obj = m->new_obj[objid];
1054
1.40M
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1055
1.40M
}
1056
1057
void
1058
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
1059
457k
{
1060
457k
    int objid = og.getObj();
1061
457k
    if ((og.getGen() != 0) || (m->object_stream_to_objects.count(objid) == 0)) {
1062
        // This is not an object stream.
1063
445k
        return;
1064
445k
    }
1065
1066
    // Reserve numbers for the objects that belong to this object stream.
1067
190k
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1068
190k
        m->obj[iter].renumber = m->next_objid++;
1069
190k
    }
1070
11.7k
}
1071
1072
void
1073
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1074
51.9M
{
1075
51.9M
    if (object.isIndirect()) {
1076
        // This owner check can only be done for indirect objects. It is possible for a direct
1077
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1078
        // one file was insert into another file without copying. Doing that is safe even if the
1079
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1080
3.23M
        if (object.getOwningQPDF() != &(m->pdf)) {
1081
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1082
0
            throw std::logic_error("QPDFObjectHandle from different QPDF found while writing.  Use "
1083
0
                                   "QPDF::copyForeignObject to add objects from another file.");
1084
0
        }
1085
1086
3.23M
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1087
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1088
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1089
            // file. This case can occur when creating a QDF from a file with object streams when
1090
            // preserving unreferenced objects since the old cross reference streams are not
1091
            // actually referenced by object number.
1092
3.06k
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1093
3.06k
            return;
1094
3.06k
        }
1095
1096
3.23M
        QPDFObjGen og = object.getObjGen();
1097
3.23M
        auto& obj = m->obj[og];
1098
1099
3.23M
        if (obj.renumber == 0) {
1100
832k
            if (obj.object_stream > 0) {
1101
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1102
                // stream.  Object streams always have generation 0.
1103
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1104
3.36k
                obj.renumber = -1;
1105
3.36k
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1106
829k
            } else {
1107
829k
                m->object_queue.push_back(object);
1108
829k
                obj.renumber = m->next_objid++;
1109
1110
829k
                if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
1111
                    // For linearized files, uncompressed objects go at end, and we take care of
1112
                    // assigning numbers to them elsewhere.
1113
11.7k
                    if (!m->linearized) {
1114
2.10k
                        assignCompressedObjectNumbers(og);
1115
2.10k
                    }
1116
817k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1117
                    // reserve next object ID for length
1118
72.7k
                    ++m->next_objid;
1119
72.7k
                }
1120
829k
            }
1121
2.39M
        } else if (obj.renumber == -1) {
1122
            // This can happen if a specially constructed file indicates that an object stream is
1123
            // inside itself.
1124
3.75k
        }
1125
3.23M
        return;
1126
48.6M
    } else if (!m->linearized) {
1127
48.6M
        if (object.isArray()) {
1128
29.8M
            for (auto& item: object.getArrayAsVector()) {
1129
29.8M
                enqueueObject(item);
1130
29.8M
            }
1131
47.7M
        } else if (object.isDictionary()) {
1132
7.14M
            for (auto& item: object.getDictAsMap()) {
1133
7.14M
                if (!item.second.isNull()) {
1134
6.74M
                    enqueueObject(item.second);
1135
6.74M
                }
1136
7.14M
            }
1137
2.06M
        }
1138
48.6M
    } else {
1139
        // ignore
1140
373
    }
1141
51.9M
}
1142
1143
void
1144
QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1145
39.1M
{
1146
39.1M
    if (!m->linearized) {
1147
14.7M
        enqueueObject(child);
1148
14.7M
    }
1149
39.1M
    if (child.isIndirect()) {
1150
4.61M
        writeString(std::to_string(m->obj[child].renumber));
1151
4.61M
        writeString(" 0 R");
1152
34.5M
    } else {
1153
34.5M
        unparseObject(child, level, flags);
1154
34.5M
    }
1155
39.1M
}
1156
1157
void
1158
QPDFWriter::writeTrailer(
1159
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1160
74.7k
{
1161
74.7k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1162
74.7k
    if (xref_stream) {
1163
22.0k
        m->cur_data_key.clear();
1164
52.6k
    } else {
1165
52.6k
        writeString("trailer <<");
1166
52.6k
    }
1167
74.7k
    writeStringQDF("\n");
1168
74.7k
    if (which == t_lin_second) {
1169
29.9k
        writeString(" /Size ");
1170
29.9k
        writeString(std::to_string(size));
1171
44.7k
    } else {
1172
178k
        for (auto const& key: trailer.getKeys()) {
1173
178k
            writeStringQDF("  ");
1174
178k
            writeStringNoQDF(" ");
1175
178k
            writeString(QPDF_Name::normalizeName(key));
1176
178k
            writeString(" ");
1177
178k
            if (key == "/Size") {
1178
36.7k
                writeString(std::to_string(size));
1179
36.7k
                if (which == t_lin_first) {
1180
25.5k
                    writeString(" /Prev ");
1181
25.5k
                    qpdf_offset_t pos = m->pipeline->getCount();
1182
25.5k
                    writeString(std::to_string(prev));
1183
25.5k
                    writePad(QIntC::to_size(pos - m->pipeline->getCount() + 21));
1184
25.5k
                }
1185
142k
            } else {
1186
142k
                unparseChild(trailer.getKey(key), 1, 0);
1187
142k
            }
1188
178k
            writeStringQDF("\n");
1189
178k
        }
1190
44.7k
    }
1191
1192
    // Write ID
1193
74.7k
    writeStringQDF(" ");
1194
74.7k
    writeString(" /ID [");
1195
74.7k
    if (linearization_pass == 1) {
1196
30.9k
        std::string original_id1 = getOriginalID1();
1197
30.9k
        if (original_id1.empty()) {
1198
10.8k
            writeString("<00000000000000000000000000000000>");
1199
20.0k
        } else {
1200
            // Write a string of zeroes equal in length to the representation of the original ID.
1201
            // While writing the original ID would have the same number of bytes, it would cause a
1202
            // change to the deterministic ID generated by older versions of the software that
1203
            // hard-coded the length of the ID to 16 bytes.
1204
20.0k
            writeString("<");
1205
20.0k
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1206
1.45M
            for (size_t i = 0; i < len; ++i) {
1207
1.43M
                writeString("0");
1208
1.43M
            }
1209
20.0k
            writeString(">");
1210
20.0k
        }
1211
30.9k
        writeString("<00000000000000000000000000000000>");
1212
43.7k
    } else {
1213
43.7k
        if ((linearization_pass == 0) && (m->deterministic_id)) {
1214
8.07k
            computeDeterministicIDData();
1215
8.07k
        }
1216
43.7k
        generateID();
1217
43.7k
        writeString(QPDF_String(m->id1).unparse(true));
1218
43.7k
        writeString(QPDF_String(m->id2).unparse(true));
1219
43.7k
    }
1220
74.7k
    writeString("]");
1221
1222
74.7k
    if (which != t_lin_second) {
1223
        // Write reference to encryption dictionary
1224
43.1k
        if (m->encrypted) {
1225
28.1k
            writeString(" /Encrypt ");
1226
28.1k
            writeString(std::to_string(m->encryption_dict_objid));
1227
28.1k
            writeString(" 0 R");
1228
28.1k
        }
1229
43.1k
    }
1230
1231
74.7k
    writeStringQDF("\n");
1232
74.7k
    writeStringNoQDF(" ");
1233
74.7k
    writeString(">>");
1234
74.7k
}
1235
1236
bool
1237
QPDFWriter::willFilterStream(
1238
    QPDFObjectHandle stream,
1239
    bool& compress_stream, // out only
1240
    bool& is_metadata,     // out only
1241
    std::shared_ptr<Buffer>* stream_data)
1242
633k
{
1243
633k
    compress_stream = false;
1244
633k
    is_metadata = false;
1245
1246
633k
    QPDFObjGen old_og = stream.getObjGen();
1247
633k
    QPDFObjectHandle stream_dict = stream.getDict();
1248
1249
633k
    if (stream_dict.isDictionaryOfType("/Metadata")) {
1250
14.6k
        is_metadata = true;
1251
14.6k
    }
1252
633k
    bool filter = (stream.isDataModified() || m->compress_streams || m->stream_decode_level);
1253
633k
    bool filter_on_write = stream.getFilterOnWrite();
1254
633k
    if (!filter_on_write) {
1255
27.9k
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1256
27.9k
        filter = false;
1257
27.9k
    }
1258
633k
    if (filter_on_write && m->compress_streams) {
1259
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1260
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1261
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1262
        // setRecompressFlate(true).
1263
534k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1264
534k
        if ((!m->recompress_flate) && (!stream.isDataModified()) && filter_obj.isName() &&
1265
534k
            ((filter_obj.getName() == "/FlateDecode") || (filter_obj.getName() == "/Fl"))) {
1266
240k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1267
240k
            filter = false;
1268
240k
        }
1269
534k
    }
1270
633k
    bool normalize = false;
1271
633k
    bool uncompress = false;
1272
633k
    if (filter_on_write && is_metadata && ((!m->encrypted) || (m->encrypt_metadata == false))) {
1273
5.50k
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1274
5.50k
        filter = true;
1275
5.50k
        compress_stream = false;
1276
5.50k
        uncompress = true;
1277
628k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.count(old_og)) {
1278
15.6k
        normalize = true;
1279
15.6k
        filter = true;
1280
612k
    } else if (filter_on_write && filter && m->compress_streams) {
1281
289k
        compress_stream = true;
1282
289k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1283
289k
    }
1284
1285
633k
    bool filtered = false;
1286
667k
    for (int attempt = 1; attempt <= 2; ++attempt) {
1287
667k
        pushPipeline(new Pl_Buffer("stream data"));
1288
667k
        PipelinePopper pp_stream_data(this, stream_data);
1289
667k
        activatePipelineStack(pp_stream_data);
1290
667k
        try {
1291
667k
            filtered = stream.pipeStreamData(
1292
667k
                m->pipeline,
1293
667k
                (((filter && normalize) ? qpdf_ef_normalize : 0) |
1294
667k
                 ((filter && compress_stream) ? qpdf_ef_compress : 0)),
1295
667k
                (filter ? (uncompress ? qpdf_dl_all : m->stream_decode_level) : qpdf_dl_none),
1296
667k
                false,
1297
667k
                (attempt == 1));
1298
667k
        } catch (std::runtime_error& e) {
1299
455
            throw std::runtime_error(
1300
455
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1301
455
        }
1302
667k
        if (filter && !filtered) {
1303
            // Try again
1304
34.2k
            filter = false;
1305
34.2k
            stream.setFilterOnWrite(false);
1306
633k
        } else {
1307
633k
            break;
1308
633k
        }
1309
667k
    }
1310
633k
    if (!filtered) {
1311
303k
        compress_stream = false;
1312
303k
    }
1313
633k
    return filtered;
1314
633k
}
1315
1316
void
1317
QPDFWriter::unparseObject(
1318
    QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress)
1319
36.8M
{
1320
36.8M
    QPDFObjGen old_og = object.getObjGen();
1321
36.8M
    int child_flags = flags & ~f_stream;
1322
36.8M
    if (level < 0) {
1323
0
        throw std::logic_error("invalid level in QPDFWriter::unparseObject");
1324
0
    }
1325
    // For non-qdf, "indent" is a single space between tokens. For qdf, indent includes the
1326
    // preceding newline.
1327
36.8M
    std::string indent = " ";
1328
36.8M
    if (m->qdf_mode) {
1329
6.11M
        indent.append(static_cast<size_t>(2 * level), ' ');
1330
6.11M
        indent[0] = '\n';
1331
6.11M
    }
1332
1333
36.8M
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1334
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1335
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1336
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1337
1.39M
        writeString("[");
1338
28.7M
        for (auto const& item: object.getArrayAsVector()) {
1339
28.7M
            writeString(indent);
1340
28.7M
            writeStringQDF("  ");
1341
28.7M
            unparseChild(item, level + 1, child_flags);
1342
28.7M
        }
1343
1.39M
        writeString(indent);
1344
1.39M
        writeString("]");
1345
35.4M
    } else if (tc == ::ot_dictionary) {
1346
        // Make a shallow copy of this object so we can modify it safely without affecting the
1347
        // original. This code has logic to skip certain keys in agreement with prepareFileForWrite
1348
        // and with skip_stream_parameters so that replacing them doesn't leave unreferenced objects
1349
        // in the output. We can use unsafeShallowCopy here because all we are doing is removing or
1350
        // replacing top-level keys.
1351
2.85M
        object = object.unsafeShallowCopy();
1352
1353
        // Handle special cases for specific dictionaries.
1354
1355
        // Extensions dictionaries.
1356
1357
        // We have one of several cases:
1358
        //
1359
        // * We need ADBE
1360
        //    - We already have Extensions
1361
        //       - If it has the right ADBE, preserve it
1362
        //       - Otherwise, replace ADBE
1363
        //    - We don't have Extensions: create one from scratch
1364
        // * We don't want ADBE
1365
        //    - We already have Extensions
1366
        //       - If it only has ADBE, remove it
1367
        //       - If it has other things, keep those and remove ADBE
1368
        //    - We have no extensions: no action required
1369
        //
1370
        // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1371
        // dictionary, so we can modify in place.
1372
1373
2.85M
        const bool is_root = (old_og == m->root_og);
1374
2.85M
        bool have_extensions_other = false;
1375
2.85M
        bool have_extensions_adbe = false;
1376
1377
2.85M
        QPDFObjectHandle extensions;
1378
2.85M
        if (is_root) {
1379
43.9k
            if (object.hasKey("/Extensions") && object.getKey("/Extensions").isDictionary()) {
1380
844
                extensions = object.getKey("/Extensions");
1381
844
            }
1382
43.9k
        }
1383
1384
2.85M
        if (extensions.isInitialized()) {
1385
844
            std::set<std::string> keys = extensions.getKeys();
1386
844
            if (keys.count("/ADBE") > 0) {
1387
782
                have_extensions_adbe = true;
1388
782
                keys.erase("/ADBE");
1389
782
            }
1390
844
            if (keys.size() > 0) {
1391
130
                have_extensions_other = true;
1392
130
            }
1393
844
        }
1394
1395
2.85M
        bool need_extensions_adbe = (m->final_extension_level > 0);
1396
1397
2.85M
        if (is_root) {
1398
43.9k
            if (need_extensions_adbe) {
1399
22.2k
                if (!(have_extensions_other || have_extensions_adbe)) {
1400
                    // We need Extensions and don't have it.  Create it here.
1401
21.5k
                    QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1402
21.5k
                    extensions = object.replaceKeyAndGetNew(
1403
21.5k
                        "/Extensions", QPDFObjectHandle::newDictionary());
1404
21.5k
                }
1405
22.2k
            } else if (!have_extensions_other) {
1406
                // We have Extensions dictionary and don't want one.
1407
21.6k
                if (have_extensions_adbe) {
1408
105
                    QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1409
105
                    object.removeKey("/Extensions");
1410
105
                    extensions = QPDFObjectHandle(); // uninitialized
1411
105
                }
1412
21.6k
            }
1413
43.9k
        }
1414
1415
2.85M
        if (extensions.isInitialized()) {
1416
22.2k
            QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1417
22.2k
            QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1418
22.2k
            if (adbe.isDictionary() &&
1419
22.2k
                adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1420
22.2k
                adbe.getKey("/ExtensionLevel").isInteger() &&
1421
22.2k
                (adbe.getKey("/ExtensionLevel").getIntValue() == m->final_extension_level)) {
1422
420
                QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1423
21.8k
            } else {
1424
21.8k
                if (need_extensions_adbe) {
1425
21.7k
                    extensions.replaceKey(
1426
21.7k
                        "/ADBE",
1427
21.7k
                        QPDFObjectHandle::parse(
1428
21.7k
                            "<< /BaseVersion /" + m->final_pdf_version + " /ExtensionLevel " +
1429
21.7k
                            std::to_string(m->final_extension_level) + " >>"));
1430
21.7k
                } else {
1431
48
                    QTC::TC("qpdf", "QPDFWriter remove ADBE");
1432
48
                    extensions.removeKey("/ADBE");
1433
48
                }
1434
21.8k
            }
1435
22.2k
        }
1436
1437
        // Stream dictionaries.
1438
1439
2.85M
        if (flags & f_stream) {
1440
            // Suppress /Length since we will write it manually
1441
448k
            object.removeKey("/Length");
1442
1443
            // If /DecodeParms is an empty list, remove it.
1444
448k
            if (object.getKey("/DecodeParms").isArray() &&
1445
448k
                (0 == object.getKey("/DecodeParms").getArrayNItems())) {
1446
239
                QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1447
239
                object.removeKey("/DecodeParms");
1448
239
            }
1449
1450
448k
            if (flags & f_filtered) {
1451
                // We will supply our own filter and decode
1452
                // parameters.
1453
240k
                object.removeKey("/Filter");
1454
240k
                object.removeKey("/DecodeParms");
1455
240k
            } else {
1456
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1457
                // filters.
1458
207k
                QPDFObjectHandle filter = object.getKey("/Filter");
1459
207k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1460
207k
                if (filter.isOrHasName("/Crypt")) {
1461
79
                    if (filter.isName()) {
1462
15
                        object.removeKey("/Filter");
1463
15
                        object.removeKey("/DecodeParms");
1464
64
                    } else {
1465
64
                        int idx = -1;
1466
143
                        for (int i = 0; i < filter.getArrayNItems(); ++i) {
1467
143
                            QPDFObjectHandle item = filter.getArrayItem(i);
1468
143
                            if (item.isNameAndEquals("/Crypt")) {
1469
64
                                idx = i;
1470
64
                                break;
1471
64
                            }
1472
143
                        }
1473
64
                        if (idx >= 0) {
1474
                            // If filter is an array, then the code in QPDF_Stream has already
1475
                            // verified that DecodeParms and Filters are arrays of the same length,
1476
                            // but if they weren't for some reason, eraseItem does type and bounds
1477
                            // checking.
1478
64
                            QTC::TC("qpdf", "QPDFWriter remove Crypt");
1479
64
                            filter.eraseItem(idx);
1480
64
                            decode_parms.eraseItem(idx);
1481
64
                        }
1482
64
                    }
1483
79
                }
1484
207k
            }
1485
448k
        }
1486
1487
2.85M
        writeString("<<");
1488
1489
10.8M
        for (auto& item: object.getDictAsMap()) {
1490
10.8M
            if (!item.second.isNull()) {
1491
10.2M
                auto const& key = item.first;
1492
10.2M
                writeString(indent);
1493
10.2M
                writeStringQDF("  ");
1494
10.2M
                writeString(QPDF_Name::normalizeName(key));
1495
10.2M
                writeString(" ");
1496
10.2M
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1497
10.2M
                    object.hasKey("/ByteRange")) {
1498
111
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1499
111
                    unparseChild(
1500
111
                        item.second, level + 1, child_flags | f_hex_string | f_no_encryption);
1501
10.2M
                } else {
1502
10.2M
                    unparseChild(item.second, level + 1, child_flags);
1503
10.2M
                }
1504
10.2M
            }
1505
10.8M
        }
1506
1507
2.85M
        if (flags & f_stream) {
1508
448k
            writeString(indent);
1509
448k
            writeStringQDF("  ");
1510
448k
            writeString("/Length ");
1511
1512
448k
            if (m->direct_stream_lengths) {
1513
377k
                writeString(std::to_string(stream_length));
1514
377k
            } else {
1515
70.8k
                writeString(std::to_string(m->cur_stream_length_id));
1516
70.8k
                writeString(" 0 R");
1517
70.8k
            }
1518
448k
            if (compress && (flags & f_filtered)) {
1519
177k
                writeString(indent);
1520
177k
                writeStringQDF("  ");
1521
177k
                writeString("/Filter /FlateDecode");
1522
177k
            }
1523
448k
        }
1524
1525
2.85M
        writeString(indent);
1526
2.85M
        writeString(">>");
1527
32.5M
    } else if (tc == ::ot_stream) {
1528
        // Write stream data to a buffer.
1529
448k
        if (!m->direct_stream_lengths) {
1530
70.9k
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1531
70.9k
        }
1532
1533
448k
        flags |= f_stream;
1534
448k
        bool compress_stream = false;
1535
448k
        bool is_metadata = false;
1536
448k
        std::shared_ptr<Buffer> stream_data;
1537
448k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1538
240k
            flags |= f_filtered;
1539
240k
        }
1540
448k
        QPDFObjectHandle stream_dict = object.getDict();
1541
1542
448k
        m->cur_stream_length = stream_data->getSize();
1543
448k
        if (is_metadata && m->encrypted && (!m->encrypt_metadata)) {
1544
            // Don't encrypt stream data for the metadata stream
1545
0
            m->cur_data_key.clear();
1546
0
        }
1547
448k
        adjustAESStreamLength(m->cur_stream_length);
1548
448k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1549
448k
        unsigned char last_char = '\0';
1550
448k
        writeString("\nstream\n");
1551
448k
        {
1552
448k
            PipelinePopper pp_enc(this);
1553
448k
            pushEncryptionFilter(pp_enc);
1554
448k
            writeBuffer(stream_data);
1555
448k
            last_char = m->pipeline->getLastChar();
1556
448k
        }
1557
1558
448k
        if (m->newline_before_endstream || (m->qdf_mode && (last_char != '\n'))) {
1559
27.1k
            writeString("\n");
1560
27.1k
            m->added_newline = true;
1561
421k
        } else {
1562
421k
            m->added_newline = false;
1563
421k
        }
1564
448k
        writeString("endstream");
1565
32.1M
    } else if (tc == ::ot_string) {
1566
1.06M
        std::string val;
1567
1.06M
        if (m->encrypted && (!(flags & f_in_ostream)) && (!(flags & f_no_encryption)) &&
1568
1.06M
            (!m->cur_data_key.empty())) {
1569
570k
            val = object.getStringValue();
1570
570k
            if (m->encrypt_use_aes) {
1571
472k
                Pl_Buffer bufpl("encrypted string");
1572
472k
                Pl_AES_PDF pl(
1573
472k
                    "aes encrypt string",
1574
472k
                    &bufpl,
1575
472k
                    true,
1576
472k
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1577
472k
                    m->cur_data_key.length());
1578
472k
                pl.writeString(val);
1579
472k
                pl.finish();
1580
472k
                val = QPDF_String(bufpl.getString()).unparse(true);
1581
472k
            } else {
1582
98.4k
                auto tmp_ph = QUtil::make_unique_cstr(val);
1583
98.4k
                char* tmp = tmp_ph.get();
1584
98.4k
                size_t vlen = val.length();
1585
98.4k
                RC4 rc4(
1586
98.4k
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1587
98.4k
                    QIntC::to_int(m->cur_data_key.length()));
1588
98.4k
                auto data = QUtil::unsigned_char_pointer(tmp);
1589
98.4k
                rc4.process(data, vlen, data);
1590
98.4k
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1591
98.4k
            }
1592
570k
        } else if (flags & f_hex_string) {
1593
111
            val = QPDF_String(object.getStringValue()).unparse(true);
1594
496k
        } else {
1595
496k
            val = object.unparseResolved();
1596
496k
        }
1597
1.06M
        writeString(val);
1598
31.0M
    } else {
1599
31.0M
        writeString(object.unparseResolved());
1600
31.0M
    }
1601
36.8M
}
1602
1603
void
1604
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1605
37.9k
{
1606
660k
    for (size_t i = 0; i < offsets.size(); ++i) {
1607
622k
        if (i != 0) {
1608
585k
            writeStringQDF("\n");
1609
585k
            writeStringNoQDF(" ");
1610
585k
        }
1611
622k
        writeString(std::to_string(i + QIntC::to_size(first_obj)));
1612
622k
        writeString(" ");
1613
622k
        writeString(std::to_string(offsets.at(i)));
1614
622k
    }
1615
37.9k
    writeString("\n");
1616
37.9k
}
1617
1618
void
1619
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1620
18.9k
{
1621
    // Note: object might be null if this is a place-holder for an object stream that we are
1622
    // generating from scratch.
1623
1624
18.9k
    QPDFObjGen old_og = object.getObjGen();
1625
18.9k
    qpdf_assert_debug(old_og.getGen() == 0);
1626
18.9k
    int old_id = old_og.getObj();
1627
18.9k
    int new_stream_id = m->obj[old_og].renumber;
1628
1629
18.9k
    std::vector<qpdf_offset_t> offsets;
1630
18.9k
    qpdf_offset_t first = 0;
1631
1632
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1633
    // first pass.
1634
18.9k
    std::shared_ptr<Buffer> stream_buffer;
1635
18.9k
    int first_obj = -1;
1636
18.9k
    bool compressed = false;
1637
56.8k
    for (int pass = 1; pass <= 2; ++pass) {
1638
        // stream_buffer will be initialized only for pass 2
1639
37.9k
        PipelinePopper pp_ostream(this, &stream_buffer);
1640
37.9k
        if (pass == 1) {
1641
18.9k
            pushDiscardFilter(pp_ostream);
1642
18.9k
        } else {
1643
            // Adjust offsets to skip over comment before first object
1644
18.9k
            first = offsets.at(0);
1645
311k
            for (auto& iter: offsets) {
1646
311k
                iter -= first;
1647
311k
            }
1648
1649
            // Take one pass at writing pairs of numbers so we can get their size information
1650
18.9k
            {
1651
18.9k
                PipelinePopper pp_discard(this);
1652
18.9k
                pushDiscardFilter(pp_discard);
1653
18.9k
                writeObjectStreamOffsets(offsets, first_obj);
1654
18.9k
                first += m->pipeline->getCount();
1655
18.9k
            }
1656
1657
            // Set up a stream to write the stream data into a buffer.
1658
18.9k
            Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
1659
18.9k
            if (m->compress_streams && !m->qdf_mode) {
1660
16.8k
                compressed = true;
1661
16.8k
                next =
1662
16.8k
                    pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate));
1663
16.8k
            }
1664
18.9k
            activatePipelineStack(pp_ostream);
1665
18.9k
            writeObjectStreamOffsets(offsets, first_obj);
1666
18.9k
        }
1667
1668
37.9k
        int count = -1;
1669
622k
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1670
622k
            ++count;
1671
622k
            int new_obj = m->obj[obj].renumber;
1672
622k
            if (first_obj == -1) {
1673
18.9k
                first_obj = new_obj;
1674
18.9k
            }
1675
622k
            if (m->qdf_mode) {
1676
70.6k
                writeString(
1677
70.6k
                    "%% Object stream: object " + std::to_string(new_obj) + ", index " +
1678
70.6k
                    std::to_string(count));
1679
70.6k
                if (!m->suppress_original_object_ids) {
1680
70.6k
                    writeString("; original object ID: " + std::to_string(obj.getObj()));
1681
                    // For compatibility, only write the generation if non-zero.  While object
1682
                    // streams only allow objects with generation 0, if we are generating object
1683
                    // streams, the old object could have a non-zero generation.
1684
70.6k
                    if (obj.getGen() != 0) {
1685
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1686
0
                        writeString(" " + std::to_string(obj.getGen()));
1687
0
                    }
1688
70.6k
                }
1689
70.6k
                writeString("\n");
1690
70.6k
            }
1691
622k
            if (pass == 1) {
1692
311k
                offsets.push_back(m->pipeline->getCount());
1693
                // To avoid double-counting objects being written in object streams for progress
1694
                // reporting, decrement in pass 1.
1695
311k
                indicateProgress(true, false);
1696
311k
            }
1697
622k
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1698
622k
            if (obj_to_write.isStream()) {
1699
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1700
                // time, but it's not clear to me how to construct a case for this.
1701
0
                QTC::TC("qpdf", "QPDFWriter stream in ostream");
1702
0
                obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1703
0
                obj_to_write = QPDFObjectHandle::newNull();
1704
0
            }
1705
622k
            writeObject(obj_to_write, count);
1706
1707
622k
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1708
622k
        }
1709
37.9k
    }
1710
1711
    // Write the object
1712
18.9k
    openObject(new_stream_id);
1713
18.9k
    setDataKey(new_stream_id);
1714
18.9k
    writeString("<<");
1715
18.9k
    writeStringQDF("\n ");
1716
18.9k
    writeString(" /Type /ObjStm");
1717
18.9k
    writeStringQDF("\n ");
1718
18.9k
    size_t length = stream_buffer->getSize();
1719
18.9k
    adjustAESStreamLength(length);
1720
18.9k
    writeString(" /Length " + std::to_string(length));
1721
18.9k
    writeStringQDF("\n ");
1722
18.9k
    if (compressed) {
1723
16.8k
        writeString(" /Filter /FlateDecode");
1724
16.8k
    }
1725
18.9k
    writeString(" /N " + std::to_string(offsets.size()));
1726
18.9k
    writeStringQDF("\n ");
1727
18.9k
    writeString(" /First " + std::to_string(first));
1728
18.9k
    if (!object.isNull()) {
1729
        // If the original object has an /Extends key, preserve it.
1730
10.6k
        QPDFObjectHandle dict = object.getDict();
1731
10.6k
        QPDFObjectHandle extends = dict.getKey("/Extends");
1732
10.6k
        if (extends.isIndirect()) {
1733
646
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1734
646
            writeStringQDF("\n ");
1735
646
            writeString(" /Extends ");
1736
646
            unparseChild(extends, 1, f_in_ostream);
1737
646
        }
1738
10.6k
    }
1739
18.9k
    writeStringQDF("\n");
1740
18.9k
    writeStringNoQDF(" ");
1741
18.9k
    writeString(">>\nstream\n");
1742
18.9k
    if (m->encrypted) {
1743
8.66k
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1744
8.66k
    }
1745
18.9k
    {
1746
18.9k
        PipelinePopper pp_enc(this);
1747
18.9k
        pushEncryptionFilter(pp_enc);
1748
18.9k
        writeBuffer(stream_buffer);
1749
18.9k
    }
1750
18.9k
    if (m->newline_before_endstream) {
1751
0
        writeString("\n");
1752
0
    }
1753
18.9k
    writeString("endstream");
1754
18.9k
    m->cur_data_key.clear();
1755
18.9k
    closeObject(new_stream_id);
1756
18.9k
}
1757
1758
void
1759
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1760
1.85M
{
1761
1.85M
    QPDFObjGen old_og = object.getObjGen();
1762
1763
1.85M
    if ((object_stream_index == -1) && (old_og.getGen() == 0) &&
1764
1.85M
        (m->object_stream_to_objects.count(old_og.getObj()))) {
1765
18.9k
        writeObjectStream(object);
1766
18.9k
        return;
1767
18.9k
    }
1768
1769
1.83M
    indicateProgress(false, false);
1770
1.83M
    auto new_id = m->obj[old_og].renumber;
1771
1.83M
    if (m->qdf_mode) {
1772
289k
        if (m->page_object_to_seq.count(old_og)) {
1773
33.0k
            writeString("%% Page ");
1774
33.0k
            writeString(std::to_string(m->page_object_to_seq[old_og]));
1775
33.0k
            writeString("\n");
1776
33.0k
        }
1777
289k
        if (m->contents_to_page_seq.count(old_og)) {
1778
16.8k
            writeString("%% Contents for page ");
1779
16.8k
            writeString(std::to_string(m->contents_to_page_seq[old_og]));
1780
16.8k
            writeString("\n");
1781
16.8k
        }
1782
289k
    }
1783
1.83M
    if (object_stream_index == -1) {
1784
1.21M
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1785
218k
            writeString("%% Original object ID: " + object.getObjGen().unparse(' ') + "\n");
1786
218k
        }
1787
1.21M
        openObject(new_id);
1788
1.21M
        setDataKey(new_id);
1789
1.21M
        unparseObject(object, 0, 0);
1790
1.21M
        m->cur_data_key.clear();
1791
1.21M
        closeObject(new_id);
1792
1.21M
    } else {
1793
622k
        unparseObject(object, 0, f_in_ostream);
1794
622k
        writeString("\n");
1795
622k
    }
1796
1797
1.83M
    if ((!m->direct_stream_lengths) && object.isStream()) {
1798
70.8k
        if (m->qdf_mode) {
1799
70.8k
            if (m->added_newline) {
1800
27.1k
                writeString("%QDF: ignore_newline\n");
1801
27.1k
            }
1802
70.8k
        }
1803
70.8k
        openObject(new_id + 1);
1804
70.8k
        writeString(std::to_string(m->cur_stream_length));
1805
70.8k
        closeObject(new_id + 1);
1806
70.8k
    }
1807
1.83M
}
1808
1809
std::string
1810
QPDFWriter::getOriginalID1()
1811
63.3k
{
1812
63.3k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1813
63.3k
    if (trailer.hasKey("/ID")) {
1814
41.5k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1815
41.5k
    } else {
1816
21.8k
        return "";
1817
21.8k
    }
1818
63.3k
}
1819
1820
void
1821
QPDFWriter::generateID()
1822
64.2k
{
1823
    // Generate the ID lazily so that we can handle the user's preference to use static or
1824
    // deterministic ID generation.
1825
1826
64.2k
    if (!m->id2.empty()) {
1827
31.8k
        return;
1828
31.8k
    }
1829
1830
32.4k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1831
1832
32.4k
    std::string result;
1833
1834
32.4k
    if (m->static_id) {
1835
        // For test suite use only...
1836
20.4k
        static unsigned char tmp[] = {
1837
20.4k
            0x31,
1838
20.4k
            0x41,
1839
20.4k
            0x59,
1840
20.4k
            0x26,
1841
20.4k
            0x53,
1842
20.4k
            0x58,
1843
20.4k
            0x97,
1844
20.4k
            0x93,
1845
20.4k
            0x23,
1846
20.4k
            0x84,
1847
20.4k
            0x62,
1848
20.4k
            0x64,
1849
20.4k
            0x33,
1850
20.4k
            0x83,
1851
20.4k
            0x27,
1852
20.4k
            0x95,
1853
20.4k
            0x00};
1854
20.4k
        result = reinterpret_cast<char*>(tmp);
1855
20.4k
    } else {
1856
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1857
        // only thing that's really important is that it is very likely to be unique.  We can't
1858
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1859
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1860
        // sufficient portion of the file's contents such no two non-matching files would match in
1861
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1862
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1863
        // that case, would have the same ID regardless of the output file's name.
1864
1865
11.9k
        std::string seed;
1866
11.9k
        if (m->deterministic_id) {
1867
11.9k
            if (m->deterministic_id_data.empty()) {
1868
0
                QTC::TC("qpdf", "QPDFWriter deterministic with no data");
1869
0
                throw std::logic_error("INTERNAL ERROR: QPDFWriter::generateID has no data for "
1870
0
                                       "deterministic ID.  This may happen if deterministic ID and "
1871
0
                                       "file encryption are requested together.");
1872
0
            }
1873
11.9k
            seed += m->deterministic_id_data;
1874
11.9k
        } else {
1875
0
            seed += std::to_string(QUtil::get_current_time());
1876
0
            seed += m->filename;
1877
0
            seed += " ";
1878
0
        }
1879
11.9k
        seed += " QPDF ";
1880
11.9k
        if (trailer.hasKey("/Info")) {
1881
3.52k
            QPDFObjectHandle info = trailer.getKey("/Info");
1882
13.5k
            for (auto const& key: info.getKeys()) {
1883
13.5k
                QPDFObjectHandle obj = info.getKey(key);
1884
13.5k
                if (obj.isString()) {
1885
10.4k
                    seed += " ";
1886
10.4k
                    seed += obj.getStringValue();
1887
10.4k
                }
1888
13.5k
            }
1889
3.52k
        }
1890
1891
11.9k
        MD5 m;
1892
11.9k
        m.encodeString(seed.c_str());
1893
11.9k
        MD5::Digest digest;
1894
11.9k
        m.digest(digest);
1895
11.9k
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1896
11.9k
    }
1897
1898
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1899
    // word.  Otherwise, we'll use the generated ID for both.
1900
1901
32.4k
    m->id2 = result;
1902
    // Note: keep /ID from old file even if --static-id was given.
1903
32.4k
    m->id1 = getOriginalID1();
1904
32.4k
    if (m->id1.empty()) {
1905
12.6k
        m->id1 = m->id2;
1906
12.6k
    }
1907
32.4k
}
1908
1909
void
1910
QPDFWriter::initializeSpecialStreams()
1911
33.4k
{
1912
    // Mark all page content streams in case we are filtering or
1913
    // normalizing.
1914
33.4k
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1915
33.4k
    int num = 0;
1916
178k
    for (auto& page: pages) {
1917
178k
        m->page_object_to_seq[page.getObjGen()] = ++num;
1918
178k
        QPDFObjectHandle contents = page.getKey("/Contents");
1919
178k
        std::vector<QPDFObjGen> contents_objects;
1920
178k
        if (contents.isArray()) {
1921
2.68k
            int n = contents.getArrayNItems();
1922
35.6k
            for (int i = 0; i < n; ++i) {
1923
32.9k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1924
32.9k
            }
1925
175k
        } else if (contents.isStream()) {
1926
97.7k
            contents_objects.push_back(contents.getObjGen());
1927
97.7k
        }
1928
1929
178k
        for (auto const& c: contents_objects) {
1930
130k
            m->contents_to_page_seq[c] = num;
1931
130k
            m->normalized_streams.insert(c);
1932
130k
        }
1933
178k
    }
1934
33.4k
}
1935
1936
void
1937
QPDFWriter::preserveObjectStreams()
1938
22.0k
{
1939
22.0k
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1940
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1941
    // streams out of old objects that have generation numbers greater than zero. However in an
1942
    // existing PDF, all object stream objects and all objects in them must have generation 0
1943
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1944
    // that are not allowed to be in object streams. In addition to removing objects that were
1945
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1946
    // objects from being included.
1947
22.0k
    auto end = xref.cend();
1948
22.0k
    m->obj.streams_empty = true;
1949
22.0k
    if (m->preserve_unreferenced_objects) {
1950
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1951
0
            if (iter->second.getType() == 2) {
1952
                // Pdf contains object streams.
1953
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1954
0
                m->obj.streams_empty = false;
1955
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1956
0
            }
1957
0
        }
1958
22.0k
    } else {
1959
        // Start by scanning for first compressed object in case we don't have any object streams to
1960
        // process.
1961
886k
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1962
866k
            if (iter->second.getType() == 2) {
1963
                // Pdf contains object streams.
1964
2.63k
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
1965
2.63k
                m->obj.streams_empty = false;
1966
2.63k
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1967
                // The object pointed to by iter may be a previous generation, in which case it is
1968
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
1969
                // table may contain multiple generations of an object).
1970
349k
                for (iter = xref.cbegin(); iter != end; ++iter) {
1971
346k
                    if (iter->second.getType() == 2) {
1972
237k
                        auto id = static_cast<size_t>(iter->first.getObj());
1973
237k
                        if (id < eligible.size() && eligible[id]) {
1974
122k
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1975
122k
                        } else {
1976
114k
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1977
114k
                        }
1978
237k
                    }
1979
346k
                }
1980
2.63k
                return;
1981
2.63k
            }
1982
866k
        }
1983
22.0k
    }
1984
22.0k
}
1985
1986
void
1987
QPDFWriter::generateObjectStreams()
1988
5.27k
{
1989
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
1990
    // how many object streams are needed so that we can distribute objects approximately evenly
1991
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
1992
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
1993
    // here later.
1994
1995
    // This code doesn't do anything with /Extends.
1996
1997
5.27k
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
1998
5.27k
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
1999
2000
5.27k
    initializeTables(2U * n_object_streams);
2001
5.27k
    if (n_object_streams == 0) {
2002
30
        m->obj.streams_empty = true;
2003
30
        return;
2004
30
    }
2005
5.24k
    size_t n_per = eligible.size() / n_object_streams;
2006
5.24k
    if (n_per * n_object_streams < eligible.size()) {
2007
122
        ++n_per;
2008
122
    }
2009
5.24k
    unsigned int n = 0;
2010
5.24k
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2011
129k
    for (auto const& item: eligible) {
2012
129k
        if (n == n_per) {
2013
233
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2014
233
            n = 0;
2015
            // Construct a new null object as the "original" object stream.  The rest of the code
2016
            // knows that this means we're creating the object stream from scratch.
2017
233
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2018
233
        }
2019
129k
        auto& obj = m->obj[item];
2020
129k
        obj.object_stream = cur_ostream;
2021
129k
        obj.gen = item.getGen();
2022
129k
        ++n;
2023
129k
    }
2024
5.24k
}
2025
2026
// Return a shallow copy of the input file's trailer with the keys removed that necessarily have to
// be replaced when writing the file.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    static char const* const replaced_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // All trailer keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };

    QPDFObjectHandle trimmed = m->pdf.getTrailer().unsafeShallowCopy();
    for (char const* key: replaced_keys) {
        trimmed.removeKey(key);
    }
    return trimmed;
}
2051
2052
// Make document extension level information direct as required by the spec.
2053
void
2054
QPDFWriter::prepareFileForWrite()
2055
32.3k
{
2056
32.3k
    m->pdf.fixDanglingReferences();
2057
32.3k
    auto root = m->pdf.getRoot();
2058
32.3k
    auto oh = root.getKey("/Extensions");
2059
32.3k
    if (oh.isDictionary()) {
2060
844
        const bool extensions_indirect = oh.isIndirect();
2061
844
        if (extensions_indirect) {
2062
0
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2063
0
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2064
0
        }
2065
844
        if (oh.hasKey("/ADBE")) {
2066
771
            auto adbe = oh.getKey("/ADBE");
2067
771
            if (adbe.isIndirect()) {
2068
78
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2069
78
                adbe.makeDirect();
2070
78
                oh.replaceKey("/ADBE", adbe);
2071
78
            }
2072
771
        }
2073
844
    }
2074
32.3k
}
2075
2076
void
2077
QPDFWriter::initializeTables(size_t extra)
2078
33.2k
{
2079
33.2k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2080
33.2k
    m->obj.resize(size);
2081
33.2k
    m->new_obj.resize(size);
2082
33.2k
}
2083
2084
void
2085
QPDFWriter::doWriteSetup()
2086
33.4k
{
2087
33.4k
    if (m->did_write_setup) {
2088
0
        return;
2089
0
    }
2090
33.4k
    m->did_write_setup = true;
2091
2092
    // Do preliminary setup
2093
2094
33.4k
    if (m->linearized) {
2095
18.9k
        m->qdf_mode = false;
2096
18.9k
    }
2097
2098
33.4k
    if (m->pclm) {
2099
0
        m->stream_decode_level = qpdf_dl_none;
2100
0
        m->compress_streams = false;
2101
0
        m->encrypted = false;
2102
0
    }
2103
2104
33.4k
    if (m->qdf_mode) {
2105
8.61k
        if (!m->normalize_content_set) {
2106
8.61k
            m->normalize_content = true;
2107
8.61k
        }
2108
8.61k
        if (!m->compress_streams_set) {
2109
8.61k
            m->compress_streams = false;
2110
8.61k
        }
2111
8.61k
        if (!m->stream_decode_level_set) {
2112
0
            m->stream_decode_level = qpdf_dl_generalized;
2113
0
        }
2114
8.61k
    }
2115
2116
33.4k
    if (m->encrypted) {
2117
        // Encryption has been explicitly set
2118
19.5k
        m->preserve_encryption = false;
2119
19.5k
    } else if (m->normalize_content || m->stream_decode_level || m->pclm || m->qdf_mode) {
2120
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2121
        // though, we still obey that.
2122
13.8k
        m->preserve_encryption = false;
2123
13.8k
    }
2124
2125
33.4k
    if (m->preserve_encryption) {
2126
0
        copyEncryptionParameters(m->pdf);
2127
0
    }
2128
2129
33.4k
    if (!m->forced_pdf_version.empty()) {
2130
0
        int major = 0;
2131
0
        int minor = 0;
2132
0
        parseVersion(m->forced_pdf_version, major, minor);
2133
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2134
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2135
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2136
0
            m->object_stream_mode = qpdf_o_disable;
2137
0
        }
2138
0
    }
2139
2140
33.4k
    if (m->qdf_mode || m->normalize_content || m->stream_decode_level) {
2141
33.4k
        initializeSpecialStreams();
2142
33.4k
    }
2143
2144
33.4k
    if (m->qdf_mode) {
2145
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2146
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2147
        // hint streams always get direct stream lengths.
2148
8.53k
        m->direct_stream_lengths = false;
2149
8.53k
    }
2150
2151
33.4k
    switch (m->object_stream_mode) {
2152
5.97k
    case qpdf_o_disable:
2153
5.97k
        initializeTables();
2154
5.97k
        m->obj.streams_empty = true;
2155
5.97k
        break;
2156
2157
22.0k
    case qpdf_o_preserve:
2158
22.0k
        initializeTables();
2159
22.0k
        preserveObjectStreams();
2160
22.0k
        break;
2161
2162
5.27k
    case qpdf_o_generate:
2163
5.27k
        generateObjectStreams();
2164
5.27k
        break;
2165
2166
        // no default so gcc will warn for missing case tag
2167
33.4k
    }
2168
2169
33.1k
    if (!m->obj.streams_empty) {
2170
7.69k
        if (m->linearized) {
2171
            // Page dictionaries are not allowed to be compressed objects.
2172
47.5k
            for (auto& page: m->pdf.getAllPages()) {
2173
47.5k
                if (m->obj[page].object_stream > 0) {
2174
44.5k
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2175
44.5k
                    m->obj[page].object_stream = 0;
2176
44.5k
                }
2177
47.5k
            }
2178
6.92k
        }
2179
2180
7.69k
        if (m->linearized || m->encrypted) {
2181
            // The document catalog is not allowed to be compressed in linearized files either.  It
2182
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2183
            // handle encrypted files with compressed document catalogs, so we disable them in that
2184
            // case as well.
2185
6.92k
            if (m->obj[m->root_og].object_stream > 0) {
2186
5.15k
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2187
5.15k
                m->obj[m->root_og].object_stream = 0;
2188
5.15k
            }
2189
6.92k
        }
2190
2191
        // Generate reverse mapping from object stream to objects
2192
4.98M
        m->obj.forEach([this](auto id, auto const& item) -> void {
2193
4.98M
            if (item.object_stream > 0) {
2194
202k
                auto& vec = m->object_stream_to_objects[item.object_stream];
2195
202k
                vec.emplace_back(id, item.gen);
2196
202k
                if (m->max_ostream_index < vec.size()) {
2197
131k
                    ++m->max_ostream_index;
2198
131k
                }
2199
202k
            }
2200
4.98M
        });
2201
7.69k
        --m->max_ostream_index;
2202
2203
7.69k
        if (m->object_stream_to_objects.empty()) {
2204
254
            m->obj.streams_empty = true;
2205
7.44k
        } else {
2206
7.44k
            setMinimumPDFVersion("1.5");
2207
7.44k
        }
2208
7.69k
    }
2209
2210
33.1k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2211
33.1k
    m->final_pdf_version = m->min_pdf_version;
2212
33.1k
    m->final_extension_level = m->min_extension_level;
2213
33.1k
    if (!m->forced_pdf_version.empty()) {
2214
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2215
0
        m->final_pdf_version = m->forced_pdf_version;
2216
0
        m->final_extension_level = m->forced_extension_level;
2217
0
    }
2218
33.1k
}
2219
2220
void
2221
QPDFWriter::write()
2222
33.4k
{
2223
33.4k
    doWriteSetup();
2224
2225
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2226
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2227
33.4k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2228
2229
33.4k
    prepareFileForWrite();
2230
2231
33.4k
    if (m->linearized) {
2232
18.1k
        writeLinearized();
2233
18.1k
    } else {
2234
15.3k
        writeStandard();
2235
15.3k
    }
2236
2237
33.4k
    m->pipeline->finish();
2238
33.4k
    if (m->close_file) {
2239
0
        fclose(m->file);
2240
0
    }
2241
33.4k
    m->file = nullptr;
2242
33.4k
    if (m->buffer_pipeline) {
2243
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2244
0
        m->buffer_pipeline = nullptr;
2245
0
    }
2246
33.4k
    indicateProgress(false, true);
2247
33.4k
}
2248
2249
// Return the object ID that `og` is renumbered to, paired with generation 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const renumbered_id = m->obj[og].renumber;
    return QPDFObjGen(renumbered_id, 0);
}
2254
2255
std::map<QPDFObjGen, QPDFXRefEntry>
2256
QPDFWriter::getWrittenXRefTable()
2257
0
{
2258
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2259
2260
0
    auto it = result.begin();
2261
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2262
0
        if (item.xref.getType() != 0) {
2263
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2264
0
        }
2265
0
    });
2266
0
    return result;
2267
0
}
2268
2269
void
2270
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2271
80.9k
{
2272
452k
    for (auto const& oh: part) {
2273
452k
        enqueueObject(oh);
2274
452k
    }
2275
80.9k
}
2276
2277
void
2278
QPDFWriter::writeEncryptionDictionary()
2279
28.1k
{
2280
28.1k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2281
28.1k
    writeString("<<");
2282
331k
    for (auto const& iter: m->encryption_dictionary) {
2283
331k
        writeString(" ");
2284
331k
        writeString(iter.first);
2285
331k
        writeString(" ");
2286
331k
        writeString(iter.second);
2287
331k
    }
2288
28.1k
    writeString(" >>");
2289
28.1k
    closeObject(m->encryption_dict_objid);
2290
28.1k
}
2291
2292
std::string
2293
QPDFWriter::getFinalVersion()
2294
0
{
2295
0
    doWriteSetup();
2296
0
    return m->final_pdf_version;
2297
0
}
2298
2299
void
2300
QPDFWriter::writeHeader()
2301
45.0k
{
2302
45.0k
    writeString("%PDF-");
2303
45.0k
    writeString(m->final_pdf_version);
2304
45.0k
    if (m->pclm) {
2305
        // PCLm version
2306
0
        writeString("\n%PCLm 1.0\n");
2307
45.0k
    } else {
2308
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2309
        // as binary.
2310
45.0k
        writeString("\n%\xbf\xf7\xa2\xfe\n");
2311
45.0k
    }
2312
45.0k
    writeStringQDF("%QDF-1.0\n\n");
2313
2314
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2315
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2316
    // linearized files, we have to write extra header text after the linearization parameter
2317
    // dictionary.
2318
45.0k
}
2319
2320
void
2321
QPDFWriter::writeHintStream(int hint_id)
2322
14.9k
{
2323
14.9k
    std::shared_ptr<Buffer> hint_buffer;
2324
14.9k
    int S = 0;
2325
14.9k
    int O = 0;
2326
14.9k
    bool compressed = (m->compress_streams && !m->qdf_mode);
2327
14.9k
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2328
2329
14.9k
    openObject(hint_id);
2330
14.9k
    setDataKey(hint_id);
2331
2332
14.9k
    size_t hlen = hint_buffer->getSize();
2333
2334
14.9k
    writeString("<< ");
2335
14.9k
    if (compressed) {
2336
14.9k
        writeString("/Filter /FlateDecode ");
2337
14.9k
    }
2338
14.9k
    writeString("/S ");
2339
14.9k
    writeString(std::to_string(S));
2340
14.9k
    if (O) {
2341
3.11k
        writeString(" /O ");
2342
3.11k
        writeString(std::to_string(O));
2343
3.11k
    }
2344
14.9k
    writeString(" /Length ");
2345
14.9k
    adjustAESStreamLength(hlen);
2346
14.9k
    writeString(std::to_string(hlen));
2347
14.9k
    writeString(" >>\nstream\n");
2348
2349
14.9k
    if (m->encrypted) {
2350
11.1k
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2351
11.1k
    }
2352
14.9k
    unsigned char last_char = '\0';
2353
14.9k
    {
2354
14.9k
        PipelinePopper pp_enc(this);
2355
14.9k
        pushEncryptionFilter(pp_enc);
2356
14.9k
        writeBuffer(hint_buffer);
2357
14.9k
        last_char = m->pipeline->getLastChar();
2358
14.9k
    }
2359
2360
14.9k
    if (last_char != '\n') {
2361
14.9k
        writeString("\n");
2362
14.9k
    }
2363
14.9k
    writeString("endstream");
2364
14.9k
    closeObject(hint_id);
2365
14.9k
}
2366
2367
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // Short form: forwards to the full overload with every extra argument zero/false. This is
    // kept as a separate overload rather than default arguments in the header because there are
    // too many extra arguments and too much risk of leaving one off.
    qpdf_offset_t const prev = 0;
    bool const suppress_offsets = false;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    int const linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        prev,
        suppress_offsets,
        hint_id,
        hint_offset,
        hint_length,
        linearization_pass);
}
2374
2375
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    // Write a classic xref section for objects first..last followed by the trailer. The return
    // value is the pipeline byte count taken just before the newline that ends the
    // "<first> <count>" subsection header.
    writeString("xref\n");
    writeString(std::to_string(first));
    writeString(" ");
    writeString(std::to_string(last - first + 1));
    qpdf_offset_t space_before_zero = m->pipeline->getCount();
    writeString("\n");

    for (int objid = first; objid <= last; ++objid) {
        if (objid == 0) {
            // Object 0 always gets the fixed free entry.
            writeString("0000000000 65535 f \n");
            continue;
        }

        // With suppress_offsets every in-use entry is written with offset 0.
        qpdf_offset_t offset = 0;
        if (!suppress_offsets) {
            offset = m->new_obj[objid].xref.getOffset();
            // When a hint stream is in play, every object other than the hint stream itself that
            // lies at or beyond hint_offset is shifted by the hint stream's length.
            bool const shifted_by_hint =
                (hint_id != 0) && (objid != hint_id) && (offset >= hint_offset);
            if (shifted_by_hint) {
                offset += hint_length;
            }
        }
        writeString(QUtil::int_to_string(offset, 10));
        writeString(" 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    writeString("\n");
    return space_before_zero;
}
2413
2414
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // Short form: forwards to the full overload with every extra argument zero/false. This is
    // kept as a separate overload rather than default arguments in the header because there are
    // too many extra arguments and too much risk of leaving one off.
    qpdf_offset_t const prev = 0;
    int const hint_id = 0;
    qpdf_offset_t const hint_offset = 0;
    qpdf_offset_t const hint_length = 0;
    bool const skip_compression = false;
    int const linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        prev,
        hint_id,
        hint_offset,
        hint_length,
        skip_compression,
        linearization_pass);
}
2423
2424
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    // Write object xref_id as a cross-reference stream covering objects first..last. The return
    // value is one less than the pipeline byte count at entry.
    qpdf_offset_t xref_offset = m->pipeline->getCount();
    qpdf_offset_t space_before_zero = xref_offset - 1;

    // Field 1 holds file offsets or object stream ids, so it must be wide enough for both.
    unsigned int offset_width =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 holds indices within object streams.
    unsigned int index_width = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // One leading type byte plus the two fields.
    unsigned int entry_width = 1 + offset_width + index_width;

    // The xref entry for this object must be recorded now, before the data is generated, rather
    // than waiting for openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());

    // Assemble the pipeline stack that produces the stream data: buffer <- [flate] <- png filter.
    Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
    bool const compressed = m->compress_streams && !m->qdf_mode;
    if (compressed) {
        // With skip_compression set, the dictionary below still announces /FlateDecode but the
        // data is not actually deflated. This helps with computing padding in pass 1 of
        // linearization.
        if (!skip_compression) {
            p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
        }
        p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, entry_width));
    }

    std::shared_ptr<Buffer> xref_data;
    {
        // The generated data is delivered into xref_data when pp_xref leaves scope.
        PipelinePopper pp_xref(this, &xref_data);
        activatePipelineStack(pp_xref);
        for (int objid = first; objid <= last; ++objid) {
            QPDFXRefEntry& entry = m->new_obj[objid].xref;
            int const type = entry.getType();
            if (type == 0) {
                // Type 0: all fields zero.
                writeBinary(0, 1);
                writeBinary(0, offset_width);
                writeBinary(0, index_width);
            } else if (type == 1) {
                // Type 1: file offset, shifted by the hint stream length for every object other
                // than the hint stream that lies at or beyond hint_offset.
                qpdf_offset_t offset = entry.getOffset();
                if ((hint_id != 0) && (objid != hint_id) && (offset >= hint_offset)) {
                    offset += hint_length;
                }
                writeBinary(1, 1);
                writeBinary(QIntC::to_ulonglong(offset), offset_width);
                writeBinary(0, index_width);
            } else if (type == 2) {
                // Type 2: object stream number and index within that stream.
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), offset_width);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), index_width);
            } else {
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    // Stream dictionary
    openObject(xref_id);
    writeString("<<");
    writeStringQDF("\n ");
    writeString(" /Type /XRef");
    writeStringQDF("\n ");
    writeString(" /Length " + std::to_string(xref_data->getSize()));
    if (compressed) {
        writeStringQDF("\n ");
        writeString(" /Filter /FlateDecode");
        writeStringQDF("\n ");
        writeString(
            " /DecodeParms << /Columns " + std::to_string(entry_width) + " /Predictor 12 >>");
    }
    writeStringQDF("\n ");
    writeString(
        " /W [ 1 " + std::to_string(offset_width) + " " + std::to_string(index_width) + " ]");

    // /Index is written only when the stream does not cover exactly objects 0..size-1.
    bool const covers_everything = (first == 0) && (last == size - 1);
    if (!covers_everything) {
        writeString(
            " /Index [ " + std::to_string(first) + " " + std::to_string(last - first + 1) + " ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);

    // Stream data
    writeString("\nstream\n");
    writeBuffer(xref_data);
    writeString("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2529
2530
size_t
2531
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2532
10.4k
{
2533
    // This routine is called right after a linearization first pass xref stream has been written
2534
    // without compression.  Calculate the amount of padding that would be required in the worst
2535
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2536
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2537
    // 10 extra bytes for number length increases.
2538
2539
10.4k
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2540
10.4k
}
2541
2542
void
2543
QPDFWriter::writeLinearized()
2544
18.1k
{
2545
    // Optimize file and enqueue objects in order
2546
2547
18.1k
    std::map<int, int> stream_cache;
2548
2549
379k
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2550
379k
        auto& result = stream_cache[stream.getObjectID()];
2551
379k
        if (result == 0) {
2552
185k
            bool compress_stream;
2553
185k
            bool is_metadata;
2554
185k
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2555
89.1k
                result = 2;
2556
96.1k
            } else {
2557
96.1k
                result = 1;
2558
96.1k
            }
2559
185k
        }
2560
379k
        return result;
2561
379k
    };
2562
2563
18.1k
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2564
2565
18.1k
    std::vector<QPDFObjectHandle> part4;
2566
18.1k
    std::vector<QPDFObjectHandle> part6;
2567
18.1k
    std::vector<QPDFObjectHandle> part7;
2568
18.1k
    std::vector<QPDFObjectHandle> part8;
2569
18.1k
    std::vector<QPDFObjectHandle> part9;
2570
18.1k
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2571
2572
    // Object number sequence:
2573
    //
2574
    //  second half
2575
    //    second half uncompressed objects
2576
    //    second half xref stream, if any
2577
    //    second half compressed objects
2578
    //  first half
2579
    //    linearization dictionary
2580
    //    first half xref stream, if any
2581
    //    part 4 uncompresesd objects
2582
    //    encryption dictionary, if any
2583
    //    hint stream
2584
    //    part 6 uncompressed objects
2585
    //    first half compressed objects
2586
    //
2587
2588
    // Second half objects
2589
18.1k
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2590
18.1k
    int second_half_first_obj = 1;
2591
18.1k
    int after_second_half = 1 + second_half_uncompressed;
2592
18.1k
    m->next_objid = after_second_half;
2593
18.1k
    int second_half_xref = 0;
2594
18.1k
    bool need_xref_stream = !m->obj.streams_empty;
2595
18.1k
    if (need_xref_stream) {
2596
6.13k
        second_half_xref = m->next_objid++;
2597
6.13k
    }
2598
    // Assign numbers to all compressed objects in the second half.
2599
18.1k
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2600
67.4k
    for (int i = 0; i < 3; ++i) {
2601
268k
        for (auto const& oh: *vecs2[i]) {
2602
268k
            assignCompressedObjectNumbers(oh.getObjGen());
2603
268k
        }
2604
49.3k
    }
2605
18.1k
    int second_half_end = m->next_objid - 1;
2606
18.1k
    int second_trailer_size = m->next_objid;
2607
2608
    // First half objects
2609
18.1k
    int first_half_start = m->next_objid;
2610
18.1k
    int lindict_id = m->next_objid++;
2611
18.1k
    int first_half_xref = 0;
2612
18.1k
    if (need_xref_stream) {
2613
6.13k
        first_half_xref = m->next_objid++;
2614
6.13k
    }
2615
18.1k
    int part4_first_obj = m->next_objid;
2616
18.1k
    m->next_objid += QIntC::to_int(part4.size());
2617
18.1k
    int after_part4 = m->next_objid;
2618
18.1k
    if (m->encrypted) {
2619
11.7k
        m->encryption_dict_objid = m->next_objid++;
2620
11.7k
    }
2621
18.1k
    int hint_id = m->next_objid++;
2622
18.1k
    int part6_first_obj = m->next_objid;
2623
18.1k
    m->next_objid += QIntC::to_int(part6.size());
2624
18.1k
    int after_part6 = m->next_objid;
2625
    // Assign numbers to all compressed objects in the first half
2626
18.1k
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2627
51.0k
    for (int i = 0; i < 2; ++i) {
2628
186k
        for (auto const& oh: *vecs1[i]) {
2629
186k
            assignCompressedObjectNumbers(oh.getObjGen());
2630
186k
        }
2631
32.8k
    }
2632
18.1k
    int first_half_end = m->next_objid - 1;
2633
18.1k
    int first_trailer_size = m->next_objid;
2634
2635
18.1k
    int part4_end_marker = part4.back().getObjectID();
2636
18.1k
    int part6_end_marker = part6.back().getObjectID();
2637
18.1k
    qpdf_offset_t space_before_zero = 0;
2638
18.1k
    qpdf_offset_t file_size = 0;
2639
18.1k
    qpdf_offset_t part6_end_offset = 0;
2640
18.1k
    qpdf_offset_t first_half_max_obj_offset = 0;
2641
18.1k
    qpdf_offset_t second_xref_offset = 0;
2642
18.1k
    qpdf_offset_t first_xref_end = 0;
2643
18.1k
    qpdf_offset_t second_xref_end = 0;
2644
2645
18.1k
    m->next_objid = part4_first_obj;
2646
18.1k
    enqueuePart(part4);
2647
18.1k
    if (m->next_objid != after_part4) {
2648
        // This can happen with very botched files as in the fuzzer test. There are likely some
2649
        // faulty assumptions in calculateLinearizationData
2650
266
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2651
266
    }
2652
17.8k
    m->next_objid = part6_first_obj;
2653
17.8k
    enqueuePart(part6);
2654
17.8k
    if (m->next_objid != after_part6) {
2655
88
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2656
88
    }
2657
17.7k
    m->next_objid = second_half_first_obj;
2658
17.7k
    enqueuePart(part7);
2659
17.7k
    enqueuePart(part8);
2660
17.7k
    enqueuePart(part9);
2661
17.7k
    if (m->next_objid != after_second_half) {
2662
178
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2663
178
    }
2664
2665
17.6k
    qpdf_offset_t hint_length = 0;
2666
17.6k
    std::shared_ptr<Buffer> hint_buffer;
2667
2668
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2669
2670
17.6k
    FILE* lin_pass1_file = nullptr;
2671
17.6k
    auto pp_pass1 = std::make_shared<PipelinePopper>(this);
2672
17.6k
    auto pp_md5 = std::make_shared<PipelinePopper>(this);
2673
48.5k
    for (int pass = 1; pass <= 2; ++pass) {
2674
30.9k
        if (pass == 1) {
2675
15.9k
            if (!m->lin_pass1_filename.empty()) {
2676
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2677
0
                pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file));
2678
0
                activatePipelineStack(*pp_pass1);
2679
15.9k
            } else {
2680
15.9k
                pushDiscardFilter(*pp_pass1);
2681
15.9k
            }
2682
15.9k
            if (m->deterministic_id) {
2683
4.65k
                pushMD5Pipeline(*pp_md5);
2684
4.65k
            }
2685
15.9k
        }
2686
2687
        // Part 1: header
2688
2689
30.9k
        writeHeader();
2690
2691
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2692
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2693
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2694
        // linearization parameter dictionary must appear within the first 1024 characters of the
2695
        // file.
2696
2697
30.9k
        qpdf_offset_t pos = m->pipeline->getCount();
2698
30.9k
        openObject(lindict_id);
2699
30.9k
        writeString("<<");
2700
30.9k
        if (pass == 2) {
2701
14.9k
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2702
14.9k
            int first_page_object = m->obj[pages.at(0)].renumber;
2703
14.9k
            int npages = QIntC::to_int(pages.size());
2704
2705
14.9k
            writeString(" /Linearized 1 /L ");
2706
14.9k
            writeString(std::to_string(file_size + hint_length));
2707
            // Implementation note 121 states that a space is mandatory after this open bracket.
2708
14.9k
            writeString(" /H [ ");
2709
14.9k
            writeString(std::to_string(m->new_obj[hint_id].xref.getOffset()));
2710
14.9k
            writeString(" ");
2711
14.9k
            writeString(std::to_string(hint_length));
2712
14.9k
            writeString(" ] /O ");
2713
14.9k
            writeString(std::to_string(first_page_object));
2714
14.9k
            writeString(" /E ");
2715
14.9k
            writeString(std::to_string(part6_end_offset + hint_length));
2716
14.9k
            writeString(" /N ");
2717
14.9k
            writeString(std::to_string(npages));
2718
14.9k
            writeString(" /T ");
2719
14.9k
            writeString(std::to_string(space_before_zero + hint_length));
2720
14.9k
        }
2721
30.9k
        writeString(" >>");
2722
30.9k
        closeObject(lindict_id);
2723
30.9k
        static int const pad = 200;
2724
30.9k
        writePad(QIntC::to_size(pos - m->pipeline->getCount() + pad));
2725
30.9k
        writeString("\n");
2726
2727
        // If the user supplied any additional header text, write it here after the linearization
2728
        // parameter dictionary.
2729
30.9k
        writeString(m->extra_header_text);
2730
2731
        // Part 3: first page cross reference table and trailer.
2732
2733
30.9k
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2734
30.9k
        qpdf_offset_t hint_offset = 0;
2735
30.9k
        if (pass == 2) {
2736
14.9k
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2737
14.9k
        }
2738
30.9k
        if (need_xref_stream) {
2739
            // Must pad here too.
2740
11.1k
            if (pass == 1) {
2741
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2742
                // reserved for each file offset.  This would provide adequate space for the xref
2743
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2744
                // file, which is extremely likely.  In the second pass, we will know the actual
2745
                // value for this, but it's okay if it's smaller.
2746
6.01k
                first_half_max_obj_offset = 1 << 25;
2747
6.01k
            }
2748
11.1k
            pos = m->pipeline->getCount();
2749
11.1k
            writeXRefStream(
2750
11.1k
                first_half_xref,
2751
11.1k
                first_half_end,
2752
11.1k
                first_half_max_obj_offset,
2753
11.1k
                t_lin_first,
2754
11.1k
                first_half_start,
2755
11.1k
                first_half_end,
2756
11.1k
                first_trailer_size,
2757
11.1k
                hint_length + second_xref_offset,
2758
11.1k
                hint_id,
2759
11.1k
                hint_offset,
2760
11.1k
                hint_length,
2761
11.1k
                (pass == 1),
2762
11.1k
                pass);
2763
11.1k
            qpdf_offset_t endpos = m->pipeline->getCount();
2764
11.1k
            if (pass == 1) {
2765
                // Pad so we have enough room for the real xref stream.
2766
5.26k
                writePad(calculateXrefStreamPadding(endpos - pos));
2767
5.26k
                first_xref_end = m->pipeline->getCount();
2768
5.90k
            } else {
2769
                // Pad so that the next object starts at the same place as in pass 1.
2770
5.90k
                writePad(QIntC::to_size(first_xref_end - endpos));
2771
2772
5.90k
                if (m->pipeline->getCount() != first_xref_end) {
2773
0
                    throw std::logic_error(
2774
0
                        "insufficient padding for first pass xref stream; "
2775
0
                        "first_xref_end=" +
2776
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2777
0
                }
2778
5.90k
            }
2779
11.1k
            writeString("\n");
2780
19.7k
        } else {
2781
19.7k
            writeXRefTable(
2782
19.7k
                t_lin_first,
2783
19.7k
                first_half_start,
2784
19.7k
                first_half_end,
2785
19.7k
                first_trailer_size,
2786
19.7k
                hint_length + second_xref_offset,
2787
19.7k
                (pass == 1),
2788
19.7k
                hint_id,
2789
19.7k
                hint_offset,
2790
19.7k
                hint_length,
2791
19.7k
                pass);
2792
19.7k
            writeString("startxref\n0\n%%EOF\n");
2793
19.7k
        }
2794
2795
        // Parts 4 through 9
2796
2797
862k
        for (auto const& cur_object: m->object_queue) {
2798
862k
            if (cur_object.getObjectID() == part6_end_marker) {
2799
30.1k
                first_half_max_obj_offset = m->pipeline->getCount();
2800
30.1k
            }
2801
862k
            writeObject(cur_object);
2802
862k
            if (cur_object.getObjectID() == part4_end_marker) {
2803
30.1k
                if (m->encrypted) {
2804
22.3k
                    writeEncryptionDictionary();
2805
22.3k
                }
2806
30.1k
                if (pass == 1) {
2807
15.1k
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2808
15.1k
                } else {
2809
                    // Part 5: hint stream
2810
14.9k
                    writeBuffer(hint_buffer);
2811
14.9k
                }
2812
30.1k
            }
2813
862k
            if (cur_object.getObjectID() == part6_end_marker) {
2814
30.0k
                part6_end_offset = m->pipeline->getCount();
2815
30.0k
            }
2816
862k
        }
2817
2818
        // Part 10: overflow hint stream -- not used
2819
2820
        // Part 11: main cross reference table and trailer
2821
2822
30.9k
        second_xref_offset = m->pipeline->getCount();
2823
30.9k
        if (need_xref_stream) {
2824
10.3k
            pos = m->pipeline->getCount();
2825
10.3k
            space_before_zero = writeXRefStream(
2826
10.3k
                second_half_xref,
2827
10.3k
                second_half_end,
2828
10.3k
                second_xref_offset,
2829
10.3k
                t_lin_second,
2830
10.3k
                0,
2831
10.3k
                second_half_end,
2832
10.3k
                second_trailer_size,
2833
10.3k
                0,
2834
10.3k
                0,
2835
10.3k
                0,
2836
10.3k
                0,
2837
10.3k
                (pass == 1),
2838
10.3k
                pass);
2839
10.3k
            qpdf_offset_t endpos = m->pipeline->getCount();
2840
2841
10.3k
            if (pass == 1) {
2842
                // Pad so we have enough room for the real xref stream.  See comments for previous
2843
                // xref stream on how we calculate the padding.
2844
5.16k
                writePad(calculateXrefStreamPadding(endpos - pos));
2845
5.16k
                writeString("\n");
2846
5.16k
                second_xref_end = m->pipeline->getCount();
2847
5.16k
            } else {
2848
                // Make the file size the same.
2849
5.14k
                writePad(
2850
5.14k
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount()));
2851
5.14k
                writeString("\n");
2852
2853
                // If this assertion fails, maybe we didn't have enough padding above.
2854
5.14k
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2855
0
                    throw std::logic_error(
2856
0
                        "count mismatch after xref stream; possible insufficient padding?");
2857
0
                }
2858
5.14k
            }
2859
20.6k
        } else {
2860
20.6k
            space_before_zero = writeXRefTable(
2861
20.6k
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2862
20.6k
        }
2863
30.9k
        writeString("startxref\n");
2864
30.9k
        writeString(std::to_string(first_xref_offset));
2865
30.9k
        writeString("\n%%EOF\n");
2866
2867
30.9k
        if (pass == 1) {
2868
14.9k
            if (m->deterministic_id) {
2869
3.85k
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2870
3.85k
                computeDeterministicIDData();
2871
3.85k
                pp_md5 = nullptr;
2872
3.85k
                qpdf_assert_debug(m->md5_pipeline == nullptr);
2873
3.85k
            }
2874
2875
            // Close first pass pipeline
2876
14.9k
            file_size = m->pipeline->getCount();
2877
14.9k
            pp_pass1 = nullptr;
2878
2879
            // Save hint offset since it will be set to zero by calling openObject.
2880
14.9k
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2881
2882
            // Write hint stream to a buffer
2883
14.9k
            {
2884
14.9k
                pushPipeline(new Pl_Buffer("hint buffer"));
2885
14.9k
                PipelinePopper pp_hint(this, &hint_buffer);
2886
14.9k
                activatePipelineStack(pp_hint);
2887
14.9k
                writeHintStream(hint_id);
2888
14.9k
            }
2889
14.9k
            hint_length = QIntC::to_offset(hint_buffer->getSize());
2890
2891
            // Restore hint offset
2892
14.9k
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2893
14.9k
            if (lin_pass1_file) {
2894
                // Write some debugging information
2895
0
                fprintf(
2896
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2897
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2898
0
                fprintf(
2899
0
                    lin_pass1_file,
2900
0
                    "%% second_xref_offset=%s\n",
2901
0
                    std::to_string(second_xref_offset).c_str());
2902
0
                fprintf(
2903
0
                    lin_pass1_file,
2904
0
                    "%% second_xref_end=%s\n",
2905
0
                    std::to_string(second_xref_end).c_str());
2906
0
                fclose(lin_pass1_file);
2907
0
                lin_pass1_file = nullptr;
2908
0
            }
2909
14.9k
        }
2910
30.9k
    }
2911
17.6k
}
2912
2913
void
2914
QPDFWriter::enqueueObjectsStandard()
2915
14.1k
{
2916
14.1k
    if (m->preserve_unreferenced_objects) {
2917
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2918
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2919
0
            enqueueObject(oh);
2920
0
        }
2921
0
    }
2922
2923
    // Put root first on queue.
2924
14.1k
    QPDFObjectHandle trailer = getTrimmedTrailer();
2925
14.1k
    enqueueObject(trailer.getKey("/Root"));
2926
2927
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2928
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2929
61.0k
    for (auto const& key: trailer.getKeys()) {
2930
61.0k
        enqueueObject(trailer.getKey(key));
2931
61.0k
    }
2932
14.1k
}
2933
2934
void
2935
QPDFWriter::enqueueObjectsPCLm()
2936
0
{
2937
    // Image transform stream content for page strip images. Each of this new stream has to come
2938
    // after every page image strip written in the pclm file.
2939
0
    std::string image_transform_content = "q /image Do Q\n";
2940
2941
    // enqueue all pages first
2942
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2943
0
    for (auto& page: all) {
2944
        // enqueue page
2945
0
        enqueueObject(page);
2946
2947
        // enqueue page contents stream
2948
0
        enqueueObject(page.getKey("/Contents"));
2949
2950
        // enqueue all the strips for each page
2951
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2952
0
        for (auto const& image: strips.getKeys()) {
2953
0
            enqueueObject(strips.getKey(image));
2954
0
            enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2955
0
        }
2956
0
    }
2957
2958
    // Put root in queue.
2959
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2960
0
    enqueueObject(trailer.getKey("/Root"));
2961
0
}
2962
2963
void
2964
QPDFWriter::indicateProgress(bool decrement, bool finished)
2965
2.17M
{
2966
2.17M
    if (decrement) {
2967
311k
        --m->events_seen;
2968
311k
        return;
2969
311k
    }
2970
2971
1.86M
    ++m->events_seen;
2972
2973
1.86M
    if (!m->progress_reporter.get()) {
2974
1.86M
        return;
2975
1.86M
    }
2976
2977
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
2978
0
        int percentage =
2979
0
            (finished ? 100
2980
0
                 : m->next_progress_report == 0
2981
0
                 ? 0
2982
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
2983
0
        m->progress_reporter->reportProgress(percentage);
2984
0
    }
2985
0
    int increment = std::max(1, (m->events_expected / 100));
2986
0
    while (m->events_seen >= m->next_progress_report) {
2987
0
        m->next_progress_report += increment;
2988
0
    }
2989
0
}
2990
2991
void
2992
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
2993
0
{
2994
0
    m->progress_reporter = pr;
2995
0
}
2996
2997
void
2998
QPDFWriter::writeStandard()
2999
14.1k
{
3000
14.1k
    auto pp_md5 = std::make_shared<PipelinePopper>(this);
3001
14.1k
    if (m->deterministic_id) {
3002
8.28k
        pushMD5Pipeline(*pp_md5);
3003
8.28k
    }
3004
3005
    // Start writing
3006
3007
14.1k
    writeHeader();
3008
14.1k
    writeString(m->extra_header_text);
3009
3010
14.1k
    if (m->pclm) {
3011
0
        enqueueObjectsPCLm();
3012
14.1k
    } else {
3013
14.1k
        enqueueObjectsStandard();
3014
14.1k
    }
3015
3016
    // Now start walking queue, outputting each object.
3017
386k
    while (m->object_queue_front < m->object_queue.size()) {
3018
372k
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3019
372k
        ++m->object_queue_front;
3020
372k
        writeObject(cur_object);
3021
372k
    }
3022
3023
    // Write out the encryption dictionary, if any
3024
14.1k
    if (m->encrypted) {
3025
5.75k
        writeEncryptionDictionary();
3026
5.75k
    }
3027
3028
    // Now write out xref.  next_objid is now the number of objects.
3029
14.1k
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3030
14.1k
    if (m->object_stream_to_objects.empty()) {
3031
        // Write regular cross-reference table
3032
13.2k
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3033
13.2k
    } else {
3034
        // Write cross-reference stream.
3035
888
        int xref_id = m->next_objid++;
3036
888
        writeXRefStream(
3037
888
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3038
888
    }
3039
14.1k
    writeString("startxref\n");
3040
14.1k
    writeString(std::to_string(xref_offset));
3041
14.1k
    writeString("\n%%EOF\n");
3042
3043
14.1k
    if (m->deterministic_id) {
3044
7.22k
        QTC::TC(
3045
7.22k
            "qpdf",
3046
7.22k
            "QPDFWriter standard deterministic ID",
3047
7.22k
            m->object_stream_to_objects.empty() ? 0 : 1);
3048
7.22k
        pp_md5 = nullptr;
3049
7.22k
        qpdf_assert_debug(m->md5_pipeline == nullptr);
3050
7.22k
    }
3051
14.1k
}