Coverage Report

Created: 2025-07-11 07:00

/src/qpdf/libqpdf/QPDFWriter.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/assert_debug.h>
2
3
#include <qpdf/qpdf-config.h> // include early for large file support
4
5
#include <qpdf/QPDFWriter_private.hh>
6
7
#include <qpdf/MD5.hh>
8
#include <qpdf/Pl_AES_PDF.hh>
9
#include <qpdf/Pl_Flate.hh>
10
#include <qpdf/Pl_MD5.hh>
11
#include <qpdf/Pl_PNGFilter.hh>
12
#include <qpdf/Pl_RC4.hh>
13
#include <qpdf/Pl_StdioFile.hh>
14
#include <qpdf/Pl_String.hh>
15
#include <qpdf/QIntC.hh>
16
#include <qpdf/QPDFObjectHandle_private.hh>
17
#include <qpdf/QPDFObject_private.hh>
18
#include <qpdf/QPDF_private.hh>
19
#include <qpdf/QTC.hh>
20
#include <qpdf/QUtil.hh>
21
#include <qpdf/RC4.hh>
22
#include <qpdf/Util.hh>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <stdexcept>
27
28
using namespace std::literals;
29
using namespace qpdf;
30
31
// Destructor for the progress-reporter base; the body is intentionally empty.
QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
{
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
35
36
QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) :
37
0
    handler(handler)
38
0
{
39
0
}
40
41
// Destructor for the callback-based reporter; the body is intentionally empty.
// NOLINTNEXTLINE(modernize-use-equals-default)
QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter()
{
    // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
46
47
void
48
QPDFWriter::FunctionProgressReporter::reportProgress(int progress)
49
0
{
50
0
    this->handler(progress);
51
0
}
52
53
class QPDFWriter::Members
54
{
55
    friend class QPDFWriter;
56
57
  public:
58
    ~Members();
59
60
  private:
61
    Members(QPDF& pdf);
62
    Members(Members const&) = delete;
63
64
    QPDF& pdf;
65
    QPDFObjGen root_og{-1, 0};
66
    char const* filename{"unspecified"};
67
    FILE* file{nullptr};
68
    bool close_file{false};
69
    Pl_Buffer* buffer_pipeline{nullptr};
70
    Buffer* output_buffer{nullptr};
71
    bool normalize_content_set{false};
72
    bool normalize_content{false};
73
    bool compress_streams{true};
74
    bool compress_streams_set{false};
75
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
76
    bool stream_decode_level_set{false};
77
    bool recompress_flate{false};
78
    bool qdf_mode{false};
79
    bool preserve_unreferenced_objects{false};
80
    bool newline_before_endstream{false};
81
    bool static_id{false};
82
    bool suppress_original_object_ids{false};
83
    bool direct_stream_lengths{true};
84
    bool preserve_encryption{true};
85
    bool linearized{false};
86
    bool pclm{false};
87
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
88
89
    std::unique_ptr<QPDF::EncryptionData> encryption;
90
    std::string encryption_key;
91
    bool encrypt_use_aes{false};
92
93
    std::string id1; // for /ID key of
94
    std::string id2; // trailer dictionary
95
    std::string final_pdf_version;
96
    int final_extension_level{0};
97
    std::string min_pdf_version;
98
    int min_extension_level{0};
99
    std::string forced_pdf_version;
100
    int forced_extension_level{0};
101
    std::string extra_header_text;
102
    int encryption_dict_objid{0};
103
    std::string cur_data_key;
104
    std::list<std::shared_ptr<Pipeline>> to_delete;
105
    qpdf::pl::Count* pipeline{nullptr};
106
    std::vector<QPDFObjectHandle> object_queue;
107
    size_t object_queue_front{0};
108
    QPDFWriter::ObjTable obj;
109
    QPDFWriter::NewObjTable new_obj;
110
    int next_objid{1};
111
    int cur_stream_length_id{0};
112
    size_t cur_stream_length{0};
113
    bool added_newline{false};
114
    size_t max_ostream_index{0};
115
    std::set<QPDFObjGen> normalized_streams;
116
    std::map<QPDFObjGen, int> page_object_to_seq;
117
    std::map<QPDFObjGen, int> contents_to_page_seq;
118
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
119
    std::vector<Pipeline*> pipeline_stack;
120
    unsigned long next_stack_id{2};
121
    std::string count_buffer;
122
    bool deterministic_id{false};
123
    Pl_MD5* md5_pipeline{nullptr};
124
    std::string deterministic_id_data;
125
    bool did_write_setup{false};
126
127
    // For linearization only
128
    std::string lin_pass1_filename;
129
130
    // For progress reporting
131
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
132
    int events_expected{0};
133
    int events_seen{0};
134
    int next_progress_report{0};
135
};
136
137
// Binds to the source document and records the root object's id/generation when the root is an
// indirect object; otherwise root_og is set to (-1, 0).
QPDFWriter::Members::Members(QPDF& pdf) :
    pdf(pdf),
    root_og(
        pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0))
{
}
142
143
// Releases what this object owns: the output file (only when close_file is set) and any output
// buffer that has not been handed out.
QPDFWriter::Members::~Members()
{
    if (close_file && file != nullptr) {
        fclose(file);
    }
    delete output_buffer;
}
150
151
// Creates a writer for `pdf` without configuring an output destination.
QPDFWriter::QPDFWriter(QPDF& pdf) :
    m(new Members(pdf))
{
}
155
156
// Creates a writer for `pdf` and immediately passes `filename` to setOutputFilename().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
    m(new Members(pdf))
{
    setOutputFilename(filename);
}
161
162
// Creates a writer for `pdf` and immediately passes the remaining arguments to setOutputFile().
QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) :
    m(new Members(pdf))
{
    setOutputFile(description, file, close_file);
}
167
168
void
169
QPDFWriter::setOutputFilename(char const* filename)
170
0
{
171
0
    char const* description = filename;
172
0
    FILE* f = nullptr;
173
0
    bool close_file = false;
174
0
    if (filename == nullptr) {
175
0
        description = "standard output";
176
0
        QTC::TC("qpdf", "QPDFWriter write to stdout");
177
0
        f = stdout;
178
0
        QUtil::binary_stdout();
179
0
    } else {
180
0
        QTC::TC("qpdf", "QPDFWriter write to file");
181
0
        f = QUtil::safe_fopen(filename, "wb+");
182
0
        close_file = true;
183
0
    }
184
0
    setOutputFile(description, f, close_file);
185
0
}
186
187
void
188
QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
189
0
{
190
0
    m->filename = description;
191
0
    m->file = file;
192
0
    m->close_file = close_file;
193
0
    std::shared_ptr<Pipeline> p = std::make_shared<Pl_StdioFile>("qpdf output", file);
194
0
    m->to_delete.push_back(p);
195
0
    initializePipelineStack(p.get());
196
0
}
197
198
void
199
QPDFWriter::setOutputMemory()
200
0
{
201
0
    m->filename = "memory buffer";
202
0
    m->buffer_pipeline = new Pl_Buffer("qpdf output");
203
0
    m->to_delete.push_back(std::shared_ptr<Pipeline>(m->buffer_pipeline));
204
0
    initializePipelineStack(m->buffer_pipeline);
205
0
}
206
207
// Hands the stored output buffer to the caller and clears the writer's pointer to it, so
// Members' destructor will no longer delete it. Returns whatever was stored, which may be null.
Buffer*
QPDFWriter::getBuffer()
{
    Buffer* const released = m->output_buffer;
    m->output_buffer = nullptr;
    return released;
}
214
215
std::shared_ptr<Buffer>
216
QPDFWriter::getBufferSharedPointer()
217
0
{
218
0
    return std::shared_ptr<Buffer>(getBuffer());
219
0
}
220
221
void
222
QPDFWriter::setOutputPipeline(Pipeline* p)
223
7.75k
{
224
7.75k
    m->filename = "custom pipeline";
225
7.75k
    initializePipelineStack(p);
226
7.75k
}
227
228
void
229
QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
230
7.75k
{
231
7.75k
    m->object_stream_mode = mode;
232
7.75k
}
233
234
void
235
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
236
0
{
237
0
    switch (mode) {
238
0
    case qpdf_s_uncompress:
239
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
240
0
        m->compress_streams = false;
241
0
        break;
242
243
0
    case qpdf_s_preserve:
244
0
        m->stream_decode_level = qpdf_dl_none;
245
0
        m->compress_streams = false;
246
0
        break;
247
248
0
    case qpdf_s_compress:
249
0
        m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level);
250
0
        m->compress_streams = true;
251
0
        break;
252
0
    }
253
0
    m->stream_decode_level_set = true;
254
0
    m->compress_streams_set = true;
255
0
}
256
257
void
258
QPDFWriter::setCompressStreams(bool val)
259
0
{
260
0
    m->compress_streams = val;
261
0
    m->compress_streams_set = true;
262
0
}
263
264
void
265
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
266
7.75k
{
267
7.75k
    m->stream_decode_level = val;
268
7.75k
    m->stream_decode_level_set = true;
269
7.75k
}
270
271
void
272
QPDFWriter::setRecompressFlate(bool val)
273
0
{
274
0
    m->recompress_flate = val;
275
0
}
276
277
void
278
QPDFWriter::setContentNormalization(bool val)
279
0
{
280
0
    m->normalize_content_set = true;
281
0
    m->normalize_content = val;
282
0
}
283
284
void
285
QPDFWriter::setQDFMode(bool val)
286
0
{
287
0
    m->qdf_mode = val;
288
0
}
289
290
void
291
QPDFWriter::setPreserveUnreferencedObjects(bool val)
292
0
{
293
0
    m->preserve_unreferenced_objects = val;
294
0
}
295
296
void
297
QPDFWriter::setNewlineBeforeEndstream(bool val)
298
0
{
299
0
    m->newline_before_endstream = val;
300
0
}
301
302
void
303
QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level)
304
15.4k
{
305
15.4k
    bool set_version = false;
306
15.4k
    bool set_extension_level = false;
307
15.4k
    if (m->min_pdf_version.empty()) {
308
7.74k
        set_version = true;
309
7.74k
        set_extension_level = true;
310
7.74k
    } else {
311
7.70k
        int old_major = 0;
312
7.70k
        int old_minor = 0;
313
7.70k
        int min_major = 0;
314
7.70k
        int min_minor = 0;
315
7.70k
        parseVersion(version, old_major, old_minor);
316
7.70k
        parseVersion(m->min_pdf_version, min_major, min_minor);
317
7.70k
        int compare = compareVersions(old_major, old_minor, min_major, min_minor);
318
7.70k
        if (compare > 0) {
319
701
            QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1);
320
701
            set_version = true;
321
701
            set_extension_level = true;
322
7.00k
        } else if (compare == 0) {
323
454
            if (extension_level > m->min_extension_level) {
324
1
                QTC::TC("qpdf", "QPDFWriter increasing extension level");
325
1
                set_extension_level = true;
326
1
            }
327
454
        }
328
7.70k
    }
329
330
15.4k
    if (set_version) {
331
8.44k
        m->min_pdf_version = version;
332
8.44k
    }
333
15.4k
    if (set_extension_level) {
334
8.44k
        m->min_extension_level = extension_level;
335
8.44k
    }
336
15.4k
}
337
338
void
339
QPDFWriter::setMinimumPDFVersion(PDFVersion const& v)
340
0
{
341
0
    std::string version;
342
0
    int extension_level;
343
0
    v.getVersion(version, extension_level);
344
0
    setMinimumPDFVersion(version, extension_level);
345
0
}
346
347
void
348
QPDFWriter::forcePDFVersion(std::string const& version, int extension_level)
349
0
{
350
0
    m->forced_pdf_version = version;
351
0
    m->forced_extension_level = extension_level;
352
0
}
353
354
void
355
QPDFWriter::setExtraHeaderText(std::string const& text)
356
0
{
357
0
    m->extra_header_text = text;
358
0
    if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') {
359
0
        QTC::TC("qpdf", "QPDFWriter extra header text add newline");
360
0
        m->extra_header_text += "\n";
361
0
    } else {
362
0
        QTC::TC("qpdf", "QPDFWriter extra header text no newline");
363
0
    }
364
0
}
365
366
void
367
QPDFWriter::setStaticID(bool val)
368
7.75k
{
369
7.75k
    m->static_id = val;
370
7.75k
}
371
372
void
373
QPDFWriter::setDeterministicID(bool val)
374
0
{
375
0
    m->deterministic_id = val;
376
0
}
377
378
void
379
QPDFWriter::setStaticAesIV(bool val)
380
0
{
381
0
    if (val) {
382
0
        Pl_AES_PDF::useStaticIV();
383
0
    }
384
0
}
385
386
void
387
QPDFWriter::setSuppressOriginalObjectIDs(bool val)
388
0
{
389
0
    m->suppress_original_object_ids = val;
390
0
}
391
392
void
393
QPDFWriter::setPreserveEncryption(bool val)
394
0
{
395
0
    m->preserve_encryption = val;
396
0
}
397
398
void
399
QPDFWriter::setLinearization(bool val)
400
0
{
401
0
    m->linearized = val;
402
0
    if (val) {
403
0
        m->pclm = false;
404
0
    }
405
0
}
406
407
void
408
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
409
0
{
410
0
    m->lin_pass1_filename = filename;
411
0
}
412
413
void
414
QPDFWriter::setPCLm(bool val)
415
0
{
416
0
    m->pclm = val;
417
0
    if (val) {
418
0
        m->linearized = false;
419
0
    }
420
0
}
421
422
void
423
QPDFWriter::setR2EncryptionParametersInsecure(
424
    char const* user_password,
425
    char const* owner_password,
426
    bool allow_print,
427
    bool allow_modify,
428
    bool allow_extract,
429
    bool allow_annotate)
430
0
{
431
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true);
432
0
    if (!allow_print) {
433
0
        m->encryption->setP(3, false);
434
0
    }
435
0
    if (!allow_modify) {
436
0
        m->encryption->setP(4, false);
437
0
    }
438
0
    if (!allow_extract) {
439
0
        m->encryption->setP(5, false);
440
0
    }
441
0
    if (!allow_annotate) {
442
0
        m->encryption->setP(6, false);
443
0
    }
444
0
    setEncryptionParameters(user_password, owner_password);
445
0
}
446
447
void
448
QPDFWriter::setR3EncryptionParametersInsecure(
449
    char const* user_password,
450
    char const* owner_password,
451
    bool allow_accessibility,
452
    bool allow_extract,
453
    bool allow_assemble,
454
    bool allow_annotate_and_form,
455
    bool allow_form_filling,
456
    bool allow_modify_other,
457
    qpdf_r3_print_e print)
458
7.75k
{
459
7.75k
    m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true);
460
7.75k
    interpretR3EncryptionParameters(
461
7.75k
        allow_accessibility,
462
7.75k
        allow_extract,
463
7.75k
        allow_assemble,
464
7.75k
        allow_annotate_and_form,
465
7.75k
        allow_form_filling,
466
7.75k
        allow_modify_other,
467
7.75k
        print,
468
7.75k
        qpdf_r3m_all);
469
7.75k
    setEncryptionParameters(user_password, owner_password);
470
7.75k
}
471
472
void
473
QPDFWriter::setR4EncryptionParametersInsecure(
474
    char const* user_password,
475
    char const* owner_password,
476
    bool allow_accessibility,
477
    bool allow_extract,
478
    bool allow_assemble,
479
    bool allow_annotate_and_form,
480
    bool allow_form_filling,
481
    bool allow_modify_other,
482
    qpdf_r3_print_e print,
483
    bool encrypt_metadata,
484
    bool use_aes)
485
0
{
486
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata);
487
0
    m->encrypt_use_aes = use_aes;
488
0
    interpretR3EncryptionParameters(
489
0
        allow_accessibility,
490
0
        allow_extract,
491
0
        allow_assemble,
492
0
        allow_annotate_and_form,
493
0
        allow_form_filling,
494
0
        allow_modify_other,
495
0
        print,
496
0
        qpdf_r3m_all);
497
0
    setEncryptionParameters(user_password, owner_password);
498
0
}
499
500
void
501
QPDFWriter::setR5EncryptionParameters(
502
    char const* user_password,
503
    char const* owner_password,
504
    bool allow_accessibility,
505
    bool allow_extract,
506
    bool allow_assemble,
507
    bool allow_annotate_and_form,
508
    bool allow_form_filling,
509
    bool allow_modify_other,
510
    qpdf_r3_print_e print,
511
    bool encrypt_metadata)
512
0
{
513
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata);
514
0
    m->encrypt_use_aes = true;
515
0
    interpretR3EncryptionParameters(
516
0
        allow_accessibility,
517
0
        allow_extract,
518
0
        allow_assemble,
519
0
        allow_annotate_and_form,
520
0
        allow_form_filling,
521
0
        allow_modify_other,
522
0
        print,
523
0
        qpdf_r3m_all);
524
0
    setEncryptionParameters(user_password, owner_password);
525
0
}
526
527
void
528
QPDFWriter::setR6EncryptionParameters(
529
    char const* user_password,
530
    char const* owner_password,
531
    bool allow_accessibility,
532
    bool allow_extract,
533
    bool allow_assemble,
534
    bool allow_annotate_and_form,
535
    bool allow_form_filling,
536
    bool allow_modify_other,
537
    qpdf_r3_print_e print,
538
    bool encrypt_metadata)
539
0
{
540
0
    m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata);
541
0
    interpretR3EncryptionParameters(
542
0
        allow_accessibility,
543
0
        allow_extract,
544
0
        allow_assemble,
545
0
        allow_annotate_and_form,
546
0
        allow_form_filling,
547
0
        allow_modify_other,
548
0
        print,
549
0
        qpdf_r3m_all);
550
0
    m->encrypt_use_aes = true;
551
0
    setEncryptionParameters(user_password, owner_password);
552
0
}
553
554
void
555
QPDFWriter::interpretR3EncryptionParameters(
556
    bool allow_accessibility,
557
    bool allow_extract,
558
    bool allow_assemble,
559
    bool allow_annotate_and_form,
560
    bool allow_form_filling,
561
    bool allow_modify_other,
562
    qpdf_r3_print_e print,
563
    qpdf_r3_modify_e modify)
564
7.75k
{
565
    // Acrobat 5 security options:
566
567
    // Checkboxes:
568
    //   Enable Content Access for the Visually Impaired
569
    //   Allow Content Copying and Extraction
570
571
    // Allowed changes menu:
572
    //   None
573
    //   Only Document Assembly
574
    //   Only Form Field Fill-in or Signing
575
    //   Comment Authoring, Form Field Fill-in or Signing
576
    //   General Editing, Comment and Form Field Authoring
577
578
    // Allowed printing menu:
579
    //   None
580
    //   Low Resolution
581
    //   Full printing
582
583
    // Meanings of bits in P when R >= 3
584
    //
585
    //  3: low-resolution printing
586
    //  4: document modification except as controlled by 6, 9, and 11
587
    //  5: extraction
588
    //  6: add/modify annotations (comment), fill in forms
589
    //     if 4+6 are set, also allows modification of form fields
590
    //  9: fill in forms even if 6 is clear
591
    // 10: accessibility; ignored by readers, should always be set
592
    // 11: document assembly even if 4 is clear
593
    // 12: high-resolution printing
594
7.75k
    if (!allow_accessibility && m->encryption->getR() <= 3) {
595
        // Bit 10 is deprecated and should always be set.  This used to mean accessibility.  There
596
        // is no way to disable accessibility with R > 3.
597
0
        m->encryption->setP(10, false);
598
0
    }
599
7.75k
    if (!allow_extract) {
600
0
        m->encryption->setP(5, false);
601
0
    }
602
603
7.75k
    switch (print) {
604
0
    case qpdf_r3p_none:
605
0
        m->encryption->setP(3, false); // any printing
606
0
        [[fallthrough]];
607
0
    case qpdf_r3p_low:
608
0
        m->encryption->setP(12, false); // high resolution printing
609
0
        [[fallthrough]];
610
7.75k
    case qpdf_r3p_full:
611
7.75k
        break;
612
        // no default so gcc warns for missing cases
613
7.75k
    }
614
615
    // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full
616
    // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're
617
    // stuck with it. See also allow checks below to control the bits individually.
618
619
    // NOT EXERCISED IN TEST SUITE
620
7.75k
    switch (modify) {
621
0
    case qpdf_r3m_none:
622
0
        m->encryption->setP(11, false); // document assembly
623
0
        [[fallthrough]];
624
0
    case qpdf_r3m_assembly:
625
0
        m->encryption->setP(9, false); // filling in form fields
626
0
        [[fallthrough]];
627
0
    case qpdf_r3m_form:
628
0
        m->encryption->setP(6, false); // modify annotations, fill in form fields
629
0
        [[fallthrough]];
630
0
    case qpdf_r3m_annotate:
631
0
        m->encryption->setP(4, false); // other modifications
632
0
        [[fallthrough]];
633
7.75k
    case qpdf_r3m_all:
634
7.75k
        break;
635
        // no default so gcc warns for missing cases
636
7.75k
    }
637
    // END NOT EXERCISED IN TEST SUITE
638
639
7.75k
    if (!allow_assemble) {
640
0
        m->encryption->setP(11, false);
641
0
    }
642
7.75k
    if (!allow_annotate_and_form) {
643
0
        m->encryption->setP(6, false);
644
0
    }
645
7.75k
    if (!allow_form_filling) {
646
0
        m->encryption->setP(9, false);
647
0
    }
648
7.75k
    if (!allow_modify_other) {
649
0
        m->encryption->setP(4, false);
650
0
    }
651
7.75k
}
652
653
void
654
QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password)
655
7.75k
{
656
7.75k
    generateID();
657
7.75k
    m->encryption->setId1(m->id1);
658
7.75k
    m->encryption_key = m->encryption->compute_parameters(user_password, owner_password);
659
7.75k
    setEncryptionMinimumVersion();
660
7.75k
}
661
662
void
663
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
664
0
{
665
0
    m->preserve_encryption = false;
666
0
    QPDFObjectHandle trailer = qpdf.getTrailer();
667
0
    if (trailer.hasKey("/Encrypt")) {
668
0
        generateID();
669
0
        m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
670
0
        QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
671
0
        int V = encrypt.getKey("/V").getIntValueAsInt();
672
0
        int key_len = 5;
673
0
        if (V > 1) {
674
0
            key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
675
0
        }
676
0
        const bool encrypt_metadata =
677
0
            encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()
678
0
            ? encrypt.getKey("/EncryptMetadata").getBoolValue()
679
0
            : true;
680
0
        if (V >= 4) {
681
            // When copying encryption parameters, use AES even if the original file did not.
682
            // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of
683
            // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF
684
            // all potentially having different values.
685
0
            m->encrypt_use_aes = true;
686
0
        }
687
0
        QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1);
688
0
        QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1);
689
690
0
        m->encryption = std::make_unique<QPDF::EncryptionData>(
691
0
            V,
692
0
            encrypt.getKey("/R").getIntValueAsInt(),
693
0
            key_len,
694
0
            static_cast<int>(encrypt.getKey("/P").getIntValue()),
695
0
            encrypt.getKey("/O").getStringValue(),
696
0
            encrypt.getKey("/U").getStringValue(),
697
0
            V < 5 ? "" : encrypt.getKey("/OE").getStringValue(),
698
0
            V < 5 ? "" : encrypt.getKey("/UE").getStringValue(),
699
0
            V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(),
700
0
            m->id1, // m->id1 == the other file's id1
701
0
            encrypt_metadata);
702
0
        m->encryption_key = V >= 5
703
0
            ? qpdf.getEncryptionKey()
704
0
            : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword());
705
0
        setEncryptionMinimumVersion();
706
0
    }
707
0
}
708
709
void
710
QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level)
711
0
{
712
0
    if (!m->encryption) {
713
0
        return;
714
0
    }
715
0
    if (compareVersions(major, minor, 1, 3) < 0) {
716
0
        m->encryption = nullptr;
717
0
        return;
718
0
    }
719
0
    int V = m->encryption->getV();
720
0
    int R = m->encryption->getR();
721
0
    if (compareVersions(major, minor, 1, 4) < 0) {
722
0
        if (V > 1 || R > 2) {
723
0
            m->encryption = nullptr;
724
0
        }
725
0
    } else if (compareVersions(major, minor, 1, 5) < 0) {
726
0
        if (V > 2 || R > 3) {
727
0
            m->encryption = nullptr;
728
0
        }
729
0
    } else if (compareVersions(major, minor, 1, 6) < 0) {
730
0
        if (m->encrypt_use_aes) {
731
0
            m->encryption = nullptr;
732
0
        }
733
0
    } else if (
734
0
        (compareVersions(major, minor, 1, 7) < 0) ||
735
0
        ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) {
736
0
        if (V >= 5 || R >= 5) {
737
0
            m->encryption = nullptr;
738
0
        }
739
0
    }
740
741
0
    if (!m->encryption) {
742
0
        QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
743
0
    }
744
0
}
745
746
void
747
QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const
748
15.4k
{
749
15.4k
    major = QUtil::string_to_int(version.c_str());
750
15.4k
    minor = 0;
751
15.4k
    size_t p = version.find('.');
752
15.4k
    if ((p != std::string::npos) && (version.length() > p)) {
753
15.4k
        minor = QUtil::string_to_int(version.substr(p + 1).c_str());
754
15.4k
    }
755
15.4k
    std::string tmp = std::to_string(major) + "." + std::to_string(minor);
756
15.4k
    if (tmp != version) {
757
        // The version number in the input is probably invalid. This happens with some files that
758
        // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately
759
        // QPDFWriter doesn't have a way to give a warning, so we just ignore this case.
760
30
    }
761
15.4k
}
762
763
int
764
QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const
765
7.69k
{
766
7.69k
    if (major1 < major2) {
767
116
        return -1;
768
7.57k
    } else if (major1 > major2) {
769
125
        return 1;
770
7.45k
    } else if (minor1 < minor2) {
771
6.42k
        return -1;
772
6.42k
    } else if (minor1 > minor2) {
773
576
        return 1;
774
576
    } else {
775
454
        return 0;
776
454
    }
777
7.69k
}
778
779
void
780
QPDFWriter::setEncryptionMinimumVersion()
781
7.74k
{
782
7.74k
    auto const R = m->encryption->getR();
783
7.74k
    if (R >= 6) {
784
0
        setMinimumPDFVersion("1.7", 8);
785
7.74k
    } else if (R == 5) {
786
0
        setMinimumPDFVersion("1.7", 3);
787
7.74k
    } else if (R == 4) {
788
0
        setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5");
789
7.74k
    } else if (R == 3) {
790
7.74k
        setMinimumPDFVersion("1.4");
791
7.74k
    } else {
792
0
        setMinimumPDFVersion("1.3");
793
0
    }
794
7.74k
}
795
796
void
797
QPDFWriter::setDataKey(int objid)
798
80.4k
{
799
80.4k
    if (m->encryption) {
800
80.4k
        m->cur_data_key = QPDF::compute_data_key(
801
80.4k
            m->encryption_key,
802
80.4k
            objid,
803
80.4k
            0,
804
80.4k
            m->encrypt_use_aes,
805
80.4k
            m->encryption->getV(),
806
80.4k
            m->encryption->getR());
807
80.4k
    }
808
80.4k
}
809
810
unsigned int
811
QPDFWriter::bytesNeeded(long long n)
812
0
{
813
0
    unsigned int bytes = 0;
814
0
    while (n) {
815
0
        ++bytes;
816
0
        n >>= 8;
817
0
    }
818
0
    return bytes;
819
0
}
820
821
void
822
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
823
0
{
824
0
    if (bytes > sizeof(unsigned long long)) {
825
0
        throw std::logic_error("QPDFWriter::writeBinary called with too many bytes");
826
0
    }
827
0
    unsigned char data[sizeof(unsigned long long)];
828
0
    for (unsigned int i = 0; i < bytes; ++i) {
829
0
        data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
830
0
        val >>= 8;
831
0
    }
832
0
    m->pipeline->write(data, bytes);
833
0
}
834
835
void
836
QPDFWriter::writeString(std::string_view str)
837
3.50M
{
838
3.50M
    m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
839
3.50M
}
840
841
void
842
QPDFWriter::writeStringQDF(std::string_view str)
843
902k
{
844
902k
    if (m->qdf_mode) {
845
0
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
846
0
    }
847
902k
}
848
849
void
850
QPDFWriter::writeStringNoQDF(std::string_view str)
851
21.3k
{
852
21.3k
    if (!m->qdf_mode) {
853
21.3k
        m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size());
854
21.3k
    }
855
21.3k
}
856
857
void
858
QPDFWriter::writePad(size_t nspaces)
859
0
{
860
0
    writeString(std::string(nspaces, ' '));
861
0
}
862
863
// Push p onto the pipeline stack and return it. p must not be a pl::Count: Count pipelines are
// only pushed by initializePipelineStack and activatePipelineStack.
Pipeline*
QPDFWriter::pushPipeline(Pipeline* p)
{
    qpdf_assert_debug(dynamic_cast<pl::Count*>(p) == nullptr);
    m->pipeline_stack.emplace_back(p);
    return p;
}
870
871
void
872
QPDFWriter::initializePipelineStack(Pipeline* p)
873
7.75k
{
874
7.75k
    m->pipeline = new pl::Count(1, p);
875
7.75k
    m->to_delete.emplace_back(std::shared_ptr<Pipeline>(m->pipeline));
876
7.75k
    m->pipeline_stack.emplace_back(m->pipeline);
877
7.75k
}
878
879
void
880
QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str)
881
30.9k
{
882
30.9k
    activatePipelineStack(pp, false, &str, nullptr);
883
30.9k
}
884
885
void
886
QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link)
887
0
{
888
0
    m->count_buffer.clear();
889
0
    activatePipelineStack(pp, false, &m->count_buffer, std::move(link));
890
0
}
891
892
void
893
QPDFWriter::activatePipelineStack(
894
    PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link)
895
52.8k
{
896
52.8k
    pl::Count* c;
897
52.8k
    if (link) {
898
0
        c = new pl::Count(m->next_stack_id, m->count_buffer, std::move(link));
899
52.8k
    } else if (discard) {
900
0
        c = new pl::Count(m->next_stack_id, nullptr);
901
52.8k
    } else if (!str) {
902
21.8k
        c = new pl::Count(m->next_stack_id, m->pipeline_stack.back());
903
30.9k
    } else {
904
30.9k
        c = new pl::Count(m->next_stack_id, *str);
905
30.9k
    }
906
52.8k
    pp.stack_id = m->next_stack_id;
907
52.8k
    m->pipeline_stack.emplace_back(c);
908
52.8k
    m->pipeline = c;
909
52.8k
    ++m->next_stack_id;
910
52.8k
}
911
912
// Undo the activatePipelineStack call this popper was passed to. A popper whose stack_id is still
// zero was never activated and does nothing. Otherwise the current Count pipeline is finished,
// deleted and popped, followed by every non-Count pipeline beneath it, until the next Count
// pipeline is on top of the stack; that Count becomes the current pipeline again.
QPDFWriter::PipelinePopper::~PipelinePopper()
{
    if (stack_id == 0) {
        return;
    }
    auto& stack = qw->m->pipeline_stack;
    qpdf_assert_debug(stack.size() >= 2);
    qw->m->pipeline->finish();
    qpdf_assert_debug(dynamic_cast<pl::Count*>(stack.back()) == qw->m->pipeline);
    // It might be possible for this assertion to fail if writeLinearized exits by exception when
    // using deterministic ID, but I don't think so. As of this writing, this is the only case in
    // which two dynamically allocated PipelinePopper objects ever exist at the same time, so the
    // assertion will fail if they get popped out of order from automatic destruction.
    qpdf_assert_debug(qw->m->pipeline->id() == stack_id);
    delete stack.back();
    stack.pop_back();
    while (dynamic_cast<pl::Count*>(stack.back()) == nullptr) {
        Pipeline* top = stack.back();
        // Forget the MD5 pipeline before it is destroyed so no dangling pointer is left behind.
        if (dynamic_cast<Pl_MD5*>(top) == qw->m->md5_pipeline) {
            qw->m->md5_pipeline = nullptr;
        }
        stack.pop_back();
        delete top;
    }
    qw->m->pipeline = dynamic_cast<pl::Count*>(stack.back());
}
937
938
void
939
QPDFWriter::adjustAESStreamLength(size_t& length)
940
21.9k
{
941
21.9k
    if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) {
942
        // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16.  It will
943
        // also be prepended by 16 bits of random data.
944
0
        length += 32 - (length & 0xf);
945
0
    }
946
21.9k
}
947
948
void
949
QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
950
21.8k
{
951
21.8k
    if (m->encryption && !m->cur_data_key.empty()) {
952
21.8k
        Pipeline* p = nullptr;
953
21.8k
        if (m->encrypt_use_aes) {
954
0
            p = new Pl_AES_PDF(
955
0
                "aes stream encryption",
956
0
                m->pipeline,
957
0
                true,
958
0
                QUtil::unsigned_char_pointer(m->cur_data_key),
959
0
                m->cur_data_key.length());
960
21.8k
        } else {
961
21.8k
            p = new Pl_RC4(
962
21.8k
                "rc4 stream encryption",
963
21.8k
                m->pipeline,
964
21.8k
                QUtil::unsigned_char_pointer(m->cur_data_key),
965
21.8k
                QIntC::to_int(m->cur_data_key.length()));
966
21.8k
        }
967
21.8k
        pushPipeline(p);
968
21.8k
    }
969
    // Must call this unconditionally so we can call popPipelineStack to balance
970
    // pushEncryptionFilter().
971
21.8k
    activatePipelineStack(pp);
972
21.8k
}
973
974
void
975
QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
976
0
{
977
0
    if (!m->id2.empty()) {
978
        // Can't happen in the code
979
0
        throw std::logic_error(
980
0
            "Deterministic ID computation enabled after ID generation has already occurred.");
981
0
    }
982
0
    qpdf_assert_debug(m->deterministic_id);
983
0
    qpdf_assert_debug(m->md5_pipeline == nullptr);
984
0
    qpdf_assert_debug(m->pipeline->getCount() == 0);
985
0
    m->md5_pipeline = new Pl_MD5("qpdf md5", m->pipeline);
986
0
    m->md5_pipeline->persistAcrossFinish(true);
987
    // Special case code in popPipelineStack clears m->md5_pipeline upon deletion.
988
0
    pushPipeline(m->md5_pipeline);
989
0
    activatePipelineStack(pp);
990
0
}
991
992
void
993
QPDFWriter::computeDeterministicIDData()
994
0
{
995
0
    qpdf_assert_debug(m->md5_pipeline != nullptr);
996
0
    qpdf_assert_debug(m->deterministic_id_data.empty());
997
0
    m->deterministic_id_data = m->md5_pipeline->getHexDigest();
998
0
    m->md5_pipeline->enable(false);
999
0
}
1000
1001
int
1002
QPDFWriter::openObject(int objid)
1003
87.9k
{
1004
87.9k
    if (objid == 0) {
1005
7.50k
        objid = m->next_objid++;
1006
7.50k
    }
1007
87.9k
    m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1008
87.9k
    writeString(std::to_string(objid));
1009
87.9k
    writeString(" 0 obj\n");
1010
87.9k
    return objid;
1011
87.9k
}
1012
1013
void
1014
QPDFWriter::closeObject(int objid)
1015
87.8k
{
1016
    // Write a newline before endobj as it makes the file easier to repair.
1017
87.8k
    writeString("\nendobj\n");
1018
87.8k
    writeStringQDF("\n");
1019
87.8k
    auto& new_obj = m->new_obj[objid];
1020
87.8k
    new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1021
87.8k
}
1022
1023
void
1024
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og)
1025
0
{
1026
0
    int objid = og.getObj();
1027
0
    if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) {
1028
        // This is not an object stream.
1029
0
        return;
1030
0
    }
1031
1032
    // Reserve numbers for the objects that belong to this object stream.
1033
0
    for (auto const& iter: m->object_stream_to_objects[objid]) {
1034
0
        m->obj[iter].renumber = m->next_objid++;
1035
0
    }
1036
0
}
1037
1038
void
1039
QPDFWriter::enqueueObject(QPDFObjectHandle object)
1040
2.52M
{
1041
2.52M
    if (object.isIndirect()) {
1042
        // This owner check can only be done for indirect objects. It is possible for a direct
1043
        // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from
1044
        // one file was insert into another file without copying. Doing that is safe even if the
1045
        // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner.
1046
225k
        if (object.getOwningQPDF() != &(m->pdf)) {
1047
0
            QTC::TC("qpdf", "QPDFWriter foreign object");
1048
0
            throw std::logic_error(
1049
0
                "QPDFObjectHandle from different QPDF found while writing.  Use "
1050
0
                "QPDF::copyForeignObject to add objects from another file.");
1051
0
        }
1052
1053
225k
        if (m->qdf_mode && object.isStreamOfType("/XRef")) {
1054
            // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so
1055
            // will confuse fix-qdf, which expects to see only one XRef stream at the end of the
1056
            // file. This case can occur when creating a QDF from a file with object streams when
1057
            // preserving unreferenced objects since the old cross reference streams are not
1058
            // actually referenced by object number.
1059
0
            QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1060
0
            return;
1061
0
        }
1062
1063
225k
        QPDFObjGen og = object.getObjGen();
1064
225k
        auto& obj = m->obj[og];
1065
1066
225k
        if (obj.renumber == 0) {
1067
80.6k
            if (obj.object_stream > 0) {
1068
                // This is in an object stream.  Don't process it here.  Instead, enqueue the object
1069
                // stream.  Object streams always have generation 0.
1070
                // Detect loops by storing invalid object ID -1, which will get overwritten later.
1071
0
                obj.renumber = -1;
1072
0
                enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1073
80.6k
            } else {
1074
80.6k
                m->object_queue.push_back(object);
1075
80.6k
                obj.renumber = m->next_objid++;
1076
1077
80.6k
                if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) {
1078
                    // For linearized files, uncompressed objects go at end, and we take care of
1079
                    // assigning numbers to them elsewhere.
1080
0
                    if (!m->linearized) {
1081
0
                        assignCompressedObjectNumbers(og);
1082
0
                    }
1083
80.6k
                } else if ((!m->direct_stream_lengths) && object.isStream()) {
1084
                    // reserve next object ID for length
1085
0
                    ++m->next_objid;
1086
0
                }
1087
80.6k
            }
1088
144k
        } else if (obj.renumber == -1) {
1089
            // This can happen if a specially constructed file indicates that an object stream is
1090
            // inside itself.
1091
0
        }
1092
225k
        return;
1093
2.29M
    } else if (!m->linearized) {
1094
2.29M
        if (object.isArray()) {
1095
1.61M
            for (auto& item: object.as_array()) {
1096
1.61M
                enqueueObject(item);
1097
1.61M
            }
1098
1.29M
        } else if (auto d = object.as_dictionary()) {
1099
1.29M
            for (auto const& item: d) {
1100
199k
                if (!item.second.null()) {
1101
150k
                    enqueueObject(item.second);
1102
150k
                }
1103
199k
            }
1104
1.29M
        }
1105
2.29M
    } else {
1106
        // ignore
1107
0
    }
1108
2.52M
}
1109
1110
void
1111
QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1112
737k
{
1113
737k
    if (!m->linearized) {
1114
737k
        enqueueObject(child);
1115
737k
    }
1116
737k
    if (child.isIndirect()) {
1117
122k
        writeString(std::to_string(m->obj[child].renumber));
1118
122k
        writeString(" 0 R");
1119
614k
    } else {
1120
614k
        unparseObject(child, level, flags);
1121
614k
    }
1122
737k
}
1123
1124
void
1125
QPDFWriter::writeTrailer(
1126
    trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass)
1127
7.50k
{
1128
7.50k
    QPDFObjectHandle trailer = getTrimmedTrailer();
1129
7.50k
    if (xref_stream) {
1130
0
        m->cur_data_key.clear();
1131
7.50k
    } else {
1132
7.50k
        writeString("trailer <<");
1133
7.50k
    }
1134
7.50k
    writeStringQDF("\n");
1135
7.50k
    if (which == t_lin_second) {
1136
0
        writeString(" /Size ");
1137
0
        writeString(std::to_string(size));
1138
7.50k
    } else {
1139
16.6k
        for (auto const& [key, value]: trailer.as_dictionary()) {
1140
16.6k
            if (value.null()) {
1141
2.80k
                continue;
1142
2.80k
            }
1143
13.8k
            writeStringQDF("  ");
1144
13.8k
            writeStringNoQDF(" ");
1145
13.8k
            writeString(Name::normalize(key));
1146
13.8k
            writeString(" ");
1147
13.8k
            if (key == "/Size") {
1148
1.88k
                writeString(std::to_string(size));
1149
1.88k
                if (which == t_lin_first) {
1150
0
                    writeString(" /Prev ");
1151
0
                    qpdf_offset_t pos = m->pipeline->getCount();
1152
0
                    writeString(std::to_string(prev));
1153
0
                    writePad(QIntC::to_size(pos - m->pipeline->getCount() + 21));
1154
0
                }
1155
11.9k
            } else {
1156
11.9k
                unparseChild(value, 1, 0);
1157
11.9k
            }
1158
13.8k
            writeStringQDF("\n");
1159
13.8k
        }
1160
7.50k
    }
1161
1162
    // Write ID
1163
7.50k
    writeStringQDF(" ");
1164
7.50k
    writeString(" /ID [");
1165
7.50k
    if (linearization_pass == 1) {
1166
0
        std::string original_id1 = getOriginalID1();
1167
0
        if (original_id1.empty()) {
1168
0
            writeString("<00000000000000000000000000000000>");
1169
0
        } else {
1170
            // Write a string of zeroes equal in length to the representation of the original ID.
1171
            // While writing the original ID would have the same number of bytes, it would cause a
1172
            // change to the deterministic ID generated by older versions of the software that
1173
            // hard-coded the length of the ID to 16 bytes.
1174
0
            writeString("<");
1175
0
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1176
0
            for (size_t i = 0; i < len; ++i) {
1177
0
                writeString("0");
1178
0
            }
1179
0
            writeString(">");
1180
0
        }
1181
0
        writeString("<00000000000000000000000000000000>");
1182
7.50k
    } else {
1183
7.50k
        if ((linearization_pass == 0) && (m->deterministic_id)) {
1184
0
            computeDeterministicIDData();
1185
0
        }
1186
7.50k
        generateID();
1187
7.50k
        writeString(QPDF_String(m->id1).unparse(true));
1188
7.50k
        writeString(QPDF_String(m->id2).unparse(true));
1189
7.50k
    }
1190
7.50k
    writeString("]");
1191
1192
7.50k
    if (which != t_lin_second) {
1193
        // Write reference to encryption dictionary
1194
7.50k
        if (m->encryption) {
1195
7.50k
            writeString(" /Encrypt ");
1196
7.50k
            writeString(std::to_string(m->encryption_dict_objid));
1197
7.50k
            writeString(" 0 R");
1198
7.50k
        }
1199
7.50k
    }
1200
1201
7.50k
    writeStringQDF("\n");
1202
7.50k
    writeStringNoQDF(" ");
1203
7.50k
    writeString(">>");
1204
7.50k
}
1205
1206
bool
1207
QPDFWriter::willFilterStream(
1208
    QPDFObjectHandle stream,
1209
    bool& compress_stream,  // out only
1210
    bool& is_root_metadata, // out only
1211
    std::string* stream_data)
1212
21.9k
{
1213
21.9k
    compress_stream = false;
1214
21.9k
    is_root_metadata = false;
1215
1216
21.9k
    QPDFObjGen old_og = stream.getObjGen();
1217
21.9k
    QPDFObjectHandle stream_dict = stream.getDict();
1218
1219
21.9k
    if (stream.isRootMetadata()) {
1220
61
        is_root_metadata = true;
1221
61
    }
1222
21.9k
    bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1223
21.9k
    bool filter_on_write = stream.getFilterOnWrite();
1224
21.9k
    if (!filter_on_write) {
1225
0
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1226
0
        filter = false;
1227
0
    }
1228
21.9k
    if (filter_on_write && m->compress_streams) {
1229
        // Don't filter if the stream is already compressed with FlateDecode. This way we don't make
1230
        // it worse if the original file used a better Flate algorithm, and we don't spend time and
1231
        // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1232
        // setRecompressFlate(true).
1233
21.9k
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1234
21.9k
        if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
1235
21.9k
            (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1236
3.15k
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1237
3.15k
            filter = false;
1238
3.15k
        }
1239
21.9k
    }
1240
21.9k
    bool normalize = false;
1241
21.9k
    bool uncompress = false;
1242
21.9k
    if (filter_on_write && is_root_metadata &&
1243
21.9k
        (!m->encryption || !m->encryption->getEncryptMetadata())) {
1244
0
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1245
0
        filter = true;
1246
0
        compress_stream = false;
1247
0
        uncompress = true;
1248
21.9k
    } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) {
1249
0
        normalize = true;
1250
0
        filter = true;
1251
21.9k
    } else if (filter_on_write && filter && m->compress_streams) {
1252
18.7k
        compress_stream = true;
1253
18.7k
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1254
18.7k
    }
1255
1256
    // Disable compression for empty streams to improve compatibility
1257
21.9k
    if (stream_dict.getKey("/Length").isInteger() &&
1258
21.9k
        stream_dict.getKey("/Length").getIntValue() == 0) {
1259
27
        filter = true;
1260
27
        compress_stream = false;
1261
27
    }
1262
1263
21.9k
    bool filtered = false;
1264
30.9k
    for (bool first_attempt: {true, false}) {
1265
30.9k
        PipelinePopper pp_stream_data(this);
1266
30.9k
        if (stream_data != nullptr) {
1267
30.9k
            activatePipelineStack(pp_stream_data, *stream_data);
1268
30.9k
        } else {
1269
0
            activatePipelineStack(pp_stream_data, true);
1270
0
        }
1271
30.9k
        try {
1272
30.9k
            filtered = stream.pipeStreamData(
1273
30.9k
                m->pipeline,
1274
30.9k
                !filter ? 0
1275
30.9k
                        : ((normalize ? qpdf_ef_normalize : 0) |
1276
18.7k
                           (compress_stream ? qpdf_ef_compress : 0)),
1277
30.9k
                !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1278
30.9k
                false,
1279
30.9k
                first_attempt);
1280
30.9k
            if (filter && !filtered) {
1281
                // Try again
1282
9.04k
                filter = false;
1283
9.04k
                stream.setFilterOnWrite(false);
1284
21.9k
            } else {
1285
21.9k
                break;
1286
21.9k
            }
1287
30.9k
        } catch (std::runtime_error& e) {
1288
32
            if (filter && first_attempt) {
1289
26
                stream.warnIfPossible("error while getting stream data: "s + e.what());
1290
26
                stream.warnIfPossible("qpdf will attempt to write the damaged stream unchanged");
1291
26
                filter = false;
1292
26
                stream.setFilterOnWrite(false);
1293
26
                continue;
1294
26
            }
1295
6
            throw std::runtime_error(
1296
6
                "error while getting stream data for " + stream.unparse() + ": " + e.what());
1297
32
        }
1298
9.04k
        if (stream_data) {
1299
9.04k
            stream_data->clear();
1300
9.04k
        }
1301
9.04k
    }
1302
21.9k
    if (!filtered) {
1303
12.1k
        compress_stream = false;
1304
12.1k
    }
1305
21.9k
    return filtered;
1306
21.9k
}
1307
1308
void
1309
QPDFWriter::unparseObject(
1310
    QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress)
1311
716k
{
1312
716k
    QPDFObjGen old_og = object.getObjGen();
1313
716k
    int child_flags = flags & ~f_stream;
1314
716k
    if (level < 0) {
1315
0
        throw std::logic_error("invalid level in QPDFWriter::unparseObject");
1316
0
    }
1317
    // For non-qdf, "indent" is a single space between tokens. For qdf, indent includes the
1318
    // preceding newline.
1319
716k
    std::string indent = " ";
1320
716k
    if (m->qdf_mode) {
1321
0
        indent.append(static_cast<size_t>(2 * level), ' ');
1322
0
        indent[0] = '\n';
1323
0
    }
1324
1325
716k
    if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1326
        // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1327
        // [ in the /H key of the linearization parameter dictionary.  We'll do this unconditionally
1328
        // for all arrays because it looks nicer and doesn't make the files that much bigger.
1329
61.4k
        writeString("[");
1330
408k
        for (auto const& item: object.as_array()) {
1331
408k
            writeString(indent);
1332
408k
            writeStringQDF("  ");
1333
408k
            unparseChild(item, level + 1, child_flags);
1334
408k
        }
1335
61.4k
        writeString(indent);
1336
61.4k
        writeString("]");
1337
655k
    } else if (tc == ::ot_dictionary) {
1338
        // Handle special cases for specific dictionaries.
1339
1340
96.8k
        if (old_og == m->root_og) {
1341
            // Extensions dictionaries.
1342
1343
            // We have one of several cases:
1344
            //
1345
            // * We need ADBE
1346
            //    - We already have Extensions
1347
            //       - If it has the right ADBE, preserve it
1348
            //       - Otherwise, replace ADBE
1349
            //    - We don't have Extensions: create one from scratch
1350
            // * We don't want ADBE
1351
            //    - We already have Extensions
1352
            //       - If it only has ADBE, remove it
1353
            //       - If it has other things, keep those and remove ADBE
1354
            //    - We have no extensions: no action required
1355
            //
1356
            // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1357
            // dictionary, so we can modify in place.
1358
1359
7.48k
            auto extensions = object.getKey("/Extensions");
1360
7.48k
            const bool has_extensions = extensions.isDictionary();
1361
7.48k
            const bool need_extensions_adbe = m->final_extension_level > 0;
1362
1363
7.48k
            if (has_extensions || need_extensions_adbe) {
1364
                // Make a shallow copy of this object so we can modify it safely without affecting
1365
                // the original. This code has logic to skip certain keys in agreement with
1366
                // prepareFileForWrite and with skip_stream_parameters so that replacing them
1367
                // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1368
                // here because all we are doing is removing or replacing top-level keys.
1369
235
                object = object.unsafeShallowCopy();
1370
235
                if (!has_extensions) {
1371
0
                    extensions = QPDFObjectHandle();
1372
0
                }
1373
1374
235
                const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1375
235
                const bool have_extensions_other =
1376
235
                    extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1377
1378
235
                if (need_extensions_adbe) {
1379
28
                    if (!(have_extensions_other || have_extensions_adbe)) {
1380
                        // We need Extensions and don't have it.  Create it here.
1381
0
                        QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1382
0
                        extensions = object.replaceKeyAndGetNew(
1383
0
                            "/Extensions", QPDFObjectHandle::newDictionary());
1384
0
                    }
1385
207
                } else if (!have_extensions_other) {
1386
                    // We have Extensions dictionary and don't want one.
1387
66
                    if (have_extensions_adbe) {
1388
64
                        QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1389
64
                        object.removeKey("/Extensions");
1390
64
                        extensions = QPDFObjectHandle(); // uninitialized
1391
64
                    }
1392
66
                }
1393
1394
235
                if (extensions) {
1395
171
                    QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1396
171
                    QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1397
171
                    if (adbe.isDictionary() &&
1398
171
                        adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1399
171
                        adbe.getKey("/ExtensionLevel").isInteger() &&
1400
171
                        (adbe.getKey("/ExtensionLevel").getIntValue() ==
1401
15
                         m->final_extension_level)) {
1402
11
                        QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1403
160
                    } else {
1404
160
                        if (need_extensions_adbe) {
1405
17
                            extensions.replaceKey(
1406
17
                                "/ADBE",
1407
17
                                QPDFObjectHandle::parse(
1408
17
                                    "<< /BaseVersion /" + m->final_pdf_version +
1409
17
                                    " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1410
17
                                    " >>"));
1411
143
                        } else {
1412
143
                            QTC::TC("qpdf", "QPDFWriter remove ADBE");
1413
143
                            extensions.removeKey("/ADBE");
1414
143
                        }
1415
160
                    }
1416
171
                }
1417
235
            }
1418
7.48k
        }
1419
1420
        // Stream dictionaries.
1421
1422
96.8k
        if (flags & f_stream) {
1423
            // Suppress /Length since we will write it manually
1424
1425
            // Make a shallow copy of this object so we can modify it safely without affecting the
1426
            // original. This code has logic to skip certain keys in agreement with
1427
            // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1428
            // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1429
            // all we are doing is removing or replacing top-level keys.
1430
21.9k
            object = object.unsafeShallowCopy();
1431
1432
21.9k
            object.removeKey("/Length");
1433
1434
            // If /DecodeParms is an empty list, remove it.
1435
21.9k
            if (object.getKey("/DecodeParms").isArray() &&
1436
21.9k
                (0 == object.getKey("/DecodeParms").getArrayNItems())) {
1437
1
                QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1438
1
                object.removeKey("/DecodeParms");
1439
1
            }
1440
1441
21.9k
            if (flags & f_filtered) {
1442
                // We will supply our own filter and decode parameters.
1443
9.79k
                object.removeKey("/Filter");
1444
9.79k
                object.removeKey("/DecodeParms");
1445
12.1k
            } else {
1446
                // Make sure, no matter what else we have, that we don't have /Crypt in the output
1447
                // filters.
1448
12.1k
                QPDFObjectHandle filter = object.getKey("/Filter");
1449
12.1k
                QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1450
12.1k
                if (filter.isOrHasName("/Crypt")) {
1451
121
                    if (filter.isName()) {
1452
17
                        object.removeKey("/Filter");
1453
17
                        object.removeKey("/DecodeParms");
1454
104
                    } else {
1455
104
                        int idx = -1;
1456
4.93k
                        for (int i = 0; i < filter.getArrayNItems(); ++i) {
1457
4.93k
                            QPDFObjectHandle item = filter.getArrayItem(i);
1458
4.93k
                            if (item.isNameAndEquals("/Crypt")) {
1459
104
                                idx = i;
1460
104
                                break;
1461
104
                            }
1462
4.93k
                        }
1463
104
                        if (idx >= 0) {
1464
                            // If filter is an array, then the code in QPDF_Stream has already
1465
                            // verified that DecodeParms and Filters are arrays of the same length,
1466
                            // but if they weren't for some reason, eraseItem does type and bounds
1467
                            // checking.
1468
104
                            QTC::TC("qpdf", "QPDFWriter remove Crypt");
1469
104
                            filter.eraseItem(idx);
1470
104
                            decode_parms.eraseItem(idx);
1471
104
                        }
1472
104
                    }
1473
121
                }
1474
12.1k
            }
1475
21.9k
        }
1476
1477
96.8k
        writeString("<<");
1478
1479
370k
        for (auto const& [key, value]: object.as_dictionary()) {
1480
370k
            if (!value.null()) {
1481
316k
                writeString(indent);
1482
316k
                writeStringQDF("  ");
1483
316k
                writeString(Name::normalize(key));
1484
316k
                writeString(" ");
1485
316k
                if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1486
316k
                    object.hasKey("/ByteRange")) {
1487
10
                    QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1488
10
                    unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1489
316k
                } else {
1490
316k
                    unparseChild(value, level + 1, child_flags);
1491
316k
                }
1492
316k
            }
1493
370k
        }
1494
1495
96.8k
        if (flags & f_stream) {
1496
21.8k
            writeString(indent);
1497
21.8k
            writeStringQDF("  ");
1498
21.8k
            writeString("/Length ");
1499
1500
21.8k
            if (m->direct_stream_lengths) {
1501
21.8k
                writeString(std::to_string(stream_length));
1502
21.8k
            } else {
1503
0
                writeString(std::to_string(m->cur_stream_length_id));
1504
0
                writeString(" 0 R");
1505
0
            }
1506
21.8k
            if (compress && (flags & f_filtered)) {
1507
9.70k
                writeString(indent);
1508
9.70k
                writeStringQDF("  ");
1509
9.70k
                writeString("/Filter /FlateDecode");
1510
9.70k
            }
1511
21.8k
        }
1512
1513
96.8k
        writeString(indent);
1514
96.8k
        writeString(">>");
1515
558k
    } else if (tc == ::ot_stream) {
1516
        // Write stream data to a buffer.
1517
21.9k
        if (!m->direct_stream_lengths) {
1518
0
            m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1519
0
        }
1520
1521
21.9k
        flags |= f_stream;
1522
21.9k
        bool compress_stream = false;
1523
21.9k
        bool is_metadata = false;
1524
21.9k
        std::string stream_data;
1525
21.9k
        if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1526
9.79k
            flags |= f_filtered;
1527
9.79k
        }
1528
21.9k
        QPDFObjectHandle stream_dict = object.getDict();
1529
1530
21.9k
        m->cur_stream_length = stream_data.size();
1531
21.9k
        if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1532
            // Don't encrypt stream data for the metadata stream
1533
0
            m->cur_data_key.clear();
1534
0
        }
1535
21.9k
        adjustAESStreamLength(m->cur_stream_length);
1536
21.9k
        unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
1537
21.9k
        char last_char = stream_data.empty() ? '\0' : stream_data.back();
1538
21.9k
        writeString("\nstream\n");
1539
21.9k
        {
1540
21.9k
            PipelinePopper pp_enc(this);
1541
21.9k
            pushEncryptionFilter(pp_enc);
1542
21.9k
            writeString(stream_data);
1543
21.9k
        }
1544
1545
21.9k
        if ((m->added_newline =
1546
21.9k
                 m->newline_before_endstream || (m->qdf_mode && last_char != '\n'))) {
1547
0
            writeString("\nendstream");
1548
21.9k
        } else {
1549
21.9k
            writeString("endstream");
1550
21.9k
        }
1551
536k
    } else if (tc == ::ot_string) {
1552
18.9k
        std::string val;
1553
18.9k
        if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1554
18.9k
            !m->cur_data_key.empty()) {
1555
14.5k
            val = object.getStringValue();
1556
14.5k
            if (m->encrypt_use_aes) {
1557
0
                Pl_Buffer bufpl("encrypted string");
1558
0
                Pl_AES_PDF pl(
1559
0
                    "aes encrypt string",
1560
0
                    &bufpl,
1561
0
                    true,
1562
0
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1563
0
                    m->cur_data_key.length());
1564
0
                pl.writeString(val);
1565
0
                pl.finish();
1566
0
                val = QPDF_String(bufpl.getString()).unparse(true);
1567
14.5k
            } else {
1568
14.5k
                auto tmp_ph = QUtil::make_unique_cstr(val);
1569
14.5k
                char* tmp = tmp_ph.get();
1570
14.5k
                size_t vlen = val.length();
1571
14.5k
                RC4 rc4(
1572
14.5k
                    QUtil::unsigned_char_pointer(m->cur_data_key),
1573
14.5k
                    QIntC::to_int(m->cur_data_key.length()));
1574
14.5k
                auto data = QUtil::unsigned_char_pointer(tmp);
1575
14.5k
                rc4.process(data, vlen, data);
1576
14.5k
                val = QPDF_String(std::string(tmp, vlen)).unparse();
1577
14.5k
            }
1578
14.5k
        } else if (flags & f_hex_string) {
1579
10
            val = QPDF_String(object.getStringValue()).unparse(true);
1580
4.44k
        } else {
1581
4.44k
            val = object.unparseResolved();
1582
4.44k
        }
1583
18.9k
        writeString(val);
1584
517k
    } else {
1585
517k
        writeString(object.unparseResolved());
1586
517k
    }
1587
716k
}
1588
1589
void
1590
QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj)
1591
0
{
1592
0
    qpdf_assert_debug(first_obj > 0);
1593
0
    bool is_first = true;
1594
0
    auto id = std::to_string(first_obj) + ' ';
1595
0
    for (auto& offset: offsets) {
1596
0
        if (is_first) {
1597
0
            is_first = false;
1598
0
        } else {
1599
0
            writeStringQDF("\n");
1600
0
            writeStringNoQDF(" ");
1601
0
        }
1602
0
        writeString(id);
1603
0
        util::increment(id, 1);
1604
0
        writeString(std::to_string(offset));
1605
0
    }
1606
0
    writeString("\n");
1607
0
}
1608
1609
void
1610
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1611
0
{
1612
    // Note: object might be null if this is a place-holder for an object stream that we are
1613
    // generating from scratch.
1614
1615
0
    QPDFObjGen old_og = object.getObjGen();
1616
0
    qpdf_assert_debug(old_og.getGen() == 0);
1617
0
    int old_id = old_og.getObj();
1618
0
    int new_stream_id = m->obj[old_og].renumber;
1619
1620
0
    std::vector<qpdf_offset_t> offsets;
1621
0
    qpdf_offset_t first = 0;
1622
1623
    // Generate stream itself.  We have to do this in two passes so we can calculate offsets in the
1624
    // first pass.
1625
0
    std::string stream_buffer_pass1;
1626
0
    std::string stream_buffer_pass2;
1627
0
    int first_obj = -1;
1628
0
    const bool compressed = m->compress_streams && !m->qdf_mode;
1629
0
    {
1630
        // Pass 1
1631
0
        PipelinePopper pp_ostream_pass1(this);
1632
0
        activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1);
1633
1634
0
        int count = -1;
1635
0
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
1636
0
            ++count;
1637
0
            int new_obj = m->obj[obj].renumber;
1638
0
            if (first_obj == -1) {
1639
0
                first_obj = new_obj;
1640
0
            }
1641
0
            if (m->qdf_mode) {
1642
0
                writeString(
1643
0
                    "%% Object stream: object " + std::to_string(new_obj) + ", index " +
1644
0
                    std::to_string(count));
1645
0
                if (!m->suppress_original_object_ids) {
1646
0
                    writeString("; original object ID: " + std::to_string(obj.getObj()));
1647
                    // For compatibility, only write the generation if non-zero.  While object
1648
                    // streams only allow objects with generation 0, if we are generating object
1649
                    // streams, the old object could have a non-zero generation.
1650
0
                    if (obj.getGen() != 0) {
1651
0
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
1652
0
                        writeString(" " + std::to_string(obj.getGen()));
1653
0
                    }
1654
0
                }
1655
0
                writeString("\n");
1656
0
            }
1657
1658
0
            offsets.push_back(m->pipeline->getCount());
1659
            // To avoid double-counting objects being written in object streams for progress
1660
            // reporting, decrement in pass 1.
1661
0
            indicateProgress(true, false);
1662
1663
0
            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
1664
0
            if (obj_to_write.isStream()) {
1665
                // This condition occurred in a fuzz input. Ideally we should block it at parse
1666
                // time, but it's not clear to me how to construct a case for this.
1667
0
                obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1668
0
                obj_to_write = QPDFObjectHandle::newNull();
1669
0
            }
1670
0
            writeObject(obj_to_write, count);
1671
1672
0
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1673
0
        }
1674
0
    }
1675
0
    {
1676
0
        PipelinePopper pp_ostream(this);
1677
        // Adjust offsets to skip over comment before first object
1678
0
        first = offsets.at(0);
1679
0
        for (auto& iter: offsets) {
1680
0
            iter -= first;
1681
0
        }
1682
1683
        // Take one pass at writing pairs of numbers so we can get their size information
1684
0
        {
1685
0
            PipelinePopper pp_discard(this);
1686
0
            activatePipelineStack(pp_discard, true);
1687
0
            writeObjectStreamOffsets(offsets, first_obj);
1688
0
            first += m->pipeline->getCount();
1689
0
        }
1690
1691
        // Set up a stream to write the stream data into a buffer.
1692
0
        if (compressed) {
1693
0
            activatePipelineStack(
1694
0
                pp_ostream,
1695
0
                pl::create<Pl_Flate>(
1696
0
                    pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate));
1697
0
        } else {
1698
0
            activatePipelineStack(pp_ostream, stream_buffer_pass2);
1699
0
        }
1700
0
        writeObjectStreamOffsets(offsets, first_obj);
1701
0
        writeString(stream_buffer_pass1);
1702
0
        stream_buffer_pass1.clear();
1703
0
        stream_buffer_pass1.shrink_to_fit();
1704
0
    }
1705
1706
    // Write the object
1707
0
    openObject(new_stream_id);
1708
0
    setDataKey(new_stream_id);
1709
0
    writeString("<<");
1710
0
    writeStringQDF("\n ");
1711
0
    writeString(" /Type /ObjStm");
1712
0
    writeStringQDF("\n ");
1713
0
    size_t length = stream_buffer_pass2.size();
1714
0
    adjustAESStreamLength(length);
1715
0
    writeString(" /Length " + std::to_string(length));
1716
0
    writeStringQDF("\n ");
1717
0
    if (compressed) {
1718
0
        writeString(" /Filter /FlateDecode");
1719
0
    }
1720
0
    writeString(" /N " + std::to_string(offsets.size()));
1721
0
    writeStringQDF("\n ");
1722
0
    writeString(" /First " + std::to_string(first));
1723
0
    if (!object.isNull()) {
1724
        // If the original object has an /Extends key, preserve it.
1725
0
        QPDFObjectHandle dict = object.getDict();
1726
0
        QPDFObjectHandle extends = dict.getKey("/Extends");
1727
0
        if (extends.isIndirect()) {
1728
0
            QTC::TC("qpdf", "QPDFWriter copy Extends");
1729
0
            writeStringQDF("\n ");
1730
0
            writeString(" /Extends ");
1731
0
            unparseChild(extends, 1, f_in_ostream);
1732
0
        }
1733
0
    }
1734
0
    writeStringQDF("\n");
1735
0
    writeStringNoQDF(" ");
1736
0
    writeString(">>\nstream\n");
1737
0
    if (m->encryption) {
1738
0
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
1739
0
    }
1740
0
    {
1741
0
        PipelinePopper pp_enc(this);
1742
0
        pushEncryptionFilter(pp_enc);
1743
0
        writeString(stream_buffer_pass2);
1744
0
    }
1745
0
    if (m->newline_before_endstream) {
1746
0
        writeString("\n");
1747
0
    }
1748
0
    writeString("endstream");
1749
0
    m->cur_data_key.clear();
1750
0
    closeObject(new_stream_id);
1751
0
}
1752
1753
void
1754
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1755
80.4k
{
1756
80.4k
    QPDFObjGen old_og = object.getObjGen();
1757
1758
80.4k
    if ((object_stream_index == -1) && (old_og.getGen() == 0) &&
1759
80.4k
        (m->object_stream_to_objects.count(old_og.getObj()))) {
1760
0
        writeObjectStream(object);
1761
0
        return;
1762
0
    }
1763
1764
80.4k
    indicateProgress(false, false);
1765
80.4k
    auto new_id = m->obj[old_og].renumber;
1766
80.4k
    if (m->qdf_mode) {
1767
0
        if (m->page_object_to_seq.contains(old_og)) {
1768
0
            writeString("%% Page ");
1769
0
            writeString(std::to_string(m->page_object_to_seq[old_og]));
1770
0
            writeString("\n");
1771
0
        }
1772
0
        if (m->contents_to_page_seq.contains(old_og)) {
1773
0
            writeString("%% Contents for page ");
1774
0
            writeString(std::to_string(m->contents_to_page_seq[old_og]));
1775
0
            writeString("\n");
1776
0
        }
1777
0
    }
1778
80.4k
    if (object_stream_index == -1) {
1779
80.4k
        if (m->qdf_mode && (!m->suppress_original_object_ids)) {
1780
0
            writeString("%% Original object ID: " + object.getObjGen().unparse(' ') + "\n");
1781
0
        }
1782
80.4k
        openObject(new_id);
1783
80.4k
        setDataKey(new_id);
1784
80.4k
        unparseObject(object, 0, 0);
1785
80.4k
        m->cur_data_key.clear();
1786
80.4k
        closeObject(new_id);
1787
80.4k
    } else {
1788
0
        unparseObject(object, 0, f_in_ostream);
1789
0
        writeString("\n");
1790
0
    }
1791
1792
80.4k
    if ((!m->direct_stream_lengths) && object.isStream()) {
1793
0
        if (m->qdf_mode) {
1794
0
            if (m->added_newline) {
1795
0
                writeString("%QDF: ignore_newline\n");
1796
0
            }
1797
0
        }
1798
0
        openObject(new_id + 1);
1799
0
        writeString(std::to_string(m->cur_stream_length));
1800
0
        closeObject(new_id + 1);
1801
0
    }
1802
80.4k
}
1803
1804
std::string
1805
QPDFWriter::getOriginalID1()
1806
7.75k
{
1807
7.75k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1808
7.75k
    if (trailer.hasKey("/ID")) {
1809
1.01k
        return trailer.getKey("/ID").getArrayItem(0).getStringValue();
1810
6.73k
    } else {
1811
6.73k
        return "";
1812
6.73k
    }
1813
7.75k
}
1814
1815
void
1816
QPDFWriter::generateID()
1817
15.2k
{
1818
    // Generate the ID lazily so that we can handle the user's preference to use static or
1819
    // deterministic ID generation.
1820
1821
15.2k
    if (!m->id2.empty()) {
1822
7.50k
        return;
1823
7.50k
    }
1824
1825
7.75k
    QPDFObjectHandle trailer = m->pdf.getTrailer();
1826
1827
7.75k
    std::string result;
1828
1829
7.75k
    if (m->static_id) {
1830
        // For test suite use only...
1831
7.75k
        static unsigned char tmp[] = {
1832
7.75k
            0x31,
1833
7.75k
            0x41,
1834
7.75k
            0x59,
1835
7.75k
            0x26,
1836
7.75k
            0x53,
1837
7.75k
            0x58,
1838
7.75k
            0x97,
1839
7.75k
            0x93,
1840
7.75k
            0x23,
1841
7.75k
            0x84,
1842
7.75k
            0x62,
1843
7.75k
            0x64,
1844
7.75k
            0x33,
1845
7.75k
            0x83,
1846
7.75k
            0x27,
1847
7.75k
            0x95,
1848
7.75k
            0x00};
1849
7.75k
        result = reinterpret_cast<char*>(tmp);
1850
7.75k
    } else {
1851
        // The PDF specification has guidelines for creating IDs, but it states clearly that the
1852
        // only thing that's really important is that it is very likely to be unique.  We can't
1853
        // really follow the guidelines in the spec exactly because we haven't written the file yet.
1854
        // This scheme should be fine though.  The deterministic ID case uses a digest of a
1855
        // sufficient portion of the file's contents such no two non-matching files would match in
1856
        // the subsets used for this computation.  Note that we explicitly omit the filename from
1857
        // the digest calculation for deterministic ID so that the same file converted with qpdf, in
1858
        // that case, would have the same ID regardless of the output file's name.
1859
1860
0
        std::string seed;
1861
0
        if (m->deterministic_id) {
1862
0
            if (m->deterministic_id_data.empty()) {
1863
0
                QTC::TC("qpdf", "QPDFWriter deterministic with no data");
1864
0
                throw std::runtime_error(
1865
0
                    "INTERNAL ERROR: QPDFWriter::generateID has no data for "
1866
0
                    "deterministic ID.  This may happen if deterministic ID "
1867
0
                    "and file encryption are requested together.");
1868
0
            }
1869
0
            seed += m->deterministic_id_data;
1870
0
        } else {
1871
0
            seed += std::to_string(QUtil::get_current_time());
1872
0
            seed += m->filename;
1873
0
            seed += " ";
1874
0
        }
1875
0
        seed += " QPDF ";
1876
0
        if (trailer.hasKey("/Info")) {
1877
0
            for (auto const& item: trailer.getKey("/Info").as_dictionary()) {
1878
0
                if (item.second.isString()) {
1879
0
                    seed += " ";
1880
0
                    seed += item.second.getStringValue();
1881
0
                }
1882
0
            }
1883
0
        }
1884
1885
0
        MD5 m;
1886
0
        m.encodeString(seed.c_str());
1887
0
        MD5::Digest digest;
1888
0
        m.digest(digest);
1889
0
        result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest));
1890
0
    }
1891
1892
    // If /ID already exists, follow the spec: use the original first word and generate a new second
1893
    // word.  Otherwise, we'll use the generated ID for both.
1894
1895
7.75k
    m->id2 = result;
1896
    // Note: keep /ID from old file even if --static-id was given.
1897
7.75k
    m->id1 = getOriginalID1();
1898
7.75k
    if (m->id1.empty()) {
1899
6.79k
        m->id1 = m->id2;
1900
6.79k
    }
1901
7.75k
}
1902
1903
void
1904
QPDFWriter::initializeSpecialStreams()
1905
7.74k
{
1906
    // Mark all page content streams in case we are filtering or normalizing.
1907
7.74k
    std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
1908
7.74k
    int num = 0;
1909
12.1k
    for (auto& page: pages) {
1910
12.1k
        m->page_object_to_seq[page.getObjGen()] = ++num;
1911
12.1k
        QPDFObjectHandle contents = page.getKey("/Contents");
1912
12.1k
        std::vector<QPDFObjGen> contents_objects;
1913
12.1k
        if (contents.isArray()) {
1914
272
            int n = contents.getArrayNItems();
1915
7.90k
            for (int i = 0; i < n; ++i) {
1916
7.63k
                contents_objects.push_back(contents.getArrayItem(i).getObjGen());
1917
7.63k
            }
1918
11.8k
        } else if (contents.isStream()) {
1919
1.92k
            contents_objects.push_back(contents.getObjGen());
1920
1.92k
        }
1921
1922
12.1k
        for (auto const& c: contents_objects) {
1923
9.56k
            m->contents_to_page_seq[c] = num;
1924
9.56k
            m->normalized_streams.insert(c);
1925
9.56k
        }
1926
12.1k
    }
1927
7.74k
}
1928
1929
void
1930
QPDFWriter::preserveObjectStreams()
1931
0
{
1932
0
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1933
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1934
    // streams out of old objects that have generation numbers greater than zero. However in an
1935
    // existing PDF, all object stream objects and all objects in them must have generation 0
1936
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1937
    // that are not allowed to be in object streams. In addition to removing objects that were
1938
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
1939
    // objects from being included.
1940
0
    auto end = xref.cend();
1941
0
    m->obj.streams_empty = true;
1942
0
    if (m->preserve_unreferenced_objects) {
1943
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1944
0
            if (iter->second.getType() == 2) {
1945
                // Pdf contains object streams.
1946
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1947
0
                m->obj.streams_empty = false;
1948
0
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1949
0
            }
1950
0
        }
1951
0
    } else {
1952
        // Start by scanning for first compressed object in case we don't have any object streams to
1953
        // process.
1954
0
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
1955
0
            if (iter->second.getType() == 2) {
1956
                // Pdf contains object streams.
1957
0
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
1958
0
                m->obj.streams_empty = false;
1959
0
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1960
                // The object pointed to by iter may be a previous generation, in which case it is
1961
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
1962
                // table may contain multiple generations of an object).
1963
0
                for (iter = xref.cbegin(); iter != end; ++iter) {
1964
0
                    if (iter->second.getType() == 2) {
1965
0
                        auto id = static_cast<size_t>(iter->first.getObj());
1966
0
                        if (id < eligible.size() && eligible[id]) {
1967
0
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1968
0
                        } else {
1969
0
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1970
0
                        }
1971
0
                    }
1972
0
                }
1973
0
                return;
1974
0
            }
1975
0
        }
1976
0
    }
1977
0
}
1978
1979
void
1980
QPDFWriter::generateObjectStreams()
1981
0
{
1982
    // Basic strategy: make a list of objects that can go into an object stream.  Then figure out
1983
    // how many object streams are needed so that we can distribute objects approximately evenly
1984
    // without having any object stream exceed 100 members.  We don't have to worry about linearized
1985
    // files here -- if the file is linearized, we take care of excluding things that aren't allowed
1986
    // here later.
1987
1988
    // This code doesn't do anything with /Extends.
1989
1990
0
    std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
1991
0
    size_t n_object_streams = (eligible.size() + 99U) / 100U;
1992
1993
0
    initializeTables(2U * n_object_streams);
1994
0
    if (n_object_streams == 0) {
1995
0
        m->obj.streams_empty = true;
1996
0
        return;
1997
0
    }
1998
0
    size_t n_per = eligible.size() / n_object_streams;
1999
0
    if (n_per * n_object_streams < eligible.size()) {
2000
0
        ++n_per;
2001
0
    }
2002
0
    unsigned int n = 0;
2003
0
    int cur_ostream = m->pdf.newIndirectNull().getObjectID();
2004
0
    for (auto const& item: eligible) {
2005
0
        if (n == n_per) {
2006
0
            QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2007
0
            n = 0;
2008
            // Construct a new null object as the "original" object stream.  The rest of the code
2009
            // knows that this means we're creating the object stream from scratch.
2010
0
            cur_ostream = m->pdf.newIndirectNull().getObjectID();
2011
0
        }
2012
0
        auto& obj = m->obj[item];
2013
0
        obj.object_stream = cur_ostream;
2014
0
        obj.gen = item.getGen();
2015
0
        ++n;
2016
0
    }
2017
0
}
2018
2019
// Return a shallow copy of the input trailer with the keys removed that necessarily have to be
// replaced when the file is written.
QPDFObjectHandle
QPDFWriter::getTrimmedTrailer()
{
    QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy();

    static char const* const stale_keys[] = {
        // Encryption keys
        "/ID",
        "/Encrypt",
        // Modification information
        "/Prev",
        // Keys that potentially come from a cross-reference stream
        "/Index",
        "/W",
        "/Length",
        "/Filter",
        "/DecodeParms",
        "/Type",
        "/XRefStm",
    };
    for (char const* key: stale_keys) {
        trailer.removeKey(key);
    }

    return trailer;
}
2044
2045
// Make document extension level information direct as required by the spec.
2046
void
2047
QPDFWriter::prepareFileForWrite()
2048
7.66k
{
2049
7.66k
    m->pdf.fixDanglingReferences();
2050
7.66k
    auto root = m->pdf.getRoot();
2051
7.66k
    auto oh = root.getKey("/Extensions");
2052
7.66k
    if (oh.isDictionary()) {
2053
275
        const bool extensions_indirect = oh.isIndirect();
2054
275
        if (extensions_indirect) {
2055
24
            QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2056
24
            oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy());
2057
24
        }
2058
275
        if (oh.hasKey("/ADBE")) {
2059
230
            auto adbe = oh.getKey("/ADBE");
2060
230
            if (adbe.isIndirect()) {
2061
142
                QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1);
2062
142
                adbe.makeDirect();
2063
142
                oh.replaceKey("/ADBE", adbe);
2064
142
            }
2065
230
        }
2066
275
    }
2067
7.66k
}
2068
2069
void
2070
QPDFWriter::initializeTables(size_t extra)
2071
7.72k
{
2072
7.72k
    auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2073
7.72k
    m->obj.resize(size);
2074
7.72k
    m->new_obj.resize(size);
2075
7.72k
}
2076
2077
void
2078
QPDFWriter::doWriteSetup()
2079
7.74k
{
2080
7.74k
    if (m->did_write_setup) {
2081
0
        return;
2082
0
    }
2083
7.74k
    m->did_write_setup = true;
2084
2085
    // Do preliminary setup
2086
2087
7.74k
    if (m->linearized) {
2088
0
        m->qdf_mode = false;
2089
0
    }
2090
2091
7.74k
    if (m->pclm) {
2092
0
        m->stream_decode_level = qpdf_dl_none;
2093
0
        m->compress_streams = false;
2094
0
        m->encryption = nullptr;
2095
0
    }
2096
2097
7.74k
    if (m->qdf_mode) {
2098
0
        if (!m->normalize_content_set) {
2099
0
            m->normalize_content = true;
2100
0
        }
2101
0
        if (!m->compress_streams_set) {
2102
0
            m->compress_streams = false;
2103
0
        }
2104
0
        if (!m->stream_decode_level_set) {
2105
0
            m->stream_decode_level = qpdf_dl_generalized;
2106
0
        }
2107
0
    }
2108
2109
7.74k
    if (m->encryption) {
2110
        // Encryption has been explicitly set
2111
7.74k
        m->preserve_encryption = false;
2112
7.74k
    } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2113
        // Encryption makes looking at contents pretty useless.  If the user explicitly encrypted
2114
        // though, we still obey that.
2115
0
        m->preserve_encryption = false;
2116
0
    }
2117
2118
7.74k
    if (m->preserve_encryption) {
2119
0
        copyEncryptionParameters(m->pdf);
2120
0
    }
2121
2122
7.74k
    if (!m->forced_pdf_version.empty()) {
2123
0
        int major = 0;
2124
0
        int minor = 0;
2125
0
        parseVersion(m->forced_pdf_version, major, minor);
2126
0
        disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2127
0
        if (compareVersions(major, minor, 1, 5) < 0) {
2128
0
            QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2129
0
            m->object_stream_mode = qpdf_o_disable;
2130
0
        }
2131
0
    }
2132
2133
7.74k
    if (m->qdf_mode || m->normalize_content || m->stream_decode_level) {
2134
7.74k
        initializeSpecialStreams();
2135
7.74k
    }
2136
2137
7.74k
    if (m->qdf_mode) {
2138
        // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2139
        // recomputed stream length data. Certain streams such as object streams, xref streams, and
2140
        // hint streams always get direct stream lengths.
2141
0
        m->direct_stream_lengths = false;
2142
0
    }
2143
2144
7.74k
    switch (m->object_stream_mode) {
2145
7.72k
    case qpdf_o_disable:
2146
7.72k
        initializeTables();
2147
7.72k
        m->obj.streams_empty = true;
2148
7.72k
        break;
2149
2150
0
    case qpdf_o_preserve:
2151
0
        initializeTables();
2152
0
        preserveObjectStreams();
2153
0
        break;
2154
2155
0
    case qpdf_o_generate:
2156
0
        generateObjectStreams();
2157
0
        break;
2158
2159
        // no default so gcc will warn for missing case tag
2160
7.74k
    }
2161
2162
7.71k
    if (!m->obj.streams_empty) {
2163
0
        if (m->linearized) {
2164
            // Page dictionaries are not allowed to be compressed objects.
2165
0
            for (auto& page: m->pdf.getAllPages()) {
2166
0
                if (m->obj[page].object_stream > 0) {
2167
0
                    QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2168
0
                    m->obj[page].object_stream = 0;
2169
0
                }
2170
0
            }
2171
0
        }
2172
2173
0
        if (m->linearized || m->encryption) {
2174
            // The document catalog is not allowed to be compressed in linearized files either.  It
2175
            // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2176
            // handle encrypted files with compressed document catalogs, so we disable them in that
2177
            // case as well.
2178
0
            if (m->obj[m->root_og].object_stream > 0) {
2179
0
                QTC::TC("qpdf", "QPDFWriter uncompressing root");
2180
0
                m->obj[m->root_og].object_stream = 0;
2181
0
            }
2182
0
        }
2183
2184
        // Generate reverse mapping from object stream to objects
2185
0
        m->obj.forEach([this](auto id, auto const& item) -> void {
2186
0
            if (item.object_stream > 0) {
2187
0
                auto& vec = m->object_stream_to_objects[item.object_stream];
2188
0
                vec.emplace_back(id, item.gen);
2189
0
                if (m->max_ostream_index < vec.size()) {
2190
0
                    ++m->max_ostream_index;
2191
0
                }
2192
0
            }
2193
0
        });
2194
0
        --m->max_ostream_index;
2195
2196
0
        if (m->object_stream_to_objects.empty()) {
2197
0
            m->obj.streams_empty = true;
2198
0
        } else {
2199
0
            setMinimumPDFVersion("1.5");
2200
0
        }
2201
0
    }
2202
2203
7.71k
    setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2204
7.71k
    m->final_pdf_version = m->min_pdf_version;
2205
7.71k
    m->final_extension_level = m->min_extension_level;
2206
7.71k
    if (!m->forced_pdf_version.empty()) {
2207
0
        QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2208
0
        m->final_pdf_version = m->forced_pdf_version;
2209
0
        m->final_extension_level = m->forced_extension_level;
2210
0
    }
2211
7.71k
}
2212
2213
void
2214
QPDFWriter::write()
2215
7.74k
{
2216
7.74k
    doWriteSetup();
2217
2218
    // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2219
    // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
2220
7.74k
    m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1));
2221
2222
7.74k
    prepareFileForWrite();
2223
2224
7.74k
    if (m->linearized) {
2225
0
        writeLinearized();
2226
7.74k
    } else {
2227
7.74k
        writeStandard();
2228
7.74k
    }
2229
2230
7.74k
    m->pipeline->finish();
2231
7.74k
    if (m->close_file) {
2232
0
        fclose(m->file);
2233
0
    }
2234
7.74k
    m->file = nullptr;
2235
7.74k
    if (m->buffer_pipeline) {
2236
0
        m->output_buffer = m->buffer_pipeline->getBuffer();
2237
0
        m->buffer_pipeline = nullptr;
2238
0
    }
2239
7.74k
    indicateProgress(false, true);
2240
7.74k
}
2241
2242
// Map an object ID from the input file to its ID in the output; the generation of the result is
// always 0.
QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{
    auto const new_id = m->obj[og].renumber;
    return {new_id, 0};
}
2247
2248
std::map<QPDFObjGen, QPDFXRefEntry>
2249
QPDFWriter::getWrittenXRefTable()
2250
0
{
2251
0
    std::map<QPDFObjGen, QPDFXRefEntry> result;
2252
2253
0
    auto it = result.begin();
2254
0
    m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
2255
0
        if (item.xref.getType() != 0) {
2256
0
            it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2257
0
        }
2258
0
    });
2259
0
    return result;
2260
0
}
2261
2262
void
2263
QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2264
0
{
2265
0
    for (auto const& oh: part) {
2266
0
        enqueueObject(oh);
2267
0
    }
2268
0
}
2269
2270
void
2271
QPDFWriter::writeEncryptionDictionary()
2272
7.50k
{
2273
7.50k
    m->encryption_dict_objid = openObject(m->encryption_dict_objid);
2274
7.50k
    auto& enc = *m->encryption;
2275
7.50k
    auto const V = enc.getV();
2276
2277
7.50k
    writeString("<<");
2278
7.50k
    if (V >= 4) {
2279
0
        writeString(" /CF << /StdCF << /AuthEvent /DocOpen /CFM ");
2280
0
        writeString(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2");
2281
        // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of
2282
        // MacOS won't open encrypted files without it.
2283
0
        writeString((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>");
2284
0
        if (!m->encryption->getEncryptMetadata()) {
2285
0
            writeString(" /EncryptMetadata false");
2286
0
        }
2287
0
    }
2288
7.50k
    writeString(" /Filter /Standard /Length ");
2289
7.50k
    writeString(std::to_string(enc.getLengthBytes() * 8));
2290
7.50k
    writeString(" /O ");
2291
7.50k
    writeString(QPDF_String(enc.getO()).unparse(true));
2292
7.50k
    if (V >= 4) {
2293
0
        writeString(" /OE ");
2294
0
        writeString(QPDF_String(enc.getOE()).unparse(true));
2295
0
    }
2296
7.50k
    writeString(" /P ");
2297
7.50k
    writeString(std::to_string(enc.getP()));
2298
7.50k
    if (V >= 5) {
2299
0
        writeString(" /Perms ");
2300
0
        writeString(QPDF_String(enc.getPerms()).unparse(true));
2301
0
    }
2302
7.50k
    writeString(" /R ");
2303
7.50k
    writeString(std::to_string(enc.getR()));
2304
2305
7.50k
    if (V >= 4) {
2306
0
        writeString(" /StmF /StdCF /StrF /StdCF");
2307
0
    }
2308
7.50k
    writeString(" /U ");
2309
7.50k
    writeString(QPDF_String(enc.getU()).unparse(true));
2310
7.50k
    if (V >= 4) {
2311
0
        writeString(" /UE ");
2312
0
        writeString(QPDF_String(enc.getUE()).unparse(true));
2313
0
    }
2314
7.50k
    writeString(" /V ");
2315
7.50k
    writeString(std::to_string(enc.getV()));
2316
7.50k
    writeString(" >>");
2317
7.50k
    closeObject(m->encryption_dict_objid);
2318
7.50k
}
2319
2320
std::string
2321
QPDFWriter::getFinalVersion()
2322
0
{
2323
0
    doWriteSetup();
2324
0
    return m->final_pdf_version;
2325
0
}
2326
2327
void
2328
QPDFWriter::writeHeader()
2329
7.63k
{
2330
7.63k
    writeString("%PDF-");
2331
7.63k
    writeString(m->final_pdf_version);
2332
7.63k
    if (m->pclm) {
2333
        // PCLm version
2334
0
        writeString("\n%PCLm 1.0\n");
2335
7.63k
    } else {
2336
        // This string of binary characters would not be valid UTF-8, so it really should be treated
2337
        // as binary.
2338
7.63k
        writeString("\n%\xbf\xf7\xa2\xfe\n");
2339
7.63k
    }
2340
7.63k
    writeStringQDF("%QDF-1.0\n\n");
2341
2342
    // Note: do not write extra header text here.  Linearized PDFs must include the entire
2343
    // linearization parameter dictionary within the first 1024 characters of the PDF file, so for
2344
    // linearized files, we have to write extra header text after the linearization parameter
2345
    // dictionary.
2346
7.63k
}
2347
2348
void
2349
QPDFWriter::writeHintStream(int hint_id)
2350
0
{
2351
0
    std::string hint_buffer;
2352
0
    int S = 0;
2353
0
    int O = 0;
2354
0
    bool compressed = (m->compress_streams && !m->qdf_mode);
2355
0
    QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2356
2357
0
    openObject(hint_id);
2358
0
    setDataKey(hint_id);
2359
2360
0
    size_t hlen = hint_buffer.size();
2361
2362
0
    writeString("<< ");
2363
0
    if (compressed) {
2364
0
        writeString("/Filter /FlateDecode ");
2365
0
    }
2366
0
    writeString("/S ");
2367
0
    writeString(std::to_string(S));
2368
0
    if (O) {
2369
0
        writeString(" /O ");
2370
0
        writeString(std::to_string(O));
2371
0
    }
2372
0
    writeString(" /Length ");
2373
0
    adjustAESStreamLength(hlen);
2374
0
    writeString(std::to_string(hlen));
2375
0
    writeString(" >>\nstream\n");
2376
2377
0
    if (m->encryption) {
2378
0
        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2379
0
    }
2380
0
    char last_char = hint_buffer.empty() ? '\0' : hint_buffer.back();
2381
0
    {
2382
0
        PipelinePopper pp_enc(this);
2383
0
        pushEncryptionFilter(pp_enc);
2384
0
        writeString(hint_buffer);
2385
0
    }
2386
2387
0
    if (last_char != '\n') {
2388
0
        writeString("\n");
2389
0
    }
2390
0
    writeString("endstream");
2391
0
    closeObject(hint_id);
2392
0
}
2393
2394
// Convenience overload: write a cross-reference table with no previous-xref offset, offsets not
// suppressed, no hint stream adjustment and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const no_prev = 0;
    bool const keep_offsets = false;
    int const no_hint_id = 0;
    qpdf_offset_t const no_hint_offset = 0;
    qpdf_offset_t const no_hint_length = 0;
    int const no_linearization_pass = 0;
    return writeXRefTable(
        which,
        first,
        last,
        size,
        no_prev,
        keep_offsets,
        no_hint_id,
        no_hint_offset,
        no_hint_length,
        no_linearization_pass);
}
2401
2402
// Write a classic cross-reference table covering object ids first..last, followed by the trailer.
//
// Object 0 gets the fixed free entry. Every other object is written as an in-use entry with
// generation 0 and a 10-digit offset. When suppress_offsets is true, all offsets are written as 0.
// Otherwise, if hint_id is non-zero, offsets at or beyond hint_offset (other than the hint
// stream's own) are increased by hint_length.
//
// Returns the pipeline position recorded just before the newline that precedes the first entry.
qpdf_offset_t
QPDFWriter::writeXRefTable(
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    bool suppress_offsets,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    int linearization_pass)
{
    // Subsection header: "<first> <count>".
    writeString("xref\n" + std::to_string(first) + " " + std::to_string(last - first + 1));
    qpdf_offset_t const space_before_zero = m->pipeline->getCount();
    writeString("\n");

    for (int objid = first; objid <= last; ++objid) {
        if (objid == 0) {
            writeString("0000000000 65535 f \n");
            continue;
        }

        qpdf_offset_t offset = 0;
        if (!suppress_offsets) {
            offset = m->new_obj[objid].xref.getOffset();
            bool const shift_for_hint =
                (hint_id != 0) && (objid != hint_id) && (offset >= hint_offset);
            if (shift_for_hint) {
                offset += hint_length;
            }
        }
        writeString(QUtil::int_to_string(offset, 10) + " 00000 n \n");
    }

    writeTrailer(which, size, false, prev, linearization_pass);
    writeString("\n");
    return space_before_zero;
}
2440
2441
// Convenience overload: write a cross-reference stream with no previous-xref offset, no hint
// stream adjustment, compression not skipped and linearization pass 0.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    qpdf_offset_t const no_prev = 0;
    int const no_hint_id = 0;
    qpdf_offset_t const no_hint_offset = 0;
    qpdf_offset_t const no_hint_length = 0;
    bool const skip_compression = false;
    int const no_linearization_pass = 0;
    return writeXRefStream(
        objid,
        max_id,
        max_offset,
        which,
        first,
        last,
        size,
        no_prev,
        no_hint_id,
        no_hint_offset,
        no_hint_length,
        skip_compression,
        no_linearization_pass);
}
2450
2451
// Write a cross-reference stream as object xref_id, covering object ids first..last.
//
// The binary entry data is generated into a string first so that its length is known before the
// stream dictionary is written. The dictionary also carries the trailer keys (via writeTrailer).
// If hint_id is non-zero, offsets at or beyond hint_offset (other than the hint stream's own) are
// increased by hint_length.
//
// Returns the pipeline position at entry minus one.
qpdf_offset_t
QPDFWriter::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t const xref_offset = m->pipeline->getCount();

    // Field 1 contains offsets and object stream identifiers, so it has to be wide enough for the
    // larger of the two.
    unsigned int const f1_size =
        std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // Field 2 contains object stream indices.
    unsigned int const f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));

    // One entry: a one-byte type followed by the two fields.
    unsigned int const esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    m->new_obj[xref_id].xref = QPDFXRefEntry(xref_offset);

    std::string xref_data;
    bool const compressed = m->compress_streams && !m->qdf_mode;
    {
        PipelinePopper pp_xref(this);
        if (!compressed) {
            activatePipelineStack(pp_xref, xref_data);
        } else {
            m->count_buffer.clear();
            auto link = pl::create<pl::String>(xref_data);
            // With skip_compression the Flate stage is left out even though the dictionary written
            // below still declares /FlateDecode: the stream dictionary is written for compression
            // without actually compressing.  This helps us with computation of padding for pass 1
            // of linearization.
            if (!skip_compression) {
                link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate);
            }
            activatePipelineStack(
                pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize));
        }

        for (int objid = first; objid <= last; ++objid) {
            QPDFXRefEntry& entry = m->new_obj[objid].xref;
            switch (entry.getType()) {
            case 0:
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
                break;

            case 1:
                {
                    qpdf_offset_t offset = entry.getOffset();
                    bool const shift_for_hint =
                        (hint_id != 0) && (objid != hint_id) && (offset >= hint_offset);
                    if (shift_for_hint) {
                        offset += hint_length;
                    }
                    writeBinary(1, 1);
                    writeBinary(QIntC::to_ulonglong(offset), f1_size);
                    writeBinary(0, f2_size);
                }
                break;

            case 2:
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(entry.getObjStreamIndex()), f2_size);
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
            }
        }
    }

    openObject(xref_id);
    writeString("<<");
    writeStringQDF("\n ");
    writeString(" /Type /XRef");
    writeStringQDF("\n ");
    writeString(" /Length ");
    writeString(std::to_string(xref_data.size()));
    if (compressed) {
        writeStringQDF("\n ");
        writeString(" /Filter /FlateDecode");
        writeStringQDF("\n ");
        writeString(" /DecodeParms << /Columns ");
        writeString(std::to_string(esize));
        writeString(" /Predictor 12 >>");
    }
    writeStringQDF("\n ");
    writeString(" /W [ 1 ");
    writeString(std::to_string(f1_size));
    writeString(" ");
    writeString(std::to_string(f2_size));
    writeString(" ]");

    // /Index is only written when the stream does not cover exactly 0..size-1.
    if ((first != 0) || (last != size - 1)) {
        writeString(" /Index [ ");
        writeString(std::to_string(first));
        writeString(" ");
        writeString(std::to_string(last - first + 1));
        writeString(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    writeString("\nstream\n");
    writeString(xref_data);
    writeString("\nendstream");
    closeObject(xref_id);
    return xref_offset - 1;
}
2559
2560
size_t
2561
QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2562
0
{
2563
    // This routine is called right after a linearization first pass xref stream has been written
2564
    // without compression.  Calculate the amount of padding that would be required in the worst
2565
    // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is
2566
    // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add
2567
    // 10 extra bytes for number length increases.
2568
2569
0
    return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
2570
0
}
2571
2572
void
2573
QPDFWriter::writeLinearized()
2574
0
{
2575
    // Optimize file and enqueue objects in order
2576
2577
0
    std::map<int, int> stream_cache;
2578
2579
0
    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
2580
0
        auto& result = stream_cache[stream.getObjectID()];
2581
0
        if (result == 0) {
2582
0
            bool compress_stream;
2583
0
            bool is_metadata;
2584
0
            if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) {
2585
0
                result = 2;
2586
0
            } else {
2587
0
                result = 1;
2588
0
            }
2589
0
        }
2590
0
        return result;
2591
0
    };
2592
2593
0
    QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2594
2595
0
    std::vector<QPDFObjectHandle> part4;
2596
0
    std::vector<QPDFObjectHandle> part6;
2597
0
    std::vector<QPDFObjectHandle> part7;
2598
0
    std::vector<QPDFObjectHandle> part8;
2599
0
    std::vector<QPDFObjectHandle> part9;
2600
0
    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2601
2602
    // Object number sequence:
2603
    //
2604
    //  second half
2605
    //    second half uncompressed objects
2606
    //    second half xref stream, if any
2607
    //    second half compressed objects
2608
    //  first half
2609
    //    linearization dictionary
2610
    //    first half xref stream, if any
2611
    //    part 4 uncompresesd objects
2612
    //    encryption dictionary, if any
2613
    //    hint stream
2614
    //    part 6 uncompressed objects
2615
    //    first half compressed objects
2616
    //
2617
2618
    // Second half objects
2619
0
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
2620
0
    int second_half_first_obj = 1;
2621
0
    int after_second_half = 1 + second_half_uncompressed;
2622
0
    m->next_objid = after_second_half;
2623
0
    int second_half_xref = 0;
2624
0
    bool need_xref_stream = !m->obj.streams_empty;
2625
0
    if (need_xref_stream) {
2626
0
        second_half_xref = m->next_objid++;
2627
0
    }
2628
    // Assign numbers to all compressed objects in the second half.
2629
0
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
2630
0
    for (int i = 0; i < 3; ++i) {
2631
0
        for (auto const& oh: *vecs2[i]) {
2632
0
            assignCompressedObjectNumbers(oh.getObjGen());
2633
0
        }
2634
0
    }
2635
0
    int second_half_end = m->next_objid - 1;
2636
0
    int second_trailer_size = m->next_objid;
2637
2638
    // First half objects
2639
0
    int first_half_start = m->next_objid;
2640
0
    int lindict_id = m->next_objid++;
2641
0
    int first_half_xref = 0;
2642
0
    if (need_xref_stream) {
2643
0
        first_half_xref = m->next_objid++;
2644
0
    }
2645
0
    int part4_first_obj = m->next_objid;
2646
0
    m->next_objid += QIntC::to_int(part4.size());
2647
0
    int after_part4 = m->next_objid;
2648
0
    if (m->encryption) {
2649
0
        m->encryption_dict_objid = m->next_objid++;
2650
0
    }
2651
0
    int hint_id = m->next_objid++;
2652
0
    int part6_first_obj = m->next_objid;
2653
0
    m->next_objid += QIntC::to_int(part6.size());
2654
0
    int after_part6 = m->next_objid;
2655
    // Assign numbers to all compressed objects in the first half
2656
0
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
2657
0
    for (int i = 0; i < 2; ++i) {
2658
0
        for (auto const& oh: *vecs1[i]) {
2659
0
            assignCompressedObjectNumbers(oh.getObjGen());
2660
0
        }
2661
0
    }
2662
0
    int first_half_end = m->next_objid - 1;
2663
0
    int first_trailer_size = m->next_objid;
2664
2665
0
    int part4_end_marker = part4.back().getObjectID();
2666
0
    int part6_end_marker = part6.back().getObjectID();
2667
0
    qpdf_offset_t space_before_zero = 0;
2668
0
    qpdf_offset_t file_size = 0;
2669
0
    qpdf_offset_t part6_end_offset = 0;
2670
0
    qpdf_offset_t first_half_max_obj_offset = 0;
2671
0
    qpdf_offset_t second_xref_offset = 0;
2672
0
    qpdf_offset_t first_xref_end = 0;
2673
0
    qpdf_offset_t second_xref_end = 0;
2674
2675
0
    m->next_objid = part4_first_obj;
2676
0
    enqueuePart(part4);
2677
0
    if (m->next_objid != after_part4) {
2678
        // This can happen with very botched files as in the fuzzer test. There are likely some
2679
        // faulty assumptions in calculateLinearizationData
2680
0
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
2681
0
    }
2682
0
    m->next_objid = part6_first_obj;
2683
0
    enqueuePart(part6);
2684
0
    if (m->next_objid != after_part6) {
2685
0
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
2686
0
    }
2687
0
    m->next_objid = second_half_first_obj;
2688
0
    enqueuePart(part7);
2689
0
    enqueuePart(part8);
2690
0
    enqueuePart(part9);
2691
0
    if (m->next_objid != after_second_half) {
2692
0
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
2693
0
    }
2694
2695
0
    qpdf_offset_t hint_length = 0;
2696
0
    std::string hint_buffer;
2697
2698
    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
2699
2700
0
    FILE* lin_pass1_file = nullptr;
2701
0
    auto pp_pass1 = std::make_unique<PipelinePopper>(this);
2702
0
    auto pp_md5 = std::make_unique<PipelinePopper>(this);
2703
0
    for (int pass: {1, 2}) {
2704
0
        if (pass == 1) {
2705
0
            if (!m->lin_pass1_filename.empty()) {
2706
0
                lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb");
2707
0
                pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file));
2708
0
                activatePipelineStack(*pp_pass1);
2709
0
            } else {
2710
0
                activatePipelineStack(*pp_pass1, true);
2711
0
            }
2712
0
            if (m->deterministic_id) {
2713
0
                pushMD5Pipeline(*pp_md5);
2714
0
            }
2715
0
        }
2716
2717
        // Part 1: header
2718
2719
0
        writeHeader();
2720
2721
        // Part 2: linearization parameter dictionary.  Save enough space to write real dictionary.
2722
        // 200 characters is enough space if all numerical values in the parameter dictionary that
2723
        // contain offsets are 20 digits long plus a few extra characters for safety.  The entire
2724
        // linearization parameter dictionary must appear within the first 1024 characters of the
2725
        // file.
2726
2727
0
        qpdf_offset_t pos = m->pipeline->getCount();
2728
0
        openObject(lindict_id);
2729
0
        writeString("<<");
2730
0
        if (pass == 2) {
2731
0
            std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2732
0
            int first_page_object = m->obj[pages.at(0)].renumber;
2733
0
            int npages = QIntC::to_int(pages.size());
2734
2735
0
            writeString(" /Linearized 1 /L ");
2736
0
            writeString(std::to_string(file_size + hint_length));
2737
            // Implementation note 121 states that a space is mandatory after this open bracket.
2738
0
            writeString(" /H [ ");
2739
0
            writeString(std::to_string(m->new_obj[hint_id].xref.getOffset()));
2740
0
            writeString(" ");
2741
0
            writeString(std::to_string(hint_length));
2742
0
            writeString(" ] /O ");
2743
0
            writeString(std::to_string(first_page_object));
2744
0
            writeString(" /E ");
2745
0
            writeString(std::to_string(part6_end_offset + hint_length));
2746
0
            writeString(" /N ");
2747
0
            writeString(std::to_string(npages));
2748
0
            writeString(" /T ");
2749
0
            writeString(std::to_string(space_before_zero + hint_length));
2750
0
        }
2751
0
        writeString(" >>");
2752
0
        closeObject(lindict_id);
2753
0
        static int const pad = 200;
2754
0
        writePad(QIntC::to_size(pos - m->pipeline->getCount() + pad));
2755
0
        writeString("\n");
2756
2757
        // If the user supplied any additional header text, write it here after the linearization
2758
        // parameter dictionary.
2759
0
        writeString(m->extra_header_text);
2760
2761
        // Part 3: first page cross reference table and trailer.
2762
2763
0
        qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2764
0
        qpdf_offset_t hint_offset = 0;
2765
0
        if (pass == 2) {
2766
0
            hint_offset = m->new_obj[hint_id].xref.getOffset();
2767
0
        }
2768
0
        if (need_xref_stream) {
2769
            // Must pad here too.
2770
0
            if (pass == 1) {
2771
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
2772
                // reserved for each file offset.  This would provide adequate space for the xref
2773
                // stream as long as the last object in page 1 starts with in the first 4 GB of the
2774
                // file, which is extremely likely.  In the second pass, we will know the actual
2775
                // value for this, but it's okay if it's smaller.
2776
0
                first_half_max_obj_offset = 1 << 25;
2777
0
            }
2778
0
            pos = m->pipeline->getCount();
2779
0
            writeXRefStream(
2780
0
                first_half_xref,
2781
0
                first_half_end,
2782
0
                first_half_max_obj_offset,
2783
0
                t_lin_first,
2784
0
                first_half_start,
2785
0
                first_half_end,
2786
0
                first_trailer_size,
2787
0
                hint_length + second_xref_offset,
2788
0
                hint_id,
2789
0
                hint_offset,
2790
0
                hint_length,
2791
0
                (pass == 1),
2792
0
                pass);
2793
0
            qpdf_offset_t endpos = m->pipeline->getCount();
2794
0
            if (pass == 1) {
2795
                // Pad so we have enough room for the real xref stream.
2796
0
                writePad(calculateXrefStreamPadding(endpos - pos));
2797
0
                first_xref_end = m->pipeline->getCount();
2798
0
            } else {
2799
                // Pad so that the next object starts at the same place as in pass 1.
2800
0
                writePad(QIntC::to_size(first_xref_end - endpos));
2801
2802
0
                if (m->pipeline->getCount() != first_xref_end) {
2803
0
                    throw std::logic_error(
2804
0
                        "insufficient padding for first pass xref stream; "
2805
0
                        "first_xref_end=" +
2806
0
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
2807
0
                }
2808
0
            }
2809
0
            writeString("\n");
2810
0
        } else {
2811
0
            writeXRefTable(
2812
0
                t_lin_first,
2813
0
                first_half_start,
2814
0
                first_half_end,
2815
0
                first_trailer_size,
2816
0
                hint_length + second_xref_offset,
2817
0
                (pass == 1),
2818
0
                hint_id,
2819
0
                hint_offset,
2820
0
                hint_length,
2821
0
                pass);
2822
0
            writeString("startxref\n0\n%%EOF\n");
2823
0
        }
2824
2825
        // Parts 4 through 9
2826
2827
0
        for (auto const& cur_object: m->object_queue) {
2828
0
            if (cur_object.getObjectID() == part6_end_marker) {
2829
0
                first_half_max_obj_offset = m->pipeline->getCount();
2830
0
            }
2831
0
            writeObject(cur_object);
2832
0
            if (cur_object.getObjectID() == part4_end_marker) {
2833
0
                if (m->encryption) {
2834
0
                    writeEncryptionDictionary();
2835
0
                }
2836
0
                if (pass == 1) {
2837
0
                    m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2838
0
                } else {
2839
                    // Part 5: hint stream
2840
0
                    writeString(hint_buffer);
2841
0
                }
2842
0
            }
2843
0
            if (cur_object.getObjectID() == part6_end_marker) {
2844
0
                part6_end_offset = m->pipeline->getCount();
2845
0
            }
2846
0
        }
2847
2848
        // Part 10: overflow hint stream -- not used
2849
2850
        // Part 11: main cross reference table and trailer
2851
2852
0
        second_xref_offset = m->pipeline->getCount();
2853
0
        if (need_xref_stream) {
2854
0
            pos = m->pipeline->getCount();
2855
0
            space_before_zero = writeXRefStream(
2856
0
                second_half_xref,
2857
0
                second_half_end,
2858
0
                second_xref_offset,
2859
0
                t_lin_second,
2860
0
                0,
2861
0
                second_half_end,
2862
0
                second_trailer_size,
2863
0
                0,
2864
0
                0,
2865
0
                0,
2866
0
                0,
2867
0
                (pass == 1),
2868
0
                pass);
2869
0
            qpdf_offset_t endpos = m->pipeline->getCount();
2870
2871
0
            if (pass == 1) {
2872
                // Pad so we have enough room for the real xref stream.  See comments for previous
2873
                // xref stream on how we calculate the padding.
2874
0
                writePad(calculateXrefStreamPadding(endpos - pos));
2875
0
                writeString("\n");
2876
0
                second_xref_end = m->pipeline->getCount();
2877
0
            } else {
2878
                // Make the file size the same.
2879
0
                writePad(
2880
0
                    QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount()));
2881
0
                writeString("\n");
2882
2883
                // If this assertion fails, maybe we didn't have enough padding above.
2884
0
                if (m->pipeline->getCount() != second_xref_end + hint_length) {
2885
0
                    throw std::logic_error(
2886
0
                        "count mismatch after xref stream; possible insufficient padding?");
2887
0
                }
2888
0
            }
2889
0
        } else {
2890
0
            space_before_zero = writeXRefTable(
2891
0
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
2892
0
        }
2893
0
        writeString("startxref\n");
2894
0
        writeString(std::to_string(first_xref_offset));
2895
0
        writeString("\n%%EOF\n");
2896
2897
0
        if (pass == 1) {
2898
0
            if (m->deterministic_id) {
2899
0
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
2900
0
                computeDeterministicIDData();
2901
0
                pp_md5 = nullptr;
2902
0
                qpdf_assert_debug(m->md5_pipeline == nullptr);
2903
0
            }
2904
2905
            // Close first pass pipeline
2906
0
            file_size = m->pipeline->getCount();
2907
0
            pp_pass1 = nullptr;
2908
2909
            // Save hint offset since it will be set to zero by calling openObject.
2910
0
            qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2911
2912
            // Write hint stream to a buffer
2913
0
            {
2914
0
                PipelinePopper pp_hint(this);
2915
0
                activatePipelineStack(pp_hint, hint_buffer);
2916
0
                writeHintStream(hint_id);
2917
0
            }
2918
0
            hint_length = QIntC::to_offset(hint_buffer.size());
2919
2920
            // Restore hint offset
2921
0
            m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2922
0
            if (lin_pass1_file) {
2923
                // Write some debugging information
2924
0
                fprintf(
2925
0
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
2926
0
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
2927
0
                fprintf(
2928
0
                    lin_pass1_file,
2929
0
                    "%% second_xref_offset=%s\n",
2930
0
                    std::to_string(second_xref_offset).c_str());
2931
0
                fprintf(
2932
0
                    lin_pass1_file,
2933
0
                    "%% second_xref_end=%s\n",
2934
0
                    std::to_string(second_xref_end).c_str());
2935
0
                fclose(lin_pass1_file);
2936
0
                lin_pass1_file = nullptr;
2937
0
            }
2938
0
        }
2939
0
    }
2940
0
}
2941
2942
void
2943
QPDFWriter::enqueueObjectsStandard()
2944
7.63k
{
2945
7.63k
    if (m->preserve_unreferenced_objects) {
2946
0
        QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
2947
0
        for (auto const& oh: m->pdf.getAllObjects()) {
2948
0
            enqueueObject(oh);
2949
0
        }
2950
0
    }
2951
2952
    // Put root first on queue.
2953
7.63k
    QPDFObjectHandle trailer = getTrimmedTrailer();
2954
7.63k
    enqueueObject(trailer.getKey("/Root"));
2955
2956
    // Next place any other objects referenced from the trailer dictionary into the queue, handling
2957
    // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op.
2958
16.8k
    for (auto& item: trailer.as_dictionary()) {
2959
16.8k
        if (!item.second.null()) {
2960
14.0k
            enqueueObject(item.second);
2961
14.0k
        }
2962
16.8k
    }
2963
7.63k
}
2964
2965
void
2966
QPDFWriter::enqueueObjectsPCLm()
2967
0
{
2968
    // Image transform stream content for page strip images. Each of this new stream has to come
2969
    // after every page image strip written in the pclm file.
2970
0
    std::string image_transform_content = "q /image Do Q\n";
2971
2972
    // enqueue all pages first
2973
0
    std::vector<QPDFObjectHandle> all = m->pdf.getAllPages();
2974
0
    for (auto& page: all) {
2975
        // enqueue page
2976
0
        enqueueObject(page);
2977
2978
        // enqueue page contents stream
2979
0
        enqueueObject(page.getKey("/Contents"));
2980
2981
        // enqueue all the strips for each page
2982
0
        QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject");
2983
0
        for (auto& image: strips.as_dictionary()) {
2984
0
            if (!image.second.null()) {
2985
0
                enqueueObject(image.second);
2986
0
                enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content));
2987
0
            }
2988
0
        }
2989
0
    }
2990
2991
    // Put root in queue.
2992
0
    QPDFObjectHandle trailer = getTrimmedTrailer();
2993
0
    enqueueObject(trailer.getKey("/Root"));
2994
0
}
2995
2996
void
2997
QPDFWriter::indicateProgress(bool decrement, bool finished)
2998
87.9k
{
2999
87.9k
    if (decrement) {
3000
0
        --m->events_seen;
3001
0
        return;
3002
0
    }
3003
3004
87.9k
    ++m->events_seen;
3005
3006
87.9k
    if (!m->progress_reporter.get()) {
3007
87.9k
        return;
3008
87.9k
    }
3009
3010
0
    if (finished || (m->events_seen >= m->next_progress_report)) {
3011
0
        int percentage =
3012
0
            (finished ? 100
3013
0
                 : m->next_progress_report == 0
3014
0
                 ? 0
3015
0
                 : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected)));
3016
0
        m->progress_reporter->reportProgress(percentage);
3017
0
    }
3018
0
    int increment = std::max(1, (m->events_expected / 100));
3019
0
    while (m->events_seen >= m->next_progress_report) {
3020
0
        m->next_progress_report += increment;
3021
0
    }
3022
0
}
3023
3024
void
3025
QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr)
3026
0
{
3027
0
    m->progress_reporter = pr;
3028
0
}
3029
3030
void
3031
QPDFWriter::writeStandard()
3032
7.63k
{
3033
7.63k
    auto pp_md5 = PipelinePopper(this);
3034
7.63k
    if (m->deterministic_id) {
3035
0
        pushMD5Pipeline(pp_md5);
3036
0
    }
3037
3038
    // Start writing
3039
3040
7.63k
    writeHeader();
3041
7.63k
    writeString(m->extra_header_text);
3042
3043
7.63k
    if (m->pclm) {
3044
0
        enqueueObjectsPCLm();
3045
7.63k
    } else {
3046
7.63k
        enqueueObjectsStandard();
3047
7.63k
    }
3048
3049
    // Now start walking queue, outputting each object.
3050
88.0k
    while (m->object_queue_front < m->object_queue.size()) {
3051
80.4k
        QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front);
3052
80.4k
        ++m->object_queue_front;
3053
80.4k
        writeObject(cur_object);
3054
80.4k
    }
3055
3056
    // Write out the encryption dictionary, if any
3057
7.63k
    if (m->encryption) {
3058
7.50k
        writeEncryptionDictionary();
3059
7.50k
    }
3060
3061
    // Now write out xref.  next_objid is now the number of objects.
3062
7.63k
    qpdf_offset_t xref_offset = m->pipeline->getCount();
3063
7.63k
    if (m->object_stream_to_objects.empty()) {
3064
        // Write regular cross-reference table
3065
7.50k
        writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid);
3066
7.50k
    } else {
3067
        // Write cross-reference stream.
3068
130
        int xref_id = m->next_objid++;
3069
130
        writeXRefStream(
3070
130
            xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid);
3071
130
    }
3072
7.63k
    writeString("startxref\n");
3073
7.63k
    writeString(std::to_string(xref_offset));
3074
7.63k
    writeString("\n%%EOF\n");
3075
3076
7.63k
    if (m->deterministic_id) {
3077
0
        QTC::TC(
3078
0
            "qpdf",
3079
0
            "QPDFWriter standard deterministic ID",
3080
0
            m->object_stream_to_objects.empty() ? 0 : 1);
3081
0
    }
3082
7.63k
}