/src/qpdf/libqpdf/QPDFWriter.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/assert_debug.h> |
2 | | |
3 | | #include <qpdf/qpdf-config.h> // include early for large file support |
4 | | |
5 | | #include <qpdf/QPDFWriter_private.hh> |
6 | | |
7 | | #include <qpdf/MD5.hh> |
8 | | #include <qpdf/Pl_AES_PDF.hh> |
9 | | #include <qpdf/Pl_Count.hh> |
10 | | #include <qpdf/Pl_Discard.hh> |
11 | | #include <qpdf/Pl_Flate.hh> |
12 | | #include <qpdf/Pl_MD5.hh> |
13 | | #include <qpdf/Pl_PNGFilter.hh> |
14 | | #include <qpdf/Pl_RC4.hh> |
15 | | #include <qpdf/Pl_StdioFile.hh> |
16 | | #include <qpdf/QIntC.hh> |
17 | | #include <qpdf/QPDF.hh> |
18 | | #include <qpdf/QPDFObjectHandle.hh> |
19 | | #include <qpdf/QPDF_Name.hh> |
20 | | #include <qpdf/QPDF_String.hh> |
21 | | #include <qpdf/QTC.hh> |
22 | | #include <qpdf/QUtil.hh> |
23 | | #include <qpdf/RC4.hh> |
24 | | |
25 | | #include <algorithm> |
26 | | #include <cstdlib> |
27 | | #include <stdexcept> |
28 | | |
29 | | QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) |
30 | 0 | { |
31 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
32 | 0 | } |
33 | | |
34 | | QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) : |
35 | | handler(handler) |
36 | 0 | { |
37 | 0 | } |
38 | | |
39 | | QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT |
40 | | // (modernize-use-equals-default) |
41 | 0 | { |
42 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
43 | 0 | } |
44 | | |
45 | | void |
46 | | QPDFWriter::FunctionProgressReporter::reportProgress(int progress) |
47 | 0 | { |
48 | 0 | this->handler(progress); |
49 | 0 | } |
50 | | |
51 | | QPDFWriter::Members::Members(QPDF& pdf) : |
52 | | pdf(pdf), |
53 | | root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)) |
54 | 34.6k | { |
55 | 34.6k | } |
56 | | |
57 | | QPDFWriter::Members::~Members() |
58 | 34.3k | { |
59 | 34.3k | if (file && close_file) { |
60 | 0 | fclose(file); |
61 | 0 | } |
62 | 34.3k | delete output_buffer; |
63 | 34.3k | } |
64 | | |
65 | | QPDFWriter::QPDFWriter(QPDF& pdf) : |
66 | | m(new Members(pdf)) |
67 | 34.6k | { |
68 | 34.6k | } |
69 | | |
70 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) : |
71 | | m(new Members(pdf)) |
72 | 0 | { |
73 | 0 | setOutputFilename(filename); |
74 | 0 | } |
75 | | |
76 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) : |
77 | | m(new Members(pdf)) |
78 | 0 | { |
79 | 0 | setOutputFile(description, file, close_file); |
80 | 0 | } |
81 | | |
82 | | void |
83 | | QPDFWriter::setOutputFilename(char const* filename) |
84 | 0 | { |
85 | 0 | char const* description = filename; |
86 | 0 | FILE* f = nullptr; |
87 | 0 | bool close_file = false; |
88 | 0 | if (filename == nullptr) { |
89 | 0 | description = "standard output"; |
90 | 0 | QTC::TC("qpdf", "QPDFWriter write to stdout"); |
91 | 0 | f = stdout; |
92 | 0 | QUtil::binary_stdout(); |
93 | 0 | } else { |
94 | 0 | QTC::TC("qpdf", "QPDFWriter write to file"); |
95 | 0 | f = QUtil::safe_fopen(filename, "wb+"); |
96 | 0 | close_file = true; |
97 | 0 | } |
98 | 0 | setOutputFile(description, f, close_file); |
99 | 0 | } |
100 | | |
101 | | void |
102 | | QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file) |
103 | 0 | { |
104 | 0 | m->filename = description; |
105 | 0 | m->file = file; |
106 | 0 | m->close_file = close_file; |
107 | 0 | std::shared_ptr<Pipeline> p = std::make_shared<Pl_StdioFile>("qpdf output", file); |
108 | 0 | m->to_delete.push_back(p); |
109 | 0 | initializePipelineStack(p.get()); |
110 | 0 | } |
111 | | |
112 | | void |
113 | | QPDFWriter::setOutputMemory() |
114 | 0 | { |
115 | 0 | m->filename = "memory buffer"; |
116 | 0 | m->buffer_pipeline = new Pl_Buffer("qpdf output"); |
117 | 0 | m->to_delete.push_back(std::shared_ptr<Pipeline>(m->buffer_pipeline)); |
118 | 0 | initializePipelineStack(m->buffer_pipeline); |
119 | 0 | } |
120 | | |
121 | | Buffer* |
122 | | QPDFWriter::getBuffer() |
123 | 0 | { |
124 | 0 | Buffer* result = m->output_buffer; |
125 | 0 | m->output_buffer = nullptr; |
126 | 0 | return result; |
127 | 0 | } |
128 | | |
129 | | std::shared_ptr<Buffer> |
130 | | QPDFWriter::getBufferSharedPointer() |
131 | 0 | { |
132 | 0 | return std::shared_ptr<Buffer>(getBuffer()); |
133 | 0 | } |
134 | | |
135 | | void |
136 | | QPDFWriter::setOutputPipeline(Pipeline* p) |
137 | 34.3k | { |
138 | 34.3k | m->filename = "custom pipeline"; |
139 | 34.3k | initializePipelineStack(p); |
140 | 34.3k | } |
141 | | |
142 | | void |
143 | | QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) |
144 | 11.2k | { |
145 | 11.2k | m->object_stream_mode = mode; |
146 | 11.2k | } |
147 | | |
148 | | void |
149 | | QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) |
150 | 0 | { |
151 | 0 | switch (mode) { |
152 | 0 | case qpdf_s_uncompress: |
153 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
154 | 0 | m->compress_streams = false; |
155 | 0 | break; |
156 | | |
157 | 0 | case qpdf_s_preserve: |
158 | 0 | m->stream_decode_level = qpdf_dl_none; |
159 | 0 | m->compress_streams = false; |
160 | 0 | break; |
161 | | |
162 | 0 | case qpdf_s_compress: |
163 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
164 | 0 | m->compress_streams = true; |
165 | 0 | break; |
166 | 0 | } |
167 | 0 | m->stream_decode_level_set = true; |
168 | 0 | m->compress_streams_set = true; |
169 | 0 | } |
170 | | |
171 | | void |
172 | | QPDFWriter::setCompressStreams(bool val) |
173 | 0 | { |
174 | 0 | m->compress_streams = val; |
175 | 0 | m->compress_streams_set = true; |
176 | 0 | } |
177 | | |
178 | | void |
179 | | QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) |
180 | 34.3k | { |
181 | 34.3k | m->stream_decode_level = val; |
182 | 34.3k | m->stream_decode_level_set = true; |
183 | 34.3k | } |
184 | | |
185 | | void |
186 | | QPDFWriter::setRecompressFlate(bool val) |
187 | 0 | { |
188 | 0 | m->recompress_flate = val; |
189 | 0 | } |
190 | | |
191 | | void |
192 | | QPDFWriter::setContentNormalization(bool val) |
193 | 0 | { |
194 | 0 | m->normalize_content_set = true; |
195 | 0 | m->normalize_content = val; |
196 | 0 | } |
197 | | |
198 | | void |
199 | | QPDFWriter::setQDFMode(bool val) |
200 | 8.61k | { |
201 | 8.61k | m->qdf_mode = val; |
202 | 8.61k | } |
203 | | |
204 | | void |
205 | | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
206 | 0 | { |
207 | 0 | m->preserve_unreferenced_objects = val; |
208 | 0 | } |
209 | | |
210 | | void |
211 | | QPDFWriter::setNewlineBeforeEndstream(bool val) |
212 | 0 | { |
213 | 0 | m->newline_before_endstream = val; |
214 | 0 | } |
215 | | |
216 | | void |
217 | | QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level) |
218 | 60.1k | { |
219 | 60.1k | bool set_version = false; |
220 | 60.1k | bool set_extension_level = false; |
221 | 60.1k | if (m->min_pdf_version.empty()) { |
222 | 33.2k | set_version = true; |
223 | 33.2k | set_extension_level = true; |
224 | 33.2k | } else { |
225 | 26.8k | int old_major = 0; |
226 | 26.8k | int old_minor = 0; |
227 | 26.8k | int min_major = 0; |
228 | 26.8k | int min_minor = 0; |
229 | 26.8k | parseVersion(version, old_major, old_minor); |
230 | 26.8k | parseVersion(m->min_pdf_version, min_major, min_minor); |
231 | 26.8k | int compare = compareVersions(old_major, old_minor, min_major, min_minor); |
232 | 26.8k | if (compare > 0) { |
233 | 2.41k | QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1); |
234 | 2.41k | set_version = true; |
235 | 2.41k | set_extension_level = true; |
236 | 24.4k | } else if (compare == 0) { |
237 | 2.38k | if (extension_level > m->min_extension_level) { |
238 | 8 | QTC::TC("qpdf", "QPDFWriter increasing extension level"); |
239 | 8 | set_extension_level = true; |
240 | 8 | } |
241 | 2.38k | } |
242 | 26.8k | } |
243 | | |
244 | 60.1k | if (set_version) { |
245 | 35.6k | m->min_pdf_version = version; |
246 | 35.6k | } |
247 | 60.1k | if (set_extension_level) { |
248 | 35.6k | m->min_extension_level = extension_level; |
249 | 35.6k | } |
250 | 60.1k | } |
251 | | |
252 | | void |
253 | | QPDFWriter::setMinimumPDFVersion(PDFVersion const& v) |
254 | 0 | { |
255 | 0 | std::string version; |
256 | 0 | int extension_level; |
257 | 0 | v.getVersion(version, extension_level); |
258 | 0 | setMinimumPDFVersion(version, extension_level); |
259 | 0 | } |
260 | | |
261 | | void |
262 | | QPDFWriter::forcePDFVersion(std::string const& version, int extension_level) |
263 | 0 | { |
264 | 0 | m->forced_pdf_version = version; |
265 | 0 | m->forced_extension_level = extension_level; |
266 | 0 | } |
267 | | |
268 | | void |
269 | | QPDFWriter::setExtraHeaderText(std::string const& text) |
270 | 0 | { |
271 | 0 | m->extra_header_text = text; |
272 | 0 | if ((m->extra_header_text.length() > 0) && (*(m->extra_header_text.rbegin()) != '\n')) { |
273 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text add newline"); |
274 | 0 | m->extra_header_text += "\n"; |
275 | 0 | } else { |
276 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text no newline"); |
277 | 0 | } |
278 | 0 | } |
279 | | |
280 | | void |
281 | | QPDFWriter::setStaticID(bool val) |
282 | 20.4k | { |
283 | 20.4k | m->static_id = val; |
284 | 20.4k | } |
285 | | |
286 | | void |
287 | | QPDFWriter::setDeterministicID(bool val) |
288 | 13.8k | { |
289 | 13.8k | m->deterministic_id = val; |
290 | 13.8k | } |
291 | | |
292 | | void |
293 | | QPDFWriter::setStaticAesIV(bool val) |
294 | 0 | { |
295 | 0 | if (val) { |
296 | 0 | Pl_AES_PDF::useStaticIV(); |
297 | 0 | } |
298 | 0 | } |
299 | | |
300 | | void |
301 | | QPDFWriter::setSuppressOriginalObjectIDs(bool val) |
302 | 0 | { |
303 | 0 | m->suppress_original_object_ids = val; |
304 | 0 | } |
305 | | |
306 | | void |
307 | | QPDFWriter::setPreserveEncryption(bool val) |
308 | 0 | { |
309 | 0 | m->preserve_encryption = val; |
310 | 0 | } |
311 | | |
312 | | void |
313 | | QPDFWriter::setLinearization(bool val) |
314 | 19.7k | { |
315 | 19.7k | m->linearized = val; |
316 | 19.7k | if (val) { |
317 | 19.7k | m->pclm = false; |
318 | 19.7k | } |
319 | 19.7k | } |
320 | | |
321 | | void |
322 | | QPDFWriter::setLinearizationPass1Filename(std::string const& filename) |
323 | 0 | { |
324 | 0 | m->lin_pass1_filename = filename; |
325 | 0 | } |
326 | | |
327 | | void |
328 | | QPDFWriter::setPCLm(bool val) |
329 | 0 | { |
330 | 0 | m->pclm = val; |
331 | 0 | if (val) { |
332 | 0 | m->linearized = false; |
333 | 0 | } |
334 | 0 | } |
335 | | |
336 | | void |
337 | | QPDFWriter::setR2EncryptionParametersInsecure( |
338 | | char const* user_password, |
339 | | char const* owner_password, |
340 | | bool allow_print, |
341 | | bool allow_modify, |
342 | | bool allow_extract, |
343 | | bool allow_annotate) |
344 | 0 | { |
345 | 0 | std::set<int> clear; |
346 | 0 | if (!allow_print) { |
347 | 0 | clear.insert(3); |
348 | 0 | } |
349 | 0 | if (!allow_modify) { |
350 | 0 | clear.insert(4); |
351 | 0 | } |
352 | 0 | if (!allow_extract) { |
353 | 0 | clear.insert(5); |
354 | 0 | } |
355 | 0 | if (!allow_annotate) { |
356 | 0 | clear.insert(6); |
357 | 0 | } |
358 | |
|
359 | 0 | setEncryptionParameters(user_password, owner_password, 1, 2, 5, clear); |
360 | 0 | } |
361 | | |
362 | | void |
363 | | QPDFWriter::setR3EncryptionParametersInsecure( |
364 | | char const* user_password, |
365 | | char const* owner_password, |
366 | | bool allow_accessibility, |
367 | | bool allow_extract, |
368 | | bool allow_assemble, |
369 | | bool allow_annotate_and_form, |
370 | | bool allow_form_filling, |
371 | | bool allow_modify_other, |
372 | | qpdf_r3_print_e print) |
373 | 5.98k | { |
374 | 5.98k | std::set<int> clear; |
375 | 5.98k | interpretR3EncryptionParameters( |
376 | 5.98k | clear, |
377 | 5.98k | user_password, |
378 | 5.98k | owner_password, |
379 | 5.98k | allow_accessibility, |
380 | 5.98k | allow_extract, |
381 | 5.98k | allow_assemble, |
382 | 5.98k | allow_annotate_and_form, |
383 | 5.98k | allow_form_filling, |
384 | 5.98k | allow_modify_other, |
385 | 5.98k | print, |
386 | 5.98k | qpdf_r3m_all); |
387 | 5.98k | setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear); |
388 | 5.98k | } |
389 | | |
390 | | void |
391 | | QPDFWriter::setR4EncryptionParametersInsecure( |
392 | | char const* user_password, |
393 | | char const* owner_password, |
394 | | bool allow_accessibility, |
395 | | bool allow_extract, |
396 | | bool allow_assemble, |
397 | | bool allow_annotate_and_form, |
398 | | bool allow_form_filling, |
399 | | bool allow_modify_other, |
400 | | qpdf_r3_print_e print, |
401 | | bool encrypt_metadata, |
402 | | bool use_aes) |
403 | 0 | { |
404 | 0 | std::set<int> clear; |
405 | 0 | interpretR3EncryptionParameters( |
406 | 0 | clear, |
407 | 0 | user_password, |
408 | 0 | owner_password, |
409 | 0 | allow_accessibility, |
410 | 0 | allow_extract, |
411 | 0 | allow_assemble, |
412 | 0 | allow_annotate_and_form, |
413 | 0 | allow_form_filling, |
414 | 0 | allow_modify_other, |
415 | 0 | print, |
416 | 0 | qpdf_r3m_all); |
417 | 0 | m->encrypt_use_aes = use_aes; |
418 | 0 | m->encrypt_metadata = encrypt_metadata; |
419 | 0 | setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear); |
420 | 0 | } |
421 | | |
422 | | void |
423 | | QPDFWriter::setR5EncryptionParameters( |
424 | | char const* user_password, |
425 | | char const* owner_password, |
426 | | bool allow_accessibility, |
427 | | bool allow_extract, |
428 | | bool allow_assemble, |
429 | | bool allow_annotate_and_form, |
430 | | bool allow_form_filling, |
431 | | bool allow_modify_other, |
432 | | qpdf_r3_print_e print, |
433 | | bool encrypt_metadata) |
434 | 0 | { |
435 | 0 | std::set<int> clear; |
436 | 0 | interpretR3EncryptionParameters( |
437 | 0 | clear, |
438 | 0 | user_password, |
439 | 0 | owner_password, |
440 | 0 | allow_accessibility, |
441 | 0 | allow_extract, |
442 | 0 | allow_assemble, |
443 | 0 | allow_annotate_and_form, |
444 | 0 | allow_form_filling, |
445 | 0 | allow_modify_other, |
446 | 0 | print, |
447 | 0 | qpdf_r3m_all); |
448 | 0 | m->encrypt_use_aes = true; |
449 | 0 | m->encrypt_metadata = encrypt_metadata; |
450 | 0 | setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear); |
451 | 0 | } |
452 | | |
453 | | void |
454 | | QPDFWriter::setR6EncryptionParameters( |
455 | | char const* user_password, |
456 | | char const* owner_password, |
457 | | bool allow_accessibility, |
458 | | bool allow_extract, |
459 | | bool allow_assemble, |
460 | | bool allow_annotate_and_form, |
461 | | bool allow_form_filling, |
462 | | bool allow_modify_other, |
463 | | qpdf_r3_print_e print, |
464 | | bool encrypt_metadata) |
465 | 14.5k | { |
466 | 14.5k | std::set<int> clear; |
467 | 14.5k | interpretR3EncryptionParameters( |
468 | 14.5k | clear, |
469 | 14.5k | user_password, |
470 | 14.5k | owner_password, |
471 | 14.5k | allow_accessibility, |
472 | 14.5k | allow_extract, |
473 | 14.5k | allow_assemble, |
474 | 14.5k | allow_annotate_and_form, |
475 | 14.5k | allow_form_filling, |
476 | 14.5k | allow_modify_other, |
477 | 14.5k | print, |
478 | 14.5k | qpdf_r3m_all); |
479 | 14.5k | m->encrypt_use_aes = true; |
480 | 14.5k | m->encrypt_metadata = encrypt_metadata; |
481 | 14.5k | setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear); |
482 | 14.5k | } |
483 | | |
484 | | void |
485 | | QPDFWriter::interpretR3EncryptionParameters( |
486 | | std::set<int>& clear, |
487 | | char const* user_password, |
488 | | char const* owner_password, |
489 | | bool allow_accessibility, |
490 | | bool allow_extract, |
491 | | bool allow_assemble, |
492 | | bool allow_annotate_and_form, |
493 | | bool allow_form_filling, |
494 | | bool allow_modify_other, |
495 | | qpdf_r3_print_e print, |
496 | | qpdf_r3_modify_e modify) |
497 | 20.4k | { |
498 | | // Acrobat 5 security options: |
499 | | |
500 | | // Checkboxes: |
501 | | // Enable Content Access for the Visually Impaired |
502 | | // Allow Content Copying and Extraction |
503 | | |
504 | | // Allowed changes menu: |
505 | | // None |
506 | | // Only Document Assembly |
507 | | // Only Form Field Fill-in or Signing |
508 | | // Comment Authoring, Form Field Fill-in or Signing |
509 | | // General Editing, Comment and Form Field Authoring |
510 | | |
511 | | // Allowed printing menu: |
512 | | // None |
513 | | // Low Resolution |
514 | | // Full printing |
515 | | |
516 | | // Meanings of bits in P when R >= 3 |
517 | | // |
518 | | // 3: low-resolution printing |
519 | | // 4: document modification except as controlled by 6, 9, and 11 |
520 | | // 5: extraction |
521 | | // 6: add/modify annotations (comment), fill in forms |
522 | | // if 4+6 are set, also allows modification of form fields |
523 | | // 9: fill in forms even if 6 is clear |
524 | | // 10: accessibility; ignored by readers, should always be set |
525 | | // 11: document assembly even if 4 is clear |
526 | | // 12: high-resolution printing |
527 | | |
528 | 20.4k | if (!allow_accessibility) { |
529 | | // setEncryptionParameters sets this if R > 3 |
530 | 0 | clear.insert(10); |
531 | 0 | } |
532 | 20.4k | if (!allow_extract) { |
533 | 0 | clear.insert(5); |
534 | 0 | } |
535 | | |
536 | | // Note: these switch statements all "fall through" (no break statements). Each option clears |
537 | | // successively more access bits. |
538 | 20.4k | switch (print) { |
539 | 0 | case qpdf_r3p_none: |
540 | 0 | clear.insert(3); // any printing |
541 | |
|
542 | 0 | case qpdf_r3p_low: |
543 | 0 | clear.insert(12); // high resolution printing |
544 | |
|
545 | 20.4k | case qpdf_r3p_full: |
546 | 20.4k | break; |
547 | | |
548 | | // no default so gcc warns for missing cases |
549 | 20.4k | } |
550 | | |
551 | | // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full |
552 | | // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're |
553 | | // stuck with it. See also allow checks below to control the bits individually. |
554 | | |
555 | | // NOT EXERCISED IN TEST SUITE |
556 | 20.4k | switch (modify) { |
557 | 0 | case qpdf_r3m_none: |
558 | 0 | clear.insert(11); // document assembly |
559 | |
|
560 | 0 | case qpdf_r3m_assembly: |
561 | 0 | clear.insert(9); // filling in form fields |
562 | |
|
563 | 0 | case qpdf_r3m_form: |
564 | 0 | clear.insert(6); // modify annotations, fill in form fields |
565 | |
|
566 | 0 | case qpdf_r3m_annotate: |
567 | 0 | clear.insert(4); // other modifications |
568 | |
|
569 | 20.4k | case qpdf_r3m_all: |
570 | 20.4k | break; |
571 | | |
572 | | // no default so gcc warns for missing cases |
573 | 20.4k | } |
574 | | // END NOT EXERCISED IN TEST SUITE |
575 | | |
576 | 20.4k | if (!allow_assemble) { |
577 | 0 | clear.insert(11); |
578 | 0 | } |
579 | 20.4k | if (!allow_annotate_and_form) { |
580 | 0 | clear.insert(6); |
581 | 0 | } |
582 | 20.4k | if (!allow_form_filling) { |
583 | 0 | clear.insert(9); |
584 | 0 | } |
585 | 20.4k | if (!allow_modify_other) { |
586 | 0 | clear.insert(4); |
587 | 0 | } |
588 | 20.4k | } |
589 | | |
590 | | void |
591 | | QPDFWriter::setEncryptionParameters( |
592 | | char const* user_password, |
593 | | char const* owner_password, |
594 | | int V, |
595 | | int R, |
596 | | int key_len, |
597 | | std::set<int>& bits_to_clear) |
598 | 20.4k | { |
599 | | // PDF specification refers to bits with the low bit numbered 1. |
600 | | // We have to convert this into a bit field. |
601 | | |
602 | | // Specification always requires bits 1 and 2 to be cleared. |
603 | 20.4k | bits_to_clear.insert(1); |
604 | 20.4k | bits_to_clear.insert(2); |
605 | | |
606 | 20.4k | if (R > 3) { |
607 | | // Bit 10 is deprecated and should always be set. This used to mean accessibility. There |
608 | | // is no way to disable accessibility with R > 3. |
609 | 14.5k | bits_to_clear.erase(10); |
610 | 14.5k | } |
611 | | |
612 | 20.4k | int P = 0; |
613 | | // Create the complement of P, then invert. |
614 | 40.9k | for (int b: bits_to_clear) { |
615 | 40.9k | P |= (1 << (b - 1)); |
616 | 40.9k | } |
617 | 20.4k | P = ~P; |
618 | | |
619 | 20.4k | generateID(); |
620 | 20.4k | std::string O; |
621 | 20.4k | std::string U; |
622 | 20.4k | std::string OE; |
623 | 20.4k | std::string UE; |
624 | 20.4k | std::string Perms; |
625 | 20.4k | std::string encryption_key; |
626 | 20.4k | if (V < 5) { |
627 | 5.97k | QPDF::compute_encryption_O_U( |
628 | 5.97k | user_password, owner_password, V, R, key_len, P, m->encrypt_metadata, m->id1, O, U); |
629 | 14.5k | } else { |
630 | 14.5k | QPDF::compute_encryption_parameters_V5( |
631 | 14.5k | user_password, |
632 | 14.5k | owner_password, |
633 | 14.5k | V, |
634 | 14.5k | R, |
635 | 14.5k | key_len, |
636 | 14.5k | P, |
637 | 14.5k | m->encrypt_metadata, |
638 | 14.5k | m->id1, |
639 | 14.5k | encryption_key, |
640 | 14.5k | O, |
641 | 14.5k | U, |
642 | 14.5k | OE, |
643 | 14.5k | UE, |
644 | 14.5k | Perms); |
645 | 14.5k | } |
646 | 20.4k | setEncryptionParametersInternal( |
647 | 20.4k | V, R, key_len, P, O, U, OE, UE, Perms, m->id1, user_password, encryption_key); |
648 | 20.4k | } |
649 | | |
650 | | void |
651 | | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
652 | 0 | { |
653 | 0 | m->preserve_encryption = false; |
654 | 0 | QPDFObjectHandle trailer = qpdf.getTrailer(); |
655 | 0 | if (trailer.hasKey("/Encrypt")) { |
656 | 0 | generateID(); |
657 | 0 | m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
658 | 0 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
659 | 0 | int V = encrypt.getKey("/V").getIntValueAsInt(); |
660 | 0 | int key_len = 5; |
661 | 0 | if (V > 1) { |
662 | 0 | key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8; |
663 | 0 | } |
664 | 0 | if (encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool()) { |
665 | 0 | m->encrypt_metadata = encrypt.getKey("/EncryptMetadata").getBoolValue(); |
666 | 0 | } |
667 | 0 | if (V >= 4) { |
668 | | // When copying encryption parameters, use AES even if the original file did not. |
669 | | // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of |
670 | | // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF |
671 | | // all potentially having different values. |
672 | 0 | m->encrypt_use_aes = true; |
673 | 0 | } |
674 | 0 | QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", m->encrypt_metadata ? 0 : 1); |
675 | 0 | QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1); |
676 | 0 | std::string OE; |
677 | 0 | std::string UE; |
678 | 0 | std::string Perms; |
679 | 0 | std::string encryption_key; |
680 | 0 | if (V >= 5) { |
681 | 0 | QTC::TC("qpdf", "QPDFWriter copy V5"); |
682 | 0 | OE = encrypt.getKey("/OE").getStringValue(); |
683 | 0 | UE = encrypt.getKey("/UE").getStringValue(); |
684 | 0 | Perms = encrypt.getKey("/Perms").getStringValue(); |
685 | 0 | encryption_key = qpdf.getEncryptionKey(); |
686 | 0 | } |
687 | |
|
688 | 0 | setEncryptionParametersInternal( |
689 | 0 | V, |
690 | 0 | encrypt.getKey("/R").getIntValueAsInt(), |
691 | 0 | key_len, |
692 | 0 | static_cast<int>(encrypt.getKey("/P").getIntValue()), |
693 | 0 | encrypt.getKey("/O").getStringValue(), |
694 | 0 | encrypt.getKey("/U").getStringValue(), |
695 | 0 | OE, |
696 | 0 | UE, |
697 | 0 | Perms, |
698 | 0 | m->id1, // m->id1 == the other file's id1 |
699 | 0 | qpdf.getPaddedUserPassword(), |
700 | 0 | encryption_key); |
701 | 0 | } |
702 | 0 | } |
703 | | |
704 | | void |
705 | | QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level) |
706 | 0 | { |
707 | 0 | if (!m->encrypted) { |
708 | 0 | return; |
709 | 0 | } |
710 | | |
711 | 0 | bool disable = false; |
712 | 0 | if (compareVersions(major, minor, 1, 3) < 0) { |
713 | 0 | disable = true; |
714 | 0 | } else { |
715 | 0 | int V = QUtil::string_to_int(m->encryption_dictionary["/V"].c_str()); |
716 | 0 | int R = QUtil::string_to_int(m->encryption_dictionary["/R"].c_str()); |
717 | 0 | if (compareVersions(major, minor, 1, 4) < 0) { |
718 | 0 | if ((V > 1) || (R > 2)) { |
719 | 0 | disable = true; |
720 | 0 | } |
721 | 0 | } else if (compareVersions(major, minor, 1, 5) < 0) { |
722 | 0 | if ((V > 2) || (R > 3)) { |
723 | 0 | disable = true; |
724 | 0 | } |
725 | 0 | } else if (compareVersions(major, minor, 1, 6) < 0) { |
726 | 0 | if (m->encrypt_use_aes) { |
727 | 0 | disable = true; |
728 | 0 | } |
729 | 0 | } else if ( |
730 | 0 | (compareVersions(major, minor, 1, 7) < 0) || |
731 | 0 | ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) { |
732 | 0 | if ((V >= 5) || (R >= 5)) { |
733 | 0 | disable = true; |
734 | 0 | } |
735 | 0 | } |
736 | 0 | } |
737 | 0 | if (disable) { |
738 | 0 | QTC::TC("qpdf", "QPDFWriter forced version disabled encryption"); |
739 | 0 | m->encrypted = false; |
740 | 0 | } |
741 | 0 | } |
742 | | |
743 | | void |
744 | | QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const |
745 | 53.4k | { |
746 | 53.4k | major = QUtil::string_to_int(version.c_str()); |
747 | 53.4k | minor = 0; |
748 | 53.4k | size_t p = version.find('.'); |
749 | 53.4k | if ((p != std::string::npos) && (version.length() > p)) { |
750 | 53.3k | minor = QUtil::string_to_int(version.substr(p + 1).c_str()); |
751 | 53.3k | } |
752 | 53.4k | std::string tmp = std::to_string(major) + "." + std::to_string(minor); |
753 | 53.4k | if (tmp != version) { |
754 | | // The version number in the input is probably invalid. This happens with some files that |
755 | | // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately |
756 | | // QPDFWriter doesn't have a way to give a warning, so we just ignore this case. |
757 | 33 | } |
758 | 53.4k | } |
759 | | |
760 | | int |
761 | | QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const |
762 | 26.5k | { |
763 | 26.5k | if (major1 < major2) { |
764 | 91 | return -1; |
765 | 26.4k | } else if (major1 > major2) { |
766 | 303 | return 1; |
767 | 26.1k | } else if (minor1 < minor2) { |
768 | 21.6k | return -1; |
769 | 21.6k | } else if (minor1 > minor2) { |
770 | 2.10k | return 1; |
771 | 2.38k | } else { |
772 | 2.38k | return 0; |
773 | 2.38k | } |
774 | 26.5k | } |
775 | | |
776 | | void |
777 | | QPDFWriter::setEncryptionParametersInternal( |
778 | | int V, |
779 | | int R, |
780 | | int key_len, |
781 | | int P, |
782 | | std::string const& O, |
783 | | std::string const& U, |
784 | | std::string const& OE, |
785 | | std::string const& UE, |
786 | | std::string const& Perms, |
787 | | std::string const& id1, |
788 | | std::string const& user_password, |
789 | | std::string const& encryption_key) |
790 | 19.5k | { |
791 | 19.5k | m->encryption_V = V; |
792 | 19.5k | m->encryption_R = R; |
793 | 19.5k | m->encryption_dictionary["/Filter"] = "/Standard"; |
794 | 19.5k | m->encryption_dictionary["/V"] = std::to_string(V); |
795 | 19.5k | m->encryption_dictionary["/Length"] = std::to_string(key_len * 8); |
796 | 19.5k | m->encryption_dictionary["/R"] = std::to_string(R); |
797 | 19.5k | m->encryption_dictionary["/P"] = std::to_string(P); |
798 | 19.5k | m->encryption_dictionary["/O"] = QPDF_String(O).unparse(true); |
799 | 19.5k | m->encryption_dictionary["/U"] = QPDF_String(U).unparse(true); |
800 | 19.5k | if (V >= 5) { |
801 | 13.6k | m->encryption_dictionary["/OE"] = QPDF_String(OE).unparse(true); |
802 | 13.6k | m->encryption_dictionary["/UE"] = QPDF_String(UE).unparse(true); |
803 | 13.6k | m->encryption_dictionary["/Perms"] = QPDF_String(Perms).unparse(true); |
804 | 13.6k | } |
805 | 19.5k | if (R >= 6) { |
806 | 13.6k | setMinimumPDFVersion("1.7", 8); |
807 | 13.6k | } else if (R == 5) { |
808 | 0 | setMinimumPDFVersion("1.7", 3); |
809 | 5.97k | } else if (R == 4) { |
810 | 0 | setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5"); |
811 | 5.97k | } else if (R == 3) { |
812 | 5.97k | setMinimumPDFVersion("1.4"); |
813 | 5.97k | } else { |
814 | 0 | setMinimumPDFVersion("1.3"); |
815 | 0 | } |
816 | | |
817 | 19.5k | if ((R >= 4) && (!m->encrypt_metadata)) { |
818 | 0 | m->encryption_dictionary["/EncryptMetadata"] = "false"; |
819 | 0 | } |
820 | 19.5k | if ((V == 4) || (V == 5)) { |
821 | | // The spec says the value for the crypt filter key can be anything, and xpdf seems to |
822 | | // agree. However, Adobe Reader won't open our files unless we use /StdCF. |
823 | 13.6k | m->encryption_dictionary["/StmF"] = "/StdCF"; |
824 | 13.6k | m->encryption_dictionary["/StrF"] = "/StdCF"; |
825 | 13.6k | std::string method = (m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2"); |
826 | | // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of |
827 | | // MacOS won't open encrypted files without it. |
828 | 13.6k | m->encryption_dictionary["/CF"] = "<< /StdCF << /AuthEvent /DocOpen /CFM " + method + |
829 | 13.6k | " /Length " + std::string((V < 5) ? "16" : "32") + " >> >>"; |
830 | 13.6k | } |
831 | | |
832 | 19.5k | m->encrypted = true; |
833 | 19.5k | QPDF::EncryptionData encryption_data( |
834 | 19.5k | V, R, key_len, P, O, U, OE, UE, Perms, id1, m->encrypt_metadata); |
835 | 19.5k | if (V < 5) { |
836 | 5.97k | m->encryption_key = QPDF::compute_encryption_key(user_password, encryption_data); |
837 | 13.6k | } else { |
838 | 13.6k | m->encryption_key = encryption_key; |
839 | 13.6k | } |
840 | 19.5k | } |
841 | | |
842 | | void |
843 | | QPDFWriter::setDataKey(int objid) |
844 | 1.24M | { |
845 | 1.24M | m->cur_data_key = QPDF::compute_data_key( |
846 | 1.24M | m->encryption_key, objid, 0, m->encrypt_use_aes, m->encryption_V, m->encryption_R); |
847 | 1.24M | } |
848 | | |
849 | | unsigned int |
850 | | QPDFWriter::bytesNeeded(long long n) |
851 | 66.2k | { |
852 | 66.2k | unsigned int bytes = 0; |
853 | 164k | while (n) { |
854 | 98.4k | ++bytes; |
855 | 98.4k | n >>= 8; |
856 | 98.4k | } |
857 | 66.2k | return bytes; |
858 | 66.2k | } |
859 | | |
860 | | void |
861 | | QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes) |
862 | 1.97M | { |
863 | 1.97M | if (bytes > sizeof(unsigned long long)) { |
864 | 0 | throw std::logic_error("QPDFWriter::writeBinary called with too many bytes"); |
865 | 0 | } |
866 | 1.97M | unsigned char data[sizeof(unsigned long long)]; |
867 | 4.96M | for (unsigned int i = 0; i < bytes; ++i) { |
868 | 2.98M | data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff); |
869 | 2.98M | val >>= 8; |
870 | 2.98M | } |
871 | 1.97M | m->pipeline->write(data, bytes); |
872 | 1.97M | } |
873 | | |
874 | | void |
875 | | QPDFWriter::writeString(std::string_view str) |
876 | 131M | { |
877 | 131M | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
878 | 131M | } |
879 | | |
880 | | void |
881 | | QPDFWriter::writeBuffer(std::shared_ptr<Buffer>& b) |
882 | 518k | { |
883 | 518k | m->pipeline->write(b->getBuffer(), b->getSize()); |
884 | 518k | } |
885 | | |
886 | | void |
887 | | QPDFWriter::writeStringQDF(std::string_view str) |
888 | 42.4M | { |
889 | 42.4M | if (m->qdf_mode) { |
890 | 7.06M | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
891 | 7.06M | } |
892 | 42.4M | } |
893 | | |
894 | | void |
895 | | QPDFWriter::writeStringNoQDF(std::string_view str) |
896 | 855k | { |
897 | 855k | if (!m->qdf_mode) { |
898 | 744k | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
899 | 744k | } |
900 | 855k | } |
901 | | |
902 | | void |
903 | | QPDFWriter::writePad(size_t nspaces) |
904 | 77.1k | { |
905 | 77.1k | writeString(std::string(nspaces, ' ')); |
906 | 77.1k | } |
907 | | |
908 | | Pipeline* |
909 | | QPDFWriter::pushPipeline(Pipeline* p) |
910 | 1.13M | { |
911 | 1.13M | qpdf_assert_debug(dynamic_cast<Pl_Count*>(p) == nullptr); |
912 | 1.13M | m->pipeline_stack.push_back(p); |
913 | 1.13M | return p; |
914 | 1.13M | } |
915 | | |
916 | | void |
917 | | QPDFWriter::initializePipelineStack(Pipeline* p) |
918 | 34.3k | { |
919 | 34.3k | m->pipeline = new Pl_Count("pipeline stack base", p); |
920 | 34.3k | m->to_delete.push_back(std::shared_ptr<Pipeline>(m->pipeline)); |
921 | 34.3k | m->pipeline_stack.push_back(m->pipeline); |
922 | 34.3k | } |
923 | | |
924 | | void |
925 | | QPDFWriter::activatePipelineStack(PipelinePopper& pp) |
926 | 1.27M | { |
927 | 1.27M | std::string stack_id("stack " + std::to_string(m->next_stack_id)); |
928 | 1.27M | auto* c = new Pl_Count(stack_id.c_str(), m->pipeline_stack.back()); |
929 | 1.27M | ++m->next_stack_id; |
930 | 1.27M | m->pipeline_stack.push_back(c); |
931 | 1.27M | m->pipeline = c; |
932 | 1.27M | pp.stack_id = stack_id; |
933 | 1.27M | } |
934 | | |
935 | | QPDFWriter::PipelinePopper::~PipelinePopper() |
936 | 1.28M | { |
937 | 1.28M | if (stack_id.empty()) { |
938 | 17.0k | return; |
939 | 17.0k | } |
940 | 1.27M | qpdf_assert_debug(qw->m->pipeline_stack.size() >= 2); |
941 | 1.27M | qw->m->pipeline->finish(); |
942 | 1.27M | qpdf_assert_debug(dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) == qw->m->pipeline); |
943 | | // It might be possible for this assertion to fail if writeLinearized exits by exception when |
944 | | // deterministic ID, but I don't think so. As of this writing, this is the only case in which |
945 | | // two dynamically allocated PipelinePopper objects ever exist at the same time, so the |
946 | | // assertion will fail if they get popped out of order from automatic destruction. |
947 | 1.27M | qpdf_assert_debug(qw->m->pipeline->getIdentifier() == stack_id); |
948 | 1.27M | delete qw->m->pipeline_stack.back(); |
949 | 1.27M | qw->m->pipeline_stack.pop_back(); |
950 | 2.40M | while (dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) == nullptr) { |
951 | 1.13M | Pipeline* p = qw->m->pipeline_stack.back(); |
952 | 1.13M | if (dynamic_cast<Pl_MD5*>(p) == qw->m->md5_pipeline) { |
953 | 956k | qw->m->md5_pipeline = nullptr; |
954 | 956k | } |
955 | 1.13M | qw->m->pipeline_stack.pop_back(); |
956 | 1.13M | auto* buf = dynamic_cast<Pl_Buffer*>(p); |
957 | 1.13M | if (bp && buf) { |
958 | 520k | *bp = buf->getBufferSharedPointer(); |
959 | 520k | } |
960 | 1.13M | delete p; |
961 | 1.13M | } |
962 | 1.27M | qw->m->pipeline = dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()); |
963 | 1.27M | } |
964 | | |
965 | | void |
966 | | QPDFWriter::adjustAESStreamLength(size_t& length) |
967 | 482k | { |
968 | 482k | if (m->encrypted && (!m->cur_data_key.empty()) && m->encrypt_use_aes) { |
969 | | // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16. It will |
970 | | // also be prepended by 16 bits of random data. |
971 | 242k | length += 32 - (length & 0xf); |
972 | 242k | } |
973 | 482k | } |
974 | | |
975 | | void |
976 | | QPDFWriter::pushEncryptionFilter(PipelinePopper& pp) |
977 | 481k | { |
978 | 481k | if (m->encrypted && (!m->cur_data_key.empty())) { |
979 | 291k | Pipeline* p = nullptr; |
980 | 291k | if (m->encrypt_use_aes) { |
981 | 242k | p = new Pl_AES_PDF( |
982 | 242k | "aes stream encryption", |
983 | 242k | m->pipeline, |
984 | 242k | true, |
985 | 242k | QUtil::unsigned_char_pointer(m->cur_data_key), |
986 | 242k | m->cur_data_key.length()); |
987 | 242k | } else { |
988 | 49.1k | p = new Pl_RC4( |
989 | 49.1k | "rc4 stream encryption", |
990 | 49.1k | m->pipeline, |
991 | 49.1k | QUtil::unsigned_char_pointer(m->cur_data_key), |
992 | 49.1k | QIntC::to_int(m->cur_data_key.length())); |
993 | 49.1k | } |
994 | 291k | pushPipeline(p); |
995 | 291k | } |
996 | | // Must call this unconditionally so we can call popPipelineStack to balance |
997 | | // pushEncryptionFilter(). |
998 | 481k | activatePipelineStack(pp); |
999 | 481k | } |
1000 | | |
1001 | | void |
1002 | | QPDFWriter::pushDiscardFilter(PipelinePopper& pp) |
1003 | 53.8k | { |
1004 | 53.8k | pushPipeline(new Pl_Discard()); |
1005 | 53.8k | activatePipelineStack(pp); |
1006 | 53.8k | } |
1007 | | |
1008 | | void |
1009 | | QPDFWriter::pushMD5Pipeline(PipelinePopper& pp) |
1010 | 12.9k | { |
1011 | 12.9k | if (!m->id2.empty()) { |
1012 | | // Can't happen in the code |
1013 | 0 | throw std::logic_error("Deterministic ID computation enabled after ID" |
1014 | 0 | " generation has already occurred."); |
1015 | 0 | } |
1016 | 12.9k | qpdf_assert_debug(m->deterministic_id); |
1017 | 12.9k | qpdf_assert_debug(m->md5_pipeline == nullptr); |
1018 | 12.9k | qpdf_assert_debug(m->pipeline->getCount() == 0); |
1019 | 12.9k | m->md5_pipeline = new Pl_MD5("qpdf md5", m->pipeline); |
1020 | 12.9k | m->md5_pipeline->persistAcrossFinish(true); |
1021 | | // Special case code in popPipelineStack clears m->md5_pipeline upon deletion. |
1022 | 12.9k | pushPipeline(m->md5_pipeline); |
1023 | 12.9k | activatePipelineStack(pp); |
1024 | 12.9k | } |
1025 | | |
1026 | | void |
1027 | | QPDFWriter::computeDeterministicIDData() |
1028 | 11.9k | { |
1029 | 11.9k | qpdf_assert_debug(m->md5_pipeline != nullptr); |
1030 | 11.9k | qpdf_assert_debug(m->deterministic_id_data.empty()); |
1031 | 11.9k | m->deterministic_id_data = m->md5_pipeline->getHexDigest(); |
1032 | 11.9k | m->md5_pipeline->enable(false); |
1033 | 11.9k | } |
1034 | | |
1035 | | int |
1036 | | QPDFWriter::openObject(int objid) |
1037 | 1.40M | { |
1038 | 1.40M | if (objid == 0) { |
1039 | 5.75k | objid = m->next_objid++; |
1040 | 5.75k | } |
1041 | 1.40M | m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); |
1042 | 1.40M | writeString(std::to_string(objid)); |
1043 | 1.40M | writeString(" 0 obj\n"); |
1044 | 1.40M | return objid; |
1045 | 1.40M | } |
1046 | | |
1047 | | void |
1048 | | QPDFWriter::closeObject(int objid) |
1049 | 1.40M | { |
1050 | | // Write a newline before endobj as it makes the file easier to repair. |
1051 | 1.40M | writeString("\nendobj\n"); |
1052 | 1.40M | writeStringQDF("\n"); |
1053 | 1.40M | auto& new_obj = m->new_obj[objid]; |
1054 | 1.40M | new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); |
1055 | 1.40M | } |
1056 | | |
1057 | | void |
1058 | | QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) |
1059 | 457k | { |
1060 | 457k | int objid = og.getObj(); |
1061 | 457k | if ((og.getGen() != 0) || (m->object_stream_to_objects.count(objid) == 0)) { |
1062 | | // This is not an object stream. |
1063 | 445k | return; |
1064 | 445k | } |
1065 | | |
1066 | | // Reserve numbers for the objects that belong to this object stream. |
1067 | 190k | for (auto const& iter: m->object_stream_to_objects[objid]) { |
1068 | 190k | m->obj[iter].renumber = m->next_objid++; |
1069 | 190k | } |
1070 | 11.7k | } |
1071 | | |
1072 | | void |
1073 | | QPDFWriter::enqueueObject(QPDFObjectHandle object) |
1074 | 51.9M | { |
1075 | 51.9M | if (object.isIndirect()) { |
1076 | | // This owner check can only be done for indirect objects. It is possible for a direct |
1077 | | // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from |
1078 | | // one file was insert into another file without copying. Doing that is safe even if the |
1079 | | // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner. |
1080 | 3.23M | if (object.getOwningQPDF() != &(m->pdf)) { |
1081 | 0 | QTC::TC("qpdf", "QPDFWriter foreign object"); |
1082 | 0 | throw std::logic_error("QPDFObjectHandle from different QPDF found while writing. Use " |
1083 | 0 | "QPDF::copyForeignObject to add objects from another file."); |
1084 | 0 | } |
1085 | | |
1086 | 3.23M | if (m->qdf_mode && object.isStreamOfType("/XRef")) { |
1087 | | // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so |
1088 | | // will confuse fix-qdf, which expects to see only one XRef stream at the end of the |
1089 | | // file. This case can occur when creating a QDF from a file with object streams when |
1090 | | // preserving unreferenced objects since the old cross reference streams are not |
1091 | | // actually referenced by object number. |
1092 | 3.06k | QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode"); |
1093 | 3.06k | return; |
1094 | 3.06k | } |
1095 | | |
1096 | 3.23M | QPDFObjGen og = object.getObjGen(); |
1097 | 3.23M | auto& obj = m->obj[og]; |
1098 | | |
1099 | 3.23M | if (obj.renumber == 0) { |
1100 | 832k | if (obj.object_stream > 0) { |
1101 | | // This is in an object stream. Don't process it here. Instead, enqueue the object |
1102 | | // stream. Object streams always have generation 0. |
1103 | | // Detect loops by storing invalid object ID -1, which will get overwritten later. |
1104 | 3.36k | obj.renumber = -1; |
1105 | 3.36k | enqueueObject(m->pdf.getObject(obj.object_stream, 0)); |
1106 | 829k | } else { |
1107 | 829k | m->object_queue.push_back(object); |
1108 | 829k | obj.renumber = m->next_objid++; |
1109 | | |
1110 | 829k | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { |
1111 | | // For linearized files, uncompressed objects go at end, and we take care of |
1112 | | // assigning numbers to them elsewhere. |
1113 | 11.7k | if (!m->linearized) { |
1114 | 2.10k | assignCompressedObjectNumbers(og); |
1115 | 2.10k | } |
1116 | 817k | } else if ((!m->direct_stream_lengths) && object.isStream()) { |
1117 | | // reserve next object ID for length |
1118 | 72.7k | ++m->next_objid; |
1119 | 72.7k | } |
1120 | 829k | } |
1121 | 2.39M | } else if (obj.renumber == -1) { |
1122 | | // This can happen if a specially constructed file indicates that an object stream is |
1123 | | // inside itself. |
1124 | 3.75k | } |
1125 | 3.23M | return; |
1126 | 48.6M | } else if (!m->linearized) { |
1127 | 48.6M | if (object.isArray()) { |
1128 | 29.8M | for (auto& item: object.getArrayAsVector()) { |
1129 | 29.8M | enqueueObject(item); |
1130 | 29.8M | } |
1131 | 47.7M | } else if (object.isDictionary()) { |
1132 | 7.14M | for (auto& item: object.getDictAsMap()) { |
1133 | 7.14M | if (!item.second.isNull()) { |
1134 | 6.74M | enqueueObject(item.second); |
1135 | 6.74M | } |
1136 | 7.14M | } |
1137 | 2.06M | } |
1138 | 48.6M | } else { |
1139 | | // ignore |
1140 | 373 | } |
1141 | 51.9M | } |
1142 | | |
1143 | | void |
1144 | | QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) |
1145 | 39.1M | { |
1146 | 39.1M | if (!m->linearized) { |
1147 | 14.7M | enqueueObject(child); |
1148 | 14.7M | } |
1149 | 39.1M | if (child.isIndirect()) { |
1150 | 4.61M | writeString(std::to_string(m->obj[child].renumber)); |
1151 | 4.61M | writeString(" 0 R"); |
1152 | 34.5M | } else { |
1153 | 34.5M | unparseObject(child, level, flags); |
1154 | 34.5M | } |
1155 | 39.1M | } |
1156 | | |
1157 | | void |
1158 | | QPDFWriter::writeTrailer( |
1159 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass) |
1160 | 74.7k | { |
1161 | 74.7k | QPDFObjectHandle trailer = getTrimmedTrailer(); |
1162 | 74.7k | if (xref_stream) { |
1163 | 22.0k | m->cur_data_key.clear(); |
1164 | 52.6k | } else { |
1165 | 52.6k | writeString("trailer <<"); |
1166 | 52.6k | } |
1167 | 74.7k | writeStringQDF("\n"); |
1168 | 74.7k | if (which == t_lin_second) { |
1169 | 29.9k | writeString(" /Size "); |
1170 | 29.9k | writeString(std::to_string(size)); |
1171 | 44.7k | } else { |
1172 | 178k | for (auto const& key: trailer.getKeys()) { |
1173 | 178k | writeStringQDF(" "); |
1174 | 178k | writeStringNoQDF(" "); |
1175 | 178k | writeString(QPDF_Name::normalizeName(key)); |
1176 | 178k | writeString(" "); |
1177 | 178k | if (key == "/Size") { |
1178 | 36.7k | writeString(std::to_string(size)); |
1179 | 36.7k | if (which == t_lin_first) { |
1180 | 25.5k | writeString(" /Prev "); |
1181 | 25.5k | qpdf_offset_t pos = m->pipeline->getCount(); |
1182 | 25.5k | writeString(std::to_string(prev)); |
1183 | 25.5k | writePad(QIntC::to_size(pos - m->pipeline->getCount() + 21)); |
1184 | 25.5k | } |
1185 | 142k | } else { |
1186 | 142k | unparseChild(trailer.getKey(key), 1, 0); |
1187 | 142k | } |
1188 | 178k | writeStringQDF("\n"); |
1189 | 178k | } |
1190 | 44.7k | } |
1191 | | |
1192 | | // Write ID |
1193 | 74.7k | writeStringQDF(" "); |
1194 | 74.7k | writeString(" /ID ["); |
1195 | 74.7k | if (linearization_pass == 1) { |
1196 | 30.9k | std::string original_id1 = getOriginalID1(); |
1197 | 30.9k | if (original_id1.empty()) { |
1198 | 10.8k | writeString("<00000000000000000000000000000000>"); |
1199 | 20.0k | } else { |
1200 | | // Write a string of zeroes equal in length to the representation of the original ID. |
1201 | | // While writing the original ID would have the same number of bytes, it would cause a |
1202 | | // change to the deterministic ID generated by older versions of the software that |
1203 | | // hard-coded the length of the ID to 16 bytes. |
1204 | 20.0k | writeString("<"); |
1205 | 20.0k | size_t len = QPDF_String(original_id1).unparse(true).length() - 2; |
1206 | 1.45M | for (size_t i = 0; i < len; ++i) { |
1207 | 1.43M | writeString("0"); |
1208 | 1.43M | } |
1209 | 20.0k | writeString(">"); |
1210 | 20.0k | } |
1211 | 30.9k | writeString("<00000000000000000000000000000000>"); |
1212 | 43.7k | } else { |
1213 | 43.7k | if ((linearization_pass == 0) && (m->deterministic_id)) { |
1214 | 8.07k | computeDeterministicIDData(); |
1215 | 8.07k | } |
1216 | 43.7k | generateID(); |
1217 | 43.7k | writeString(QPDF_String(m->id1).unparse(true)); |
1218 | 43.7k | writeString(QPDF_String(m->id2).unparse(true)); |
1219 | 43.7k | } |
1220 | 74.7k | writeString("]"); |
1221 | | |
1222 | 74.7k | if (which != t_lin_second) { |
1223 | | // Write reference to encryption dictionary |
1224 | 43.1k | if (m->encrypted) { |
1225 | 28.1k | writeString(" /Encrypt "); |
1226 | 28.1k | writeString(std::to_string(m->encryption_dict_objid)); |
1227 | 28.1k | writeString(" 0 R"); |
1228 | 28.1k | } |
1229 | 43.1k | } |
1230 | | |
1231 | 74.7k | writeStringQDF("\n"); |
1232 | 74.7k | writeStringNoQDF(" "); |
1233 | 74.7k | writeString(">>"); |
1234 | 74.7k | } |
1235 | | |
// Decide whether the data of `stream` will be filtered (decoded / normalized / recompressed) on
// output and pipe the resulting data into a freshly pushed Pl_Buffer.
//
// stream          - the stream object whose data is piped.
// compress_stream - out: true only if the data was filtered and is to be Flate-compressed.
// is_metadata     - out: true if the stream dictionary is of type /Metadata.
// stream_data     - handed to the PipelinePopper that owns the temporary buffer pipeline.
//
// Returns the value reported by pipeStreamData, i.e. whether filtering actually took place.
// A std::runtime_error thrown while piping is rethrown as a std::runtime_error whose message is
// prefixed with the unparsed stream object.
// Side effect: if filtering was requested but could not be done, setFilterOnWrite(false) is
// called on `stream` before the second attempt.
1236 | | bool |
1237 | | QPDFWriter::willFilterStream( |
1238 | | QPDFObjectHandle stream, |
1239 | | bool& compress_stream, // out only |
1240 | | bool& is_metadata, // out only |
1241 | | std::shared_ptr<Buffer>* stream_data) |
1242 | 633k | { |
1243 | 633k | compress_stream = false; |
1244 | 633k | is_metadata = false; |
1245 | | |
1246 | 633k | QPDFObjGen old_og = stream.getObjGen(); |
1247 | 633k | QPDFObjectHandle stream_dict = stream.getDict(); |
1248 | | |
1249 | 633k | if (stream_dict.isDictionaryOfType("/Metadata")) { |
1250 | 14.6k | is_metadata = true; |
1251 | 14.6k | } |
// Initial filtering decision; it is refined by the checks that follow.
1252 | 633k | bool filter = (stream.isDataModified() || m->compress_streams || m->stream_decode_level); |
1253 | 633k | bool filter_on_write = stream.getFilterOnWrite(); |
1254 | 633k | if (!filter_on_write) { |
1255 | 27.9k | QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false"); |
1256 | 27.9k | filter = false; |
1257 | 27.9k | } |
1258 | 633k | if (filter_on_write && m->compress_streams) { |
1259 | | // Don't filter if the stream is already compressed with FlateDecode. This way we don't make |
1260 | | // it worse if the original file used a better Flate algorithm, and we don't spend time and |
1261 | | // CPU cycles uncompressing and recompressing stuff. This can be overridden with |
1262 | | // setRecompressFlate(true). |
1263 | 534k | QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); |
1264 | 534k | if ((!m->recompress_flate) && (!stream.isDataModified()) && filter_obj.isName() && |
1265 | 534k | ((filter_obj.getName() == "/FlateDecode") || (filter_obj.getName() == "/Fl"))) { |
1266 | 240k | QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); |
1267 | 240k | filter = false; |
1268 | 240k | } |
1269 | 534k | } |
1270 | 633k | bool normalize = false; |
1271 | 633k | bool uncompress = false; |
// Metadata that will not be encrypted is fully decoded (qpdf_dl_all below) and left uncompressed.
1272 | 633k | if (filter_on_write && is_metadata && ((!m->encrypted) || (m->encrypt_metadata == false))) { |
1273 | 5.50k | QTC::TC("qpdf", "QPDFWriter not compressing metadata"); |
1274 | 5.50k | filter = true; |
1275 | 5.50k | compress_stream = false; |
1276 | 5.50k | uncompress = true; |
1277 | 628k | } else if (filter_on_write && m->normalize_content && m->normalized_streams.count(old_og)) { |
1278 | 15.6k | normalize = true; |
1279 | 15.6k | filter = true; |
1280 | 612k | } else if (filter_on_write && filter && m->compress_streams) { |
1281 | 289k | compress_stream = true; |
1282 | 289k | QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); |
1283 | 289k | } |
1284 | | |
// At most two attempts: the first with the requested filtering; if that was requested but did not
// happen, a second one with filtering turned off. Each attempt writes into its own Pl_Buffer,
// which is popped by pp_stream_data at the end of the loop body.
1285 | 633k | bool filtered = false; |
1286 | 667k | for (int attempt = 1; attempt <= 2; ++attempt) { |
1287 | 667k | pushPipeline(new Pl_Buffer("stream data")); |
1288 | 667k | PipelinePopper pp_stream_data(this, stream_data); |
1289 | 667k | activatePipelineStack(pp_stream_data); |
1290 | 667k | try { |
1291 | 667k | filtered = stream.pipeStreamData( |
1292 | 667k | m->pipeline, |
1293 | 667k | (((filter && normalize) ? qpdf_ef_normalize : 0) | |
1294 | 667k | ((filter && compress_stream) ? qpdf_ef_compress : 0)), |
1295 | 667k | (filter ? (uncompress ? qpdf_dl_all : m->stream_decode_level) : qpdf_dl_none), |
1296 | 667k | false, |
1297 | 667k | (attempt == 1)); |
1298 | 667k | } catch (std::runtime_error& e) { |
1299 | 455 | throw std::runtime_error( |
1300 | 455 | "error while getting stream data for " + stream.unparse() + ": " + e.what()); |
1301 | 455 | } |
1302 | 667k | if (filter && !filtered) { |
1303 | | // Try again |
1304 | 34.2k | filter = false; |
1305 | 34.2k | stream.setFilterOnWrite(false); |
1306 | 633k | } else { |
1307 | 633k | break; |
1308 | 633k | } |
1309 | 667k | } |
1310 | 633k | if (!filtered) { |
1311 | 303k | compress_stream = false; |
1312 | 303k | } |
1313 | 633k | return filtered; |
1314 | 633k | } |
1315 | | |
// Write the textual form of `object` to the current pipeline, dispatching on its type code:
//   - arrays and dictionaries are written element by element through unparseChild;
//   - streams have their data gathered via willFilterStream, then the dictionary is written by a
//     recursive call with f_stream set, followed by the (possibly encrypted) data between
//     "stream" and "endstream";
//   - strings are encrypted with the current data key when encryption applies;
//   - everything else is written via unparseResolved().
//
// level         - nesting depth; only affects indentation in QDF mode. Negative values throw
//                 std::logic_error.
// flags         - bit set of f_* flags. f_stream is stripped before being passed to children.
// stream_length - value written for /Length when f_stream is set and direct stream lengths are
//                 in use.
// compress      - when f_stream and f_filtered are both set, controls whether
//                 "/Filter /FlateDecode" is added to the dictionary.
1316 | | void |
1317 | | QPDFWriter::unparseObject( |
1318 | | QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress) |
1319 | 36.8M | { |
1320 | 36.8M | QPDFObjGen old_og = object.getObjGen(); |
1321 | 36.8M | int child_flags = flags & ~f_stream; |
1322 | 36.8M | if (level < 0) { |
1323 | 0 | throw std::logic_error("invalid level in QPDFWriter::unparseObject"); |
1324 | 0 | } |
1325 | | // For non-qdf, "indent" is a single space between tokens. For qdf, indent includes the |
1326 | | // preceding newline. |
1327 | 36.8M | std::string indent = " "; |
1328 | 36.8M | if (m->qdf_mode) { |
1329 | 6.11M | indent.append(static_cast<size_t>(2 * level), ' '); |
1330 | 6.11M | indent[0] = '\n'; |
1331 | 6.11M | } |
1332 | | |
1333 | 36.8M | if (auto const tc = object.getTypeCode(); tc == ::ot_array) { |
1334 | | // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the |
1335 | | // [ in the /H key of the linearization parameter dictionary. We'll do this unconditionally |
1336 | | // for all arrays because it looks nicer and doesn't make the files that much bigger. |
1337 | 1.39M | writeString("["); |
1338 | 28.7M | for (auto const& item: object.getArrayAsVector()) { |
1339 | 28.7M | writeString(indent); |
1340 | 28.7M | writeStringQDF(" "); |
1341 | 28.7M | unparseChild(item, level + 1, child_flags); |
1342 | 28.7M | } |
1343 | 1.39M | writeString(indent); |
1344 | 1.39M | writeString("]"); |
1345 | 35.4M | } else if (tc == ::ot_dictionary) { |
1346 | | // Make a shallow copy of this object so we can modify it safely without affecting the |
1347 | | // original. This code has logic to skip certain keys in agreement with prepareFileForWrite |
1348 | | // and with skip_stream_parameters so that replacing them doesn't leave unreferenced objects |
1349 | | // in the output. We can use unsafeShallowCopy here because all we are doing is removing or |
1350 | | // replacing top-level keys. |
1351 | 2.85M | object = object.unsafeShallowCopy(); |
1352 | | |
1353 | | // Handle special cases for specific dictionaries. |
1354 | | |
1355 | | // Extensions dictionaries. |
1356 | | |
1357 | | // We have one of several cases: |
1358 | | // |
1359 | | // * We need ADBE |
1360 | | // - We already have Extensions |
1361 | | // - If it has the right ADBE, preserve it |
1362 | | // - Otherwise, replace ADBE |
1363 | | // - We don't have Extensions: create one from scratch |
1364 | | // * We don't want ADBE |
1365 | | // - We already have Extensions |
1366 | | // - If it only has ADBE, remove it |
1367 | | // - If it has other things, keep those and remove ADBE |
1368 | | // - We have no extensions: no action required |
1369 | | // |
1370 | | // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE |
1371 | | // dictionary, so we can modify in place. |
1372 | | |
1373 | 2.85M | const bool is_root = (old_og == m->root_og); |
1374 | 2.85M | bool have_extensions_other = false; |
1375 | 2.85M | bool have_extensions_adbe = false; |
1376 | | |
1377 | 2.85M | QPDFObjectHandle extensions; |
1378 | 2.85M | if (is_root) { |
1379 | 43.9k | if (object.hasKey("/Extensions") && object.getKey("/Extensions").isDictionary()) { |
1380 | 844 | extensions = object.getKey("/Extensions"); |
1381 | 844 | } |
1382 | 43.9k | } |
1383 | | |
1384 | 2.85M | if (extensions.isInitialized()) { |
1385 | 844 | std::set<std::string> keys = extensions.getKeys(); |
1386 | 844 | if (keys.count("/ADBE") > 0) { |
1387 | 782 | have_extensions_adbe = true; |
1388 | 782 | keys.erase("/ADBE"); |
1389 | 782 | } |
1390 | 844 | if (keys.size() > 0) { |
1391 | 130 | have_extensions_other = true; |
1392 | 130 | } |
1393 | 844 | } |
1394 | | |
1395 | 2.85M | bool need_extensions_adbe = (m->final_extension_level > 0); |
1396 | | |
1397 | 2.85M | if (is_root) { |
1398 | 43.9k | if (need_extensions_adbe) { |
1399 | 22.2k | if (!(have_extensions_other || have_extensions_adbe)) { |
1400 | | // We need Extensions and don't have it. Create it here. |
1401 | 21.5k | QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1); |
1402 | 21.5k | extensions = object.replaceKeyAndGetNew( |
1403 | 21.5k | "/Extensions", QPDFObjectHandle::newDictionary()); |
1404 | 21.5k | } |
1405 | 22.2k | } else if (!have_extensions_other) { |
1406 | | // We have Extensions dictionary and don't want one. |
1407 | 21.6k | if (have_extensions_adbe) { |
1408 | 105 | QTC::TC("qpdf", "QPDFWriter remove existing Extensions"); |
1409 | 105 | object.removeKey("/Extensions"); |
1410 | 105 | extensions = QPDFObjectHandle(); // uninitialized |
1411 | 105 | } |
1412 | 21.6k | } |
1413 | 43.9k | } |
1414 | | |
1415 | 2.85M | if (extensions.isInitialized()) { |
1416 | 22.2k | QTC::TC("qpdf", "QPDFWriter preserve Extensions"); |
1417 | 22.2k | QPDFObjectHandle adbe = extensions.getKey("/ADBE"); |
1418 | 22.2k | if (adbe.isDictionary() && |
1419 | 22.2k | adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) && |
1420 | 22.2k | adbe.getKey("/ExtensionLevel").isInteger() && |
1421 | 22.2k | (adbe.getKey("/ExtensionLevel").getIntValue() == m->final_extension_level)) { |
1422 | 420 | QTC::TC("qpdf", "QPDFWriter preserve ADBE"); |
1423 | 21.8k | } else { |
1424 | 21.8k | if (need_extensions_adbe) { |
1425 | 21.7k | extensions.replaceKey( |
1426 | 21.7k | "/ADBE", |
1427 | 21.7k | QPDFObjectHandle::parse( |
1428 | 21.7k | "<< /BaseVersion /" + m->final_pdf_version + " /ExtensionLevel " + |
1429 | 21.7k | std::to_string(m->final_extension_level) + " >>")); |
1430 | 21.7k | } else { |
1431 | 48 | QTC::TC("qpdf", "QPDFWriter remove ADBE"); |
1432 | 48 | extensions.removeKey("/ADBE"); |
1433 | 48 | } |
1434 | 21.8k | } |
1435 | 22.2k | } |
1436 | | |
1437 | | // Stream dictionaries. |
1438 | | |
1439 | 2.85M | if (flags & f_stream) { |
1440 | | // Suppress /Length since we will write it manually |
1441 | 448k | object.removeKey("/Length"); |
1442 | | |
1443 | | // If /DecodeParms is an empty list, remove it. |
1444 | 448k | if (object.getKey("/DecodeParms").isArray() && |
1445 | 448k | (0 == object.getKey("/DecodeParms").getArrayNItems())) { |
1446 | 239 | QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms"); |
1447 | 239 | object.removeKey("/DecodeParms"); |
1448 | 239 | } |
1449 | | |
1450 | 448k | if (flags & f_filtered) { |
1451 | | // We will supply our own filter and decode |
1452 | | // parameters. |
1453 | 240k | object.removeKey("/Filter"); |
1454 | 240k | object.removeKey("/DecodeParms"); |
1455 | 240k | } else { |
1456 | | // Make sure, no matter what else we have, that we don't have /Crypt in the output |
1457 | | // filters. |
1458 | 207k | QPDFObjectHandle filter = object.getKey("/Filter"); |
1459 | 207k | QPDFObjectHandle decode_parms = object.getKey("/DecodeParms"); |
1460 | 207k | if (filter.isOrHasName("/Crypt")) { |
1461 | 79 | if (filter.isName()) { |
1462 | 15 | object.removeKey("/Filter"); |
1463 | 15 | object.removeKey("/DecodeParms"); |
1464 | 64 | } else { |
1465 | 64 | int idx = -1; |
1466 | 143 | for (int i = 0; i < filter.getArrayNItems(); ++i) { |
1467 | 143 | QPDFObjectHandle item = filter.getArrayItem(i); |
1468 | 143 | if (item.isNameAndEquals("/Crypt")) { |
1469 | 64 | idx = i; |
1470 | 64 | break; |
1471 | 64 | } |
1472 | 143 | } |
1473 | 64 | if (idx >= 0) { |
1474 | | // If filter is an array, then the code in QPDF_Stream has already |
1475 | | // verified that DecodeParms and Filters are arrays of the same length, |
1476 | | // but if they weren't for some reason, eraseItem does type and bounds |
1477 | | // checking. |
1478 | 64 | QTC::TC("qpdf", "QPDFWriter remove Crypt"); |
1479 | 64 | filter.eraseItem(idx); |
1480 | 64 | decode_parms.eraseItem(idx); |
1481 | 64 | } |
1482 | 64 | } |
1483 | 79 | } |
1484 | 207k | } |
1485 | 448k | } |
1486 | | |
1487 | 2.85M | writeString("<<"); |
1488 | | |
// Null-valued keys are skipped. /Contents of a /Sig dictionary that has /ByteRange is written
// as a hex string and without encryption.
1489 | 10.8M | for (auto& item: object.getDictAsMap()) { |
1490 | 10.8M | if (!item.second.isNull()) { |
1491 | 10.2M | auto const& key = item.first; |
1492 | 10.2M | writeString(indent); |
1493 | 10.2M | writeStringQDF(" "); |
1494 | 10.2M | writeString(QPDF_Name::normalizeName(key)); |
1495 | 10.2M | writeString(" "); |
1496 | 10.2M | if (key == "/Contents" && object.isDictionaryOfType("/Sig") && |
1497 | 10.2M | object.hasKey("/ByteRange")) { |
1498 | 111 | QTC::TC("qpdf", "QPDFWriter no encryption sig contents"); |
1499 | 111 | unparseChild( |
1500 | 111 | item.second, level + 1, child_flags | f_hex_string | f_no_encryption); |
1501 | 10.2M | } else { |
1502 | 10.2M | unparseChild(item.second, level + 1, child_flags); |
1503 | 10.2M | } |
1504 | 10.2M | } |
1505 | 10.8M | } |
1506 | | |
// For stream dictionaries, /Length is written here: either the literal stream_length or a
// reference to the separate length object (cur_stream_length_id).
1507 | 2.85M | if (flags & f_stream) { |
1508 | 448k | writeString(indent); |
1509 | 448k | writeStringQDF(" "); |
1510 | 448k | writeString("/Length "); |
1511 | | |
1512 | 448k | if (m->direct_stream_lengths) { |
1513 | 377k | writeString(std::to_string(stream_length)); |
1514 | 377k | } else { |
1515 | 70.8k | writeString(std::to_string(m->cur_stream_length_id)); |
1516 | 70.8k | writeString(" 0 R"); |
1517 | 70.8k | } |
1518 | 448k | if (compress && (flags & f_filtered)) { |
1519 | 177k | writeString(indent); |
1520 | 177k | writeStringQDF(" "); |
1521 | 177k | writeString("/Filter /FlateDecode"); |
1522 | 177k | } |
1523 | 448k | } |
1524 | | |
1525 | 2.85M | writeString(indent); |
1526 | 2.85M | writeString(">>"); |
1527 | 32.5M | } else if (tc == ::ot_stream) { |
1528 | | // Write stream data to a buffer. |
// The length object's id is the stream's new id plus one; enqueueObject reserves that extra id
// for streams when direct stream lengths are off.
1529 | 448k | if (!m->direct_stream_lengths) { |
1530 | 70.9k | m->cur_stream_length_id = m->obj[old_og].renumber + 1; |
1531 | 70.9k | } |
1532 | | |
1533 | 448k | flags |= f_stream; |
1534 | 448k | bool compress_stream = false; |
1535 | 448k | bool is_metadata = false; |
1536 | 448k | std::shared_ptr<Buffer> stream_data; |
1537 | 448k | if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) { |
1538 | 240k | flags |= f_filtered; |
1539 | 240k | } |
1540 | 448k | QPDFObjectHandle stream_dict = object.getDict(); |
1541 | | |
1542 | 448k | m->cur_stream_length = stream_data->getSize(); |
1543 | 448k | if (is_metadata && m->encrypted && (!m->encrypt_metadata)) { |
1544 | | // Don't encrypt stream data for the metadata stream |
1545 | 0 | m->cur_data_key.clear(); |
1546 | 0 | } |
1547 | 448k | adjustAESStreamLength(m->cur_stream_length); |
1548 | 448k | unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream); |
1549 | 448k | unsigned char last_char = '\0'; |
1550 | 448k | writeString("\nstream\n"); |
// The inner scope makes pp_enc pop the encryption frame before anything after the stream data
// is written.
1551 | 448k | { |
1552 | 448k | PipelinePopper pp_enc(this); |
1553 | 448k | pushEncryptionFilter(pp_enc); |
1554 | 448k | writeBuffer(stream_data); |
1555 | 448k | last_char = m->pipeline->getLastChar(); |
1556 | 448k | } |
1557 | | |
1558 | 448k | if (m->newline_before_endstream || (m->qdf_mode && (last_char != '\n'))) { |
1559 | 27.1k | writeString("\n"); |
1560 | 27.1k | m->added_newline = true; |
1561 | 421k | } else { |
1562 | 421k | m->added_newline = false; |
1563 | 421k | } |
1564 | 448k | writeString("endstream"); |
1565 | 32.1M | } else if (tc == ::ot_string) { |
// Strings are encrypted only when the file is encrypted, a data key is set, and neither
// f_in_ostream nor f_no_encryption is present in flags.
1566 | 1.06M | std::string val; |
1567 | 1.06M | if (m->encrypted && (!(flags & f_in_ostream)) && (!(flags & f_no_encryption)) && |
1568 | 1.06M | (!m->cur_data_key.empty())) { |
1569 | 570k | val = object.getStringValue(); |
1570 | 570k | if (m->encrypt_use_aes) { |
1571 | 472k | Pl_Buffer bufpl("encrypted string"); |
1572 | 472k | Pl_AES_PDF pl( |
1573 | 472k | "aes encrypt string", |
1574 | 472k | &bufpl, |
1575 | 472k | true, |
1576 | 472k | QUtil::unsigned_char_pointer(m->cur_data_key), |
1577 | 472k | m->cur_data_key.length()); |
1578 | 472k | pl.writeString(val); |
1579 | 472k | pl.finish(); |
1580 | 472k | val = QPDF_String(bufpl.getString()).unparse(true); |
1581 | 472k | } else { |
1582 | 98.4k | auto tmp_ph = QUtil::make_unique_cstr(val); |
1583 | 98.4k | char* tmp = tmp_ph.get(); |
1584 | 98.4k | size_t vlen = val.length(); |
1585 | 98.4k | RC4 rc4( |
1586 | 98.4k | QUtil::unsigned_char_pointer(m->cur_data_key), |
1587 | 98.4k | QIntC::to_int(m->cur_data_key.length())); |
1588 | 98.4k | auto data = QUtil::unsigned_char_pointer(tmp); |
1589 | 98.4k | rc4.process(data, vlen, data); |
1590 | 98.4k | val = QPDF_String(std::string(tmp, vlen)).unparse(); |
1591 | 98.4k | } |
1592 | 570k | } else if (flags & f_hex_string) { |
1593 | 111 | val = QPDF_String(object.getStringValue()).unparse(true); |
1594 | 496k | } else { |
1595 | 496k | val = object.unparseResolved(); |
1596 | 496k | } |
1597 | 1.06M | writeString(val); |
1598 | 31.0M | } else { |
1599 | 31.0M | writeString(object.unparseResolved()); |
1600 | 31.0M | } |
1601 | 36.8M | } |
1602 | | |
1603 | | void |
1604 | | QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj) |
1605 | 37.9k | { |
1606 | 660k | for (size_t i = 0; i < offsets.size(); ++i) { |
1607 | 622k | if (i != 0) { |
1608 | 585k | writeStringQDF("\n"); |
1609 | 585k | writeStringNoQDF(" "); |
1610 | 585k | } |
1611 | 622k | writeString(std::to_string(i + QIntC::to_size(first_obj))); |
1612 | 622k | writeString(" "); |
1613 | 622k | writeString(std::to_string(offsets.at(i))); |
1614 | 622k | } |
1615 | 37.9k | writeString("\n"); |
1616 | 37.9k | } |
1617 | | |
1618 | | void |
1619 | | QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
1620 | 18.9k | { |
1621 | | // Note: object might be null if this is a place-holder for an object stream that we are |
1622 | | // generating from scratch. |
1623 | | |
1624 | 18.9k | QPDFObjGen old_og = object.getObjGen(); |
1625 | 18.9k | qpdf_assert_debug(old_og.getGen() == 0); |
1626 | 18.9k | int old_id = old_og.getObj(); |
1627 | 18.9k | int new_stream_id = m->obj[old_og].renumber; |
1628 | | |
1629 | 18.9k | std::vector<qpdf_offset_t> offsets; |
1630 | 18.9k | qpdf_offset_t first = 0; |
1631 | | |
1632 | | // Generate stream itself. We have to do this in two passes so we can calculate offsets in the |
1633 | | // first pass. |
1634 | 18.9k | std::shared_ptr<Buffer> stream_buffer; |
1635 | 18.9k | int first_obj = -1; |
1636 | 18.9k | bool compressed = false; |
1637 | 56.8k | for (int pass = 1; pass <= 2; ++pass) { |
1638 | | // stream_buffer will be initialized only for pass 2 |
1639 | 37.9k | PipelinePopper pp_ostream(this, &stream_buffer); |
1640 | 37.9k | if (pass == 1) { |
1641 | 18.9k | pushDiscardFilter(pp_ostream); |
1642 | 18.9k | } else { |
1643 | | // Adjust offsets to skip over comment before first object |
1644 | 18.9k | first = offsets.at(0); |
1645 | 311k | for (auto& iter: offsets) { |
1646 | 311k | iter -= first; |
1647 | 311k | } |
1648 | | |
1649 | | // Take one pass at writing pairs of numbers so we can get their size information |
1650 | 18.9k | { |
1651 | 18.9k | PipelinePopper pp_discard(this); |
1652 | 18.9k | pushDiscardFilter(pp_discard); |
1653 | 18.9k | writeObjectStreamOffsets(offsets, first_obj); |
1654 | 18.9k | first += m->pipeline->getCount(); |
1655 | 18.9k | } |
1656 | | |
1657 | | // Set up a stream to write the stream data into a buffer. |
1658 | 18.9k | Pipeline* next = pushPipeline(new Pl_Buffer("object stream")); |
1659 | 18.9k | if (m->compress_streams && !m->qdf_mode) { |
1660 | 16.8k | compressed = true; |
1661 | 16.8k | next = |
1662 | 16.8k | pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate)); |
1663 | 16.8k | } |
1664 | 18.9k | activatePipelineStack(pp_ostream); |
1665 | 18.9k | writeObjectStreamOffsets(offsets, first_obj); |
1666 | 18.9k | } |
1667 | | |
1668 | 37.9k | int count = -1; |
1669 | 622k | for (auto const& obj: m->object_stream_to_objects[old_id]) { |
1670 | 622k | ++count; |
1671 | 622k | int new_obj = m->obj[obj].renumber; |
1672 | 622k | if (first_obj == -1) { |
1673 | 18.9k | first_obj = new_obj; |
1674 | 18.9k | } |
1675 | 622k | if (m->qdf_mode) { |
1676 | 70.6k | writeString( |
1677 | 70.6k | "%% Object stream: object " + std::to_string(new_obj) + ", index " + |
1678 | 70.6k | std::to_string(count)); |
1679 | 70.6k | if (!m->suppress_original_object_ids) { |
1680 | 70.6k | writeString("; original object ID: " + std::to_string(obj.getObj())); |
1681 | | // For compatibility, only write the generation if non-zero. While object |
1682 | | // streams only allow objects with generation 0, if we are generating object |
1683 | | // streams, the old object could have a non-zero generation. |
1684 | 70.6k | if (obj.getGen() != 0) { |
1685 | 0 | QTC::TC("qpdf", "QPDFWriter original obj non-zero gen"); |
1686 | 0 | writeString(" " + std::to_string(obj.getGen())); |
1687 | 0 | } |
1688 | 70.6k | } |
1689 | 70.6k | writeString("\n"); |
1690 | 70.6k | } |
1691 | 622k | if (pass == 1) { |
1692 | 311k | offsets.push_back(m->pipeline->getCount()); |
1693 | | // To avoid double-counting objects being written in object streams for progress |
1694 | | // reporting, decrement in pass 1. |
1695 | 311k | indicateProgress(true, false); |
1696 | 311k | } |
1697 | 622k | QPDFObjectHandle obj_to_write = m->pdf.getObject(obj); |
1698 | 622k | if (obj_to_write.isStream()) { |
1699 | | // This condition occurred in a fuzz input. Ideally we should block it at parse |
1700 | | // time, but it's not clear to me how to construct a case for this. |
1701 | 0 | QTC::TC("qpdf", "QPDFWriter stream in ostream"); |
1702 | 0 | obj_to_write.warnIfPossible("stream found inside object stream; treating as null"); |
1703 | 0 | obj_to_write = QPDFObjectHandle::newNull(); |
1704 | 0 | } |
1705 | 622k | writeObject(obj_to_write, count); |
1706 | | |
1707 | 622k | m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count); |
1708 | 622k | } |
1709 | 37.9k | } |
1710 | | |
1711 | | // Write the object |
1712 | 18.9k | openObject(new_stream_id); |
1713 | 18.9k | setDataKey(new_stream_id); |
1714 | 18.9k | writeString("<<"); |
1715 | 18.9k | writeStringQDF("\n "); |
1716 | 18.9k | writeString(" /Type /ObjStm"); |
1717 | 18.9k | writeStringQDF("\n "); |
1718 | 18.9k | size_t length = stream_buffer->getSize(); |
1719 | 18.9k | adjustAESStreamLength(length); |
1720 | 18.9k | writeString(" /Length " + std::to_string(length)); |
1721 | 18.9k | writeStringQDF("\n "); |
1722 | 18.9k | if (compressed) { |
1723 | 16.8k | writeString(" /Filter /FlateDecode"); |
1724 | 16.8k | } |
1725 | 18.9k | writeString(" /N " + std::to_string(offsets.size())); |
1726 | 18.9k | writeStringQDF("\n "); |
1727 | 18.9k | writeString(" /First " + std::to_string(first)); |
1728 | 18.9k | if (!object.isNull()) { |
1729 | | // If the original object has an /Extends key, preserve it. |
1730 | 10.6k | QPDFObjectHandle dict = object.getDict(); |
1731 | 10.6k | QPDFObjectHandle extends = dict.getKey("/Extends"); |
1732 | 10.6k | if (extends.isIndirect()) { |
1733 | 646 | QTC::TC("qpdf", "QPDFWriter copy Extends"); |
1734 | 646 | writeStringQDF("\n "); |
1735 | 646 | writeString(" /Extends "); |
1736 | 646 | unparseChild(extends, 1, f_in_ostream); |
1737 | 646 | } |
1738 | 10.6k | } |
1739 | 18.9k | writeStringQDF("\n"); |
1740 | 18.9k | writeStringNoQDF(" "); |
1741 | 18.9k | writeString(">>\nstream\n"); |
1742 | 18.9k | if (m->encrypted) { |
1743 | 8.66k | QTC::TC("qpdf", "QPDFWriter encrypt object stream"); |
1744 | 8.66k | } |
1745 | 18.9k | { |
1746 | 18.9k | PipelinePopper pp_enc(this); |
1747 | 18.9k | pushEncryptionFilter(pp_enc); |
1748 | 18.9k | writeBuffer(stream_buffer); |
1749 | 18.9k | } |
1750 | 18.9k | if (m->newline_before_endstream) { |
1751 | 0 | writeString("\n"); |
1752 | 0 | } |
1753 | 18.9k | writeString("endstream"); |
1754 | 18.9k | m->cur_data_key.clear(); |
1755 | 18.9k | closeObject(new_stream_id); |
1756 | 18.9k | } |
1757 | | |
1758 | | void |
1759 | | QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) |
1760 | 1.85M | { |
1761 | 1.85M | QPDFObjGen old_og = object.getObjGen(); |
1762 | | |
1763 | 1.85M | if ((object_stream_index == -1) && (old_og.getGen() == 0) && |
1764 | 1.85M | (m->object_stream_to_objects.count(old_og.getObj()))) { |
1765 | 18.9k | writeObjectStream(object); |
1766 | 18.9k | return; |
1767 | 18.9k | } |
1768 | | |
1769 | 1.83M | indicateProgress(false, false); |
1770 | 1.83M | auto new_id = m->obj[old_og].renumber; |
1771 | 1.83M | if (m->qdf_mode) { |
1772 | 289k | if (m->page_object_to_seq.count(old_og)) { |
1773 | 33.0k | writeString("%% Page "); |
1774 | 33.0k | writeString(std::to_string(m->page_object_to_seq[old_og])); |
1775 | 33.0k | writeString("\n"); |
1776 | 33.0k | } |
1777 | 289k | if (m->contents_to_page_seq.count(old_og)) { |
1778 | 16.8k | writeString("%% Contents for page "); |
1779 | 16.8k | writeString(std::to_string(m->contents_to_page_seq[old_og])); |
1780 | 16.8k | writeString("\n"); |
1781 | 16.8k | } |
1782 | 289k | } |
1783 | 1.83M | if (object_stream_index == -1) { |
1784 | 1.21M | if (m->qdf_mode && (!m->suppress_original_object_ids)) { |
1785 | 218k | writeString("%% Original object ID: " + object.getObjGen().unparse(' ') + "\n"); |
1786 | 218k | } |
1787 | 1.21M | openObject(new_id); |
1788 | 1.21M | setDataKey(new_id); |
1789 | 1.21M | unparseObject(object, 0, 0); |
1790 | 1.21M | m->cur_data_key.clear(); |
1791 | 1.21M | closeObject(new_id); |
1792 | 1.21M | } else { |
1793 | 622k | unparseObject(object, 0, f_in_ostream); |
1794 | 622k | writeString("\n"); |
1795 | 622k | } |
1796 | | |
1797 | 1.83M | if ((!m->direct_stream_lengths) && object.isStream()) { |
1798 | 70.8k | if (m->qdf_mode) { |
1799 | 70.8k | if (m->added_newline) { |
1800 | 27.1k | writeString("%QDF: ignore_newline\n"); |
1801 | 27.1k | } |
1802 | 70.8k | } |
1803 | 70.8k | openObject(new_id + 1); |
1804 | 70.8k | writeString(std::to_string(m->cur_stream_length)); |
1805 | 70.8k | closeObject(new_id + 1); |
1806 | 70.8k | } |
1807 | 1.83M | } |
1808 | | |
1809 | | std::string |
1810 | | QPDFWriter::getOriginalID1() |
1811 | 63.3k | { |
1812 | 63.3k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1813 | 63.3k | if (trailer.hasKey("/ID")) { |
1814 | 41.5k | return trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
1815 | 41.5k | } else { |
1816 | 21.8k | return ""; |
1817 | 21.8k | } |
1818 | 63.3k | } |
1819 | | |
1820 | | void |
1821 | | QPDFWriter::generateID() |
1822 | 64.2k | { |
1823 | | // Generate the ID lazily so that we can handle the user's preference to use static or |
1824 | | // deterministic ID generation. |
1825 | | |
1826 | 64.2k | if (!m->id2.empty()) { |
1827 | 31.8k | return; |
1828 | 31.8k | } |
1829 | | |
1830 | 32.4k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1831 | | |
1832 | 32.4k | std::string result; |
1833 | | |
1834 | 32.4k | if (m->static_id) { |
1835 | | // For test suite use only... |
1836 | 20.4k | static unsigned char tmp[] = { |
1837 | 20.4k | 0x31, |
1838 | 20.4k | 0x41, |
1839 | 20.4k | 0x59, |
1840 | 20.4k | 0x26, |
1841 | 20.4k | 0x53, |
1842 | 20.4k | 0x58, |
1843 | 20.4k | 0x97, |
1844 | 20.4k | 0x93, |
1845 | 20.4k | 0x23, |
1846 | 20.4k | 0x84, |
1847 | 20.4k | 0x62, |
1848 | 20.4k | 0x64, |
1849 | 20.4k | 0x33, |
1850 | 20.4k | 0x83, |
1851 | 20.4k | 0x27, |
1852 | 20.4k | 0x95, |
1853 | 20.4k | 0x00}; |
1854 | 20.4k | result = reinterpret_cast<char*>(tmp); |
1855 | 20.4k | } else { |
1856 | | // The PDF specification has guidelines for creating IDs, but it states clearly that the |
1857 | | // only thing that's really important is that it is very likely to be unique. We can't |
1858 | | // really follow the guidelines in the spec exactly because we haven't written the file yet. |
1859 | | // This scheme should be fine though. The deterministic ID case uses a digest of a |
1860 | | // sufficient portion of the file's contents such no two non-matching files would match in |
1861 | | // the subsets used for this computation. Note that we explicitly omit the filename from |
1862 | | // the digest calculation for deterministic ID so that the same file converted with qpdf, in |
1863 | | // that case, would have the same ID regardless of the output file's name. |
1864 | | |
1865 | 11.9k | std::string seed; |
1866 | 11.9k | if (m->deterministic_id) { |
1867 | 11.9k | if (m->deterministic_id_data.empty()) { |
1868 | 0 | QTC::TC("qpdf", "QPDFWriter deterministic with no data"); |
1869 | 0 | throw std::logic_error("INTERNAL ERROR: QPDFWriter::generateID has no data for " |
1870 | 0 | "deterministic ID. This may happen if deterministic ID and " |
1871 | 0 | "file encryption are requested together."); |
1872 | 0 | } |
1873 | 11.9k | seed += m->deterministic_id_data; |
1874 | 11.9k | } else { |
1875 | 0 | seed += std::to_string(QUtil::get_current_time()); |
1876 | 0 | seed += m->filename; |
1877 | 0 | seed += " "; |
1878 | 0 | } |
1879 | 11.9k | seed += " QPDF "; |
1880 | 11.9k | if (trailer.hasKey("/Info")) { |
1881 | 3.52k | QPDFObjectHandle info = trailer.getKey("/Info"); |
1882 | 13.5k | for (auto const& key: info.getKeys()) { |
1883 | 13.5k | QPDFObjectHandle obj = info.getKey(key); |
1884 | 13.5k | if (obj.isString()) { |
1885 | 10.4k | seed += " "; |
1886 | 10.4k | seed += obj.getStringValue(); |
1887 | 10.4k | } |
1888 | 13.5k | } |
1889 | 3.52k | } |
1890 | | |
1891 | 11.9k | MD5 m; |
1892 | 11.9k | m.encodeString(seed.c_str()); |
1893 | 11.9k | MD5::Digest digest; |
1894 | 11.9k | m.digest(digest); |
1895 | 11.9k | result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest)); |
1896 | 11.9k | } |
1897 | | |
1898 | | // If /ID already exists, follow the spec: use the original first word and generate a new second |
1899 | | // word. Otherwise, we'll use the generated ID for both. |
1900 | | |
1901 | 32.4k | m->id2 = result; |
1902 | | // Note: keep /ID from old file even if --static-id was given. |
1903 | 32.4k | m->id1 = getOriginalID1(); |
1904 | 32.4k | if (m->id1.empty()) { |
1905 | 12.6k | m->id1 = m->id2; |
1906 | 12.6k | } |
1907 | 32.4k | } |
1908 | | |
1909 | | void |
1910 | | QPDFWriter::initializeSpecialStreams() |
1911 | 33.4k | { |
1912 | | // Mark all page content streams in case we are filtering or |
1913 | | // normalizing. |
1914 | 33.4k | std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages(); |
1915 | 33.4k | int num = 0; |
1916 | 178k | for (auto& page: pages) { |
1917 | 178k | m->page_object_to_seq[page.getObjGen()] = ++num; |
1918 | 178k | QPDFObjectHandle contents = page.getKey("/Contents"); |
1919 | 178k | std::vector<QPDFObjGen> contents_objects; |
1920 | 178k | if (contents.isArray()) { |
1921 | 2.68k | int n = contents.getArrayNItems(); |
1922 | 35.6k | for (int i = 0; i < n; ++i) { |
1923 | 32.9k | contents_objects.push_back(contents.getArrayItem(i).getObjGen()); |
1924 | 32.9k | } |
1925 | 175k | } else if (contents.isStream()) { |
1926 | 97.7k | contents_objects.push_back(contents.getObjGen()); |
1927 | 97.7k | } |
1928 | | |
1929 | 178k | for (auto const& c: contents_objects) { |
1930 | 130k | m->contents_to_page_seq[c] = num; |
1931 | 130k | m->normalized_streams.insert(c); |
1932 | 130k | } |
1933 | 178k | } |
1934 | 33.4k | } |
1935 | | |
1936 | | void |
1937 | | QPDFWriter::preserveObjectStreams() |
1938 | 22.0k | { |
1939 | 22.0k | auto const& xref = QPDF::Writer::getXRefTable(m->pdf); |
1940 | | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
1941 | | // streams out of old objects that have generation numbers greater than zero. However in an |
1942 | | // existing PDF, all object stream objects and all objects in them must have generation 0 |
1943 | | // because the PDF spec does not provide any way to do otherwise. This code filters out objects |
1944 | | // that are not allowed to be in object streams. In addition to removing objects that were |
1945 | | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
1946 | | // objects from being included. |
1947 | 22.0k | auto end = xref.cend(); |
1948 | 22.0k | m->obj.streams_empty = true; |
1949 | 22.0k | if (m->preserve_unreferenced_objects) { |
1950 | 0 | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
1951 | 0 | if (iter->second.getType() == 2) { |
1952 | | // Pdf contains object streams. |
1953 | 0 | QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); |
1954 | 0 | m->obj.streams_empty = false; |
1955 | 0 | m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
1956 | 0 | } |
1957 | 0 | } |
1958 | 22.0k | } else { |
1959 | | // Start by scanning for first compressed object in case we don't have any object streams to |
1960 | | // process. |
1961 | 886k | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
1962 | 866k | if (iter->second.getType() == 2) { |
1963 | | // Pdf contains object streams. |
1964 | 2.63k | QTC::TC("qpdf", "QPDFWriter preserve object streams"); |
1965 | 2.63k | m->obj.streams_empty = false; |
1966 | 2.63k | auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); |
1967 | | // The object pointed to by iter may be a previous generation, in which case it is |
1968 | | // removed by getCompressibleObjSet. We need to restart the loop (while the object |
1969 | | // table may contain multiple generations of an object). |
1970 | 349k | for (iter = xref.cbegin(); iter != end; ++iter) { |
1971 | 346k | if (iter->second.getType() == 2) { |
1972 | 237k | auto id = static_cast<size_t>(iter->first.getObj()); |
1973 | 237k | if (id < eligible.size() && eligible[id]) { |
1974 | 122k | m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
1975 | 122k | } else { |
1976 | 114k | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
1977 | 114k | } |
1978 | 237k | } |
1979 | 346k | } |
1980 | 2.63k | return; |
1981 | 2.63k | } |
1982 | 866k | } |
1983 | 22.0k | } |
1984 | 22.0k | } |
1985 | | |
1986 | | void |
1987 | | QPDFWriter::generateObjectStreams() |
1988 | 5.27k | { |
1989 | | // Basic strategy: make a list of objects that can go into an object stream. Then figure out |
1990 | | // how many object streams are needed so that we can distribute objects approximately evenly |
1991 | | // without having any object stream exceed 100 members. We don't have to worry about linearized |
1992 | | // files here -- if the file is linearized, we take care of excluding things that aren't allowed |
1993 | | // here later. |
1994 | | |
1995 | | // This code doesn't do anything with /Extends. |
1996 | | |
1997 | 5.27k | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); |
1998 | 5.27k | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
1999 | | |
2000 | 5.27k | initializeTables(2U * n_object_streams); |
2001 | 5.27k | if (n_object_streams == 0) { |
2002 | 30 | m->obj.streams_empty = true; |
2003 | 30 | return; |
2004 | 30 | } |
2005 | 5.24k | size_t n_per = eligible.size() / n_object_streams; |
2006 | 5.24k | if (n_per * n_object_streams < eligible.size()) { |
2007 | 122 | ++n_per; |
2008 | 122 | } |
2009 | 5.24k | unsigned int n = 0; |
2010 | 5.24k | int cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2011 | 129k | for (auto const& item: eligible) { |
2012 | 129k | if (n == n_per) { |
2013 | 233 | QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); |
2014 | 233 | n = 0; |
2015 | | // Construct a new null object as the "original" object stream. The rest of the code |
2016 | | // knows that this means we're creating the object stream from scratch. |
2017 | 233 | cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2018 | 233 | } |
2019 | 129k | auto& obj = m->obj[item]; |
2020 | 129k | obj.object_stream = cur_ostream; |
2021 | 129k | obj.gen = item.getGen(); |
2022 | 129k | ++n; |
2023 | 129k | } |
2024 | 5.24k | } |
2025 | | |
2026 | | QPDFObjectHandle |
2027 | | QPDFWriter::getTrimmedTrailer() |
2028 | 88.8k | { |
2029 | | // Remove keys from the trailer that necessarily have to be replaced when writing the file. |
2030 | | |
2031 | 88.8k | QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy(); |
2032 | | |
2033 | | // Remove encryption keys |
2034 | 88.8k | trailer.removeKey("/ID"); |
2035 | 88.8k | trailer.removeKey("/Encrypt"); |
2036 | | |
2037 | | // Remove modification information |
2038 | 88.8k | trailer.removeKey("/Prev"); |
2039 | | |
2040 | | // Remove all trailer keys that potentially come from a cross-reference stream |
2041 | 88.8k | trailer.removeKey("/Index"); |
2042 | 88.8k | trailer.removeKey("/W"); |
2043 | 88.8k | trailer.removeKey("/Length"); |
2044 | 88.8k | trailer.removeKey("/Filter"); |
2045 | 88.8k | trailer.removeKey("/DecodeParms"); |
2046 | 88.8k | trailer.removeKey("/Type"); |
2047 | 88.8k | trailer.removeKey("/XRefStm"); |
2048 | | |
2049 | 88.8k | return trailer; |
2050 | 88.8k | } |
2051 | | |
2052 | | // Make document extension level information direct as required by the spec. |
2053 | | void |
2054 | | QPDFWriter::prepareFileForWrite() |
2055 | 32.3k | { |
2056 | 32.3k | m->pdf.fixDanglingReferences(); |
2057 | 32.3k | auto root = m->pdf.getRoot(); |
2058 | 32.3k | auto oh = root.getKey("/Extensions"); |
2059 | 32.3k | if (oh.isDictionary()) { |
2060 | 844 | const bool extensions_indirect = oh.isIndirect(); |
2061 | 844 | if (extensions_indirect) { |
2062 | 0 | QTC::TC("qpdf", "QPDFWriter make Extensions direct"); |
2063 | 0 | oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy()); |
2064 | 0 | } |
2065 | 844 | if (oh.hasKey("/ADBE")) { |
2066 | 771 | auto adbe = oh.getKey("/ADBE"); |
2067 | 771 | if (adbe.isIndirect()) { |
2068 | 78 | QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1); |
2069 | 78 | adbe.makeDirect(); |
2070 | 78 | oh.replaceKey("/ADBE", adbe); |
2071 | 78 | } |
2072 | 771 | } |
2073 | 844 | } |
2074 | 32.3k | } |
2075 | | |
2076 | | void |
2077 | | QPDFWriter::initializeTables(size_t extra) |
2078 | 33.2k | { |
2079 | 33.2k | auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; |
2080 | 33.2k | m->obj.resize(size); |
2081 | 33.2k | m->new_obj.resize(size); |
2082 | 33.2k | } |
2083 | | |
2084 | | void |
2085 | | QPDFWriter::doWriteSetup() |
2086 | 33.4k | { |
2087 | 33.4k | if (m->did_write_setup) { |
2088 | 0 | return; |
2089 | 0 | } |
2090 | 33.4k | m->did_write_setup = true; |
2091 | | |
2092 | | // Do preliminary setup |
2093 | | |
2094 | 33.4k | if (m->linearized) { |
2095 | 18.9k | m->qdf_mode = false; |
2096 | 18.9k | } |
2097 | | |
2098 | 33.4k | if (m->pclm) { |
2099 | 0 | m->stream_decode_level = qpdf_dl_none; |
2100 | 0 | m->compress_streams = false; |
2101 | 0 | m->encrypted = false; |
2102 | 0 | } |
2103 | | |
2104 | 33.4k | if (m->qdf_mode) { |
2105 | 8.61k | if (!m->normalize_content_set) { |
2106 | 8.61k | m->normalize_content = true; |
2107 | 8.61k | } |
2108 | 8.61k | if (!m->compress_streams_set) { |
2109 | 8.61k | m->compress_streams = false; |
2110 | 8.61k | } |
2111 | 8.61k | if (!m->stream_decode_level_set) { |
2112 | 0 | m->stream_decode_level = qpdf_dl_generalized; |
2113 | 0 | } |
2114 | 8.61k | } |
2115 | | |
2116 | 33.4k | if (m->encrypted) { |
2117 | | // Encryption has been explicitly set |
2118 | 19.5k | m->preserve_encryption = false; |
2119 | 19.5k | } else if (m->normalize_content || m->stream_decode_level || m->pclm || m->qdf_mode) { |
2120 | | // Encryption makes looking at contents pretty useless. If the user explicitly encrypted |
2121 | | // though, we still obey that. |
2122 | 13.8k | m->preserve_encryption = false; |
2123 | 13.8k | } |
2124 | | |
2125 | 33.4k | if (m->preserve_encryption) { |
2126 | 0 | copyEncryptionParameters(m->pdf); |
2127 | 0 | } |
2128 | | |
2129 | 33.4k | if (!m->forced_pdf_version.empty()) { |
2130 | 0 | int major = 0; |
2131 | 0 | int minor = 0; |
2132 | 0 | parseVersion(m->forced_pdf_version, major, minor); |
2133 | 0 | disableIncompatibleEncryption(major, minor, m->forced_extension_level); |
2134 | 0 | if (compareVersions(major, minor, 1, 5) < 0) { |
2135 | 0 | QTC::TC("qpdf", "QPDFWriter forcing object stream disable"); |
2136 | 0 | m->object_stream_mode = qpdf_o_disable; |
2137 | 0 | } |
2138 | 0 | } |
2139 | | |
2140 | 33.4k | if (m->qdf_mode || m->normalize_content || m->stream_decode_level) { |
2141 | 33.4k | initializeSpecialStreams(); |
2142 | 33.4k | } |
2143 | | |
2144 | 33.4k | if (m->qdf_mode) { |
2145 | | // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing |
2146 | | // recomputed stream length data. Certain streams such as object streams, xref streams, and |
2147 | | // hint streams always get direct stream lengths. |
2148 | 8.53k | m->direct_stream_lengths = false; |
2149 | 8.53k | } |
2150 | | |
2151 | 33.4k | switch (m->object_stream_mode) { |
2152 | 5.97k | case qpdf_o_disable: |
2153 | 5.97k | initializeTables(); |
2154 | 5.97k | m->obj.streams_empty = true; |
2155 | 5.97k | break; |
2156 | | |
2157 | 22.0k | case qpdf_o_preserve: |
2158 | 22.0k | initializeTables(); |
2159 | 22.0k | preserveObjectStreams(); |
2160 | 22.0k | break; |
2161 | | |
2162 | 5.27k | case qpdf_o_generate: |
2163 | 5.27k | generateObjectStreams(); |
2164 | 5.27k | break; |
2165 | | |
2166 | | // no default so gcc will warn for missing case tag |
2167 | 33.4k | } |
2168 | | |
2169 | 33.1k | if (!m->obj.streams_empty) { |
2170 | 7.69k | if (m->linearized) { |
2171 | | // Page dictionaries are not allowed to be compressed objects. |
2172 | 47.5k | for (auto& page: m->pdf.getAllPages()) { |
2173 | 47.5k | if (m->obj[page].object_stream > 0) { |
2174 | 44.5k | QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); |
2175 | 44.5k | m->obj[page].object_stream = 0; |
2176 | 44.5k | } |
2177 | 47.5k | } |
2178 | 6.92k | } |
2179 | | |
2180 | 7.69k | if (m->linearized || m->encrypted) { |
2181 | | // The document catalog is not allowed to be compressed in linearized files either. It |
2182 | | // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to |
2183 | | // handle encrypted files with compressed document catalogs, so we disable them in that |
2184 | | // case as well. |
2185 | 6.92k | if (m->obj[m->root_og].object_stream > 0) { |
2186 | 5.15k | QTC::TC("qpdf", "QPDFWriter uncompressing root"); |
2187 | 5.15k | m->obj[m->root_og].object_stream = 0; |
2188 | 5.15k | } |
2189 | 6.92k | } |
2190 | | |
2191 | | // Generate reverse mapping from object stream to objects |
2192 | 4.98M | m->obj.forEach([this](auto id, auto const& item) -> void { |
2193 | 4.98M | if (item.object_stream > 0) { |
2194 | 202k | auto& vec = m->object_stream_to_objects[item.object_stream]; |
2195 | 202k | vec.emplace_back(id, item.gen); |
2196 | 202k | if (m->max_ostream_index < vec.size()) { |
2197 | 131k | ++m->max_ostream_index; |
2198 | 131k | } |
2199 | 202k | } |
2200 | 4.98M | }); |
2201 | 7.69k | --m->max_ostream_index; |
2202 | | |
2203 | 7.69k | if (m->object_stream_to_objects.empty()) { |
2204 | 254 | m->obj.streams_empty = true; |
2205 | 7.44k | } else { |
2206 | 7.44k | setMinimumPDFVersion("1.5"); |
2207 | 7.44k | } |
2208 | 7.69k | } |
2209 | | |
2210 | 33.1k | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
2211 | 33.1k | m->final_pdf_version = m->min_pdf_version; |
2212 | 33.1k | m->final_extension_level = m->min_extension_level; |
2213 | 33.1k | if (!m->forced_pdf_version.empty()) { |
2214 | 0 | QTC::TC("qpdf", "QPDFWriter using forced PDF version"); |
2215 | 0 | m->final_pdf_version = m->forced_pdf_version; |
2216 | 0 | m->final_extension_level = m->forced_extension_level; |
2217 | 0 | } |
2218 | 33.1k | } |
2219 | | |
2220 | | void |
2221 | | QPDFWriter::write() |
2222 | 33.4k | { |
2223 | 33.4k | doWriteSetup(); |
2224 | | |
2225 | | // Set up progress reporting. For linearized files, we write two passes. events_expected is an |
2226 | | // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. |
2227 | 33.4k | m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1)); |
2228 | | |
2229 | 33.4k | prepareFileForWrite(); |
2230 | | |
2231 | 33.4k | if (m->linearized) { |
2232 | 18.1k | writeLinearized(); |
2233 | 18.1k | } else { |
2234 | 15.3k | writeStandard(); |
2235 | 15.3k | } |
2236 | | |
2237 | 33.4k | m->pipeline->finish(); |
2238 | 33.4k | if (m->close_file) { |
2239 | 0 | fclose(m->file); |
2240 | 0 | } |
2241 | 33.4k | m->file = nullptr; |
2242 | 33.4k | if (m->buffer_pipeline) { |
2243 | 0 | m->output_buffer = m->buffer_pipeline->getBuffer(); |
2244 | 0 | m->buffer_pipeline = nullptr; |
2245 | 0 | } |
2246 | 33.4k | indicateProgress(false, true); |
2247 | 33.4k | } |
2248 | | |
2249 | | QPDFObjGen |
2250 | | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
2251 | 0 | { |
2252 | 0 | return QPDFObjGen(m->obj[og].renumber, 0); |
2253 | 0 | } |
2254 | | |
2255 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2256 | | QPDFWriter::getWrittenXRefTable() |
2257 | 0 | { |
2258 | 0 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
2259 | |
|
2260 | 0 | auto it = result.begin(); |
2261 | 0 | m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { |
2262 | 0 | if (item.xref.getType() != 0) { |
2263 | 0 | it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); |
2264 | 0 | } |
2265 | 0 | }); |
2266 | 0 | return result; |
2267 | 0 | } |
2268 | | |
2269 | | void |
2270 | | QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part) |
2271 | 80.9k | { |
2272 | 452k | for (auto const& oh: part) { |
2273 | 452k | enqueueObject(oh); |
2274 | 452k | } |
2275 | 80.9k | } |
2276 | | |
2277 | | void |
2278 | | QPDFWriter::writeEncryptionDictionary() |
2279 | 28.1k | { |
2280 | 28.1k | m->encryption_dict_objid = openObject(m->encryption_dict_objid); |
2281 | 28.1k | writeString("<<"); |
2282 | 331k | for (auto const& iter: m->encryption_dictionary) { |
2283 | 331k | writeString(" "); |
2284 | 331k | writeString(iter.first); |
2285 | 331k | writeString(" "); |
2286 | 331k | writeString(iter.second); |
2287 | 331k | } |
2288 | 28.1k | writeString(" >>"); |
2289 | 28.1k | closeObject(m->encryption_dict_objid); |
2290 | 28.1k | } |
2291 | | |
2292 | | std::string |
2293 | | QPDFWriter::getFinalVersion() |
2294 | 0 | { |
2295 | 0 | doWriteSetup(); |
2296 | 0 | return m->final_pdf_version; |
2297 | 0 | } |
2298 | | |
2299 | | void |
2300 | | QPDFWriter::writeHeader() |
2301 | 45.0k | { |
2302 | 45.0k | writeString("%PDF-"); |
2303 | 45.0k | writeString(m->final_pdf_version); |
2304 | 45.0k | if (m->pclm) { |
2305 | | // PCLm version |
2306 | 0 | writeString("\n%PCLm 1.0\n"); |
2307 | 45.0k | } else { |
2308 | | // This string of binary characters would not be valid UTF-8, so it really should be treated |
2309 | | // as binary. |
2310 | 45.0k | writeString("\n%\xbf\xf7\xa2\xfe\n"); |
2311 | 45.0k | } |
2312 | 45.0k | writeStringQDF("%QDF-1.0\n\n"); |
2313 | | |
2314 | | // Note: do not write extra header text here. Linearized PDFs must include the entire |
2315 | | // linearization parameter dictionary within the first 1024 characters of the PDF file, so for |
2316 | | // linearized files, we have to write extra header text after the linearization parameter |
2317 | | // dictionary. |
2318 | 45.0k | } |
2319 | | |
2320 | | void |
2321 | | QPDFWriter::writeHintStream(int hint_id) |
2322 | 14.9k | { |
2323 | 14.9k | std::shared_ptr<Buffer> hint_buffer; |
2324 | 14.9k | int S = 0; |
2325 | 14.9k | int O = 0; |
2326 | 14.9k | bool compressed = (m->compress_streams && !m->qdf_mode); |
2327 | 14.9k | QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); |
2328 | | |
2329 | 14.9k | openObject(hint_id); |
2330 | 14.9k | setDataKey(hint_id); |
2331 | | |
2332 | 14.9k | size_t hlen = hint_buffer->getSize(); |
2333 | | |
2334 | 14.9k | writeString("<< "); |
2335 | 14.9k | if (compressed) { |
2336 | 14.9k | writeString("/Filter /FlateDecode "); |
2337 | 14.9k | } |
2338 | 14.9k | writeString("/S "); |
2339 | 14.9k | writeString(std::to_string(S)); |
2340 | 14.9k | if (O) { |
2341 | 3.11k | writeString(" /O "); |
2342 | 3.11k | writeString(std::to_string(O)); |
2343 | 3.11k | } |
2344 | 14.9k | writeString(" /Length "); |
2345 | 14.9k | adjustAESStreamLength(hlen); |
2346 | 14.9k | writeString(std::to_string(hlen)); |
2347 | 14.9k | writeString(" >>\nstream\n"); |
2348 | | |
2349 | 14.9k | if (m->encrypted) { |
2350 | 11.1k | QTC::TC("qpdf", "QPDFWriter encrypted hint stream"); |
2351 | 11.1k | } |
2352 | 14.9k | unsigned char last_char = '\0'; |
2353 | 14.9k | { |
2354 | 14.9k | PipelinePopper pp_enc(this); |
2355 | 14.9k | pushEncryptionFilter(pp_enc); |
2356 | 14.9k | writeBuffer(hint_buffer); |
2357 | 14.9k | last_char = m->pipeline->getLastChar(); |
2358 | 14.9k | } |
2359 | | |
2360 | 14.9k | if (last_char != '\n') { |
2361 | 14.9k | writeString("\n"); |
2362 | 14.9k | } |
2363 | 14.9k | writeString("endstream"); |
2364 | 14.9k | closeObject(hint_id); |
2365 | 14.9k | } |
2366 | | |
2367 | | qpdf_offset_t |
2368 | | QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size) |
2369 | 13.2k | { |
2370 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2371 | | // file...too much risk of leaving something off. |
2372 | 13.2k | return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0); |
2373 | 13.2k | } |
2374 | | |
2375 | | qpdf_offset_t |
2376 | | QPDFWriter::writeXRefTable( |
2377 | | trailer_e which, |
2378 | | int first, |
2379 | | int last, |
2380 | | int size, |
2381 | | qpdf_offset_t prev, |
2382 | | bool suppress_offsets, |
2383 | | int hint_id, |
2384 | | qpdf_offset_t hint_offset, |
2385 | | qpdf_offset_t hint_length, |
2386 | | int linearization_pass) |
2387 | 52.6k | { |
2388 | 52.6k | writeString("xref\n"); |
2389 | 52.6k | writeString(std::to_string(first)); |
2390 | 52.6k | writeString(" "); |
2391 | 52.6k | writeString(std::to_string(last - first + 1)); |
2392 | 52.6k | qpdf_offset_t space_before_zero = m->pipeline->getCount(); |
2393 | 52.6k | writeString("\n"); |
2394 | 1.15M | for (int i = first; i <= last; ++i) { |
2395 | 1.09M | if (i == 0) { |
2396 | 32.8k | writeString("0000000000 65535 f \n"); |
2397 | 1.06M | } else { |
2398 | 1.06M | qpdf_offset_t offset = 0; |
2399 | 1.06M | if (!suppress_offsets) { |
2400 | 893k | offset = m->new_obj[i].xref.getOffset(); |
2401 | 893k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2402 | 93.4k | offset += hint_length; |
2403 | 93.4k | } |
2404 | 893k | } |
2405 | 1.06M | writeString(QUtil::int_to_string(offset, 10)); |
2406 | 1.06M | writeString(" 00000 n \n"); |
2407 | 1.06M | } |
2408 | 1.09M | } |
2409 | 52.6k | writeTrailer(which, size, false, prev, linearization_pass); |
2410 | 52.6k | writeString("\n"); |
2411 | 52.6k | return space_before_zero; |
2412 | 52.6k | } |
2413 | | |
2414 | | qpdf_offset_t |
2415 | | QPDFWriter::writeXRefStream( |
2416 | | int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size) |
2417 | 608 | { |
2418 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2419 | | // file...too much risk of leaving something off. |
2420 | 608 | return writeXRefStream( |
2421 | 608 | objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0); |
2422 | 608 | } |
2423 | | |
2424 | | qpdf_offset_t |
2425 | | QPDFWriter::writeXRefStream( |
2426 | | int xref_id, |
2427 | | int max_id, |
2428 | | qpdf_offset_t max_offset, |
2429 | | trailer_e which, |
2430 | | int first, |
2431 | | int last, |
2432 | | int size, |
2433 | | qpdf_offset_t prev, |
2434 | | int hint_id, |
2435 | | qpdf_offset_t hint_offset, |
2436 | | qpdf_offset_t hint_length, |
2437 | | bool skip_compression, |
2438 | | int linearization_pass) |
2439 | 22.0k | { |
2440 | 22.0k | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
2441 | 22.0k | qpdf_offset_t space_before_zero = xref_offset - 1; |
2442 | | |
2443 | | // field 1 contains offsets and object stream identifiers |
2444 | 22.0k | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
2445 | | |
2446 | | // field 2 contains object stream indices |
2447 | 22.0k | unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); |
2448 | | |
2449 | 22.0k | unsigned int esize = 1 + f1_size + f2_size; |
2450 | | |
2451 | | // Must store in xref table in advance of writing the actual data rather than waiting for |
2452 | | // openObject to do it. |
2453 | 22.0k | m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2454 | | |
2455 | 22.0k | Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); |
2456 | 22.0k | bool compressed = false; |
2457 | 22.0k | if (m->compress_streams && !m->qdf_mode) { |
2458 | 21.4k | compressed = true; |
2459 | 21.4k | if (!skip_compression) { |
2460 | | // Write the stream dictionary for compression but don't actually compress. This helps |
2461 | | // us with computation of padding for pass 1 of linearization. |
2462 | 10.2k | p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate)); |
2463 | 10.2k | } |
2464 | 21.4k | p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, esize)); |
2465 | 21.4k | } |
2466 | 22.0k | std::shared_ptr<Buffer> xref_data; |
2467 | 22.0k | { |
2468 | 22.0k | PipelinePopper pp_xref(this, &xref_data); |
2469 | 22.0k | activatePipelineStack(pp_xref); |
2470 | 681k | for (int i = first; i <= last; ++i) { |
2471 | 659k | QPDFXRefEntry& e = m->new_obj[i].xref; |
2472 | 659k | switch (e.getType()) { |
2473 | 124k | case 0: |
2474 | 124k | writeBinary(0, 1); |
2475 | 124k | writeBinary(0, f1_size); |
2476 | 124k | writeBinary(0, f2_size); |
2477 | 124k | break; |
2478 | | |
2479 | 290k | case 1: |
2480 | 290k | { |
2481 | 290k | qpdf_offset_t offset = e.getOffset(); |
2482 | 290k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2483 | 33.5k | offset += hint_length; |
2484 | 33.5k | } |
2485 | 290k | writeBinary(1, 1); |
2486 | 290k | writeBinary(QIntC::to_ulonglong(offset), f1_size); |
2487 | 290k | writeBinary(0, f2_size); |
2488 | 290k | } |
2489 | 290k | break; |
2490 | | |
2491 | 244k | case 2: |
2492 | 244k | writeBinary(2, 1); |
2493 | 244k | writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size); |
2494 | 244k | writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size); |
2495 | 244k | break; |
2496 | | |
2497 | 0 | default: |
2498 | 0 | throw std::logic_error("invalid type writing xref stream"); |
2499 | 0 | break; |
2500 | 659k | } |
2501 | 659k | } |
2502 | 22.0k | } |
2503 | | |
2504 | 22.0k | openObject(xref_id); |
2505 | 22.0k | writeString("<<"); |
2506 | 22.0k | writeStringQDF("\n "); |
2507 | 22.0k | writeString(" /Type /XRef"); |
2508 | 22.0k | writeStringQDF("\n "); |
2509 | 22.0k | writeString(" /Length " + std::to_string(xref_data->getSize())); |
2510 | 22.0k | if (compressed) { |
2511 | 21.4k | writeStringQDF("\n "); |
2512 | 21.4k | writeString(" /Filter /FlateDecode"); |
2513 | 21.4k | writeStringQDF("\n "); |
2514 | 21.4k | writeString(" /DecodeParms << /Columns " + std::to_string(esize) + " /Predictor 12 >>"); |
2515 | 21.4k | } |
2516 | 22.0k | writeStringQDF("\n "); |
2517 | 22.0k | writeString(" /W [ 1 " + std::to_string(f1_size) + " " + std::to_string(f2_size) + " ]"); |
2518 | 22.0k | if (!((first == 0) && (last == size - 1))) { |
2519 | 11.1k | writeString( |
2520 | 11.1k | " /Index [ " + std::to_string(first) + " " + std::to_string(last - first + 1) + " ]"); |
2521 | 11.1k | } |
2522 | 22.0k | writeTrailer(which, size, true, prev, linearization_pass); |
2523 | 22.0k | writeString("\nstream\n"); |
2524 | 22.0k | writeBuffer(xref_data); |
2525 | 22.0k | writeString("\nendstream"); |
2526 | 22.0k | closeObject(xref_id); |
2527 | 22.0k | return space_before_zero; |
2528 | 22.0k | } |
2529 | | |
2530 | | size_t |
2531 | | QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
2532 | 10.4k | { |
2533 | | // This routine is called right after a linearization first pass xref stream has been written |
2534 | | // without compression. Calculate the amount of padding that would be required in the worst |
2535 | | // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is |
2536 | | // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add |
2537 | | // 10 extra bytes for number length increases. |
2538 | | |
2539 | 10.4k | return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); |
2540 | 10.4k | } |
2541 | | |
2542 | | void |
2543 | | QPDFWriter::writeLinearized() |
2544 | 18.1k | { |
2545 | | // Optimize file and enqueue objects in order |
2546 | | |
2547 | 18.1k | std::map<int, int> stream_cache; |
2548 | | |
2549 | 379k | auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) { |
2550 | 379k | auto& result = stream_cache[stream.getObjectID()]; |
2551 | 379k | if (result == 0) { |
2552 | 185k | bool compress_stream; |
2553 | 185k | bool is_metadata; |
2554 | 185k | if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) { |
2555 | 89.1k | result = 2; |
2556 | 96.1k | } else { |
2557 | 96.1k | result = 1; |
2558 | 96.1k | } |
2559 | 185k | } |
2560 | 379k | return result; |
2561 | 379k | }; |
2562 | | |
2563 | 18.1k | QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); |
2564 | | |
2565 | 18.1k | std::vector<QPDFObjectHandle> part4; |
2566 | 18.1k | std::vector<QPDFObjectHandle> part6; |
2567 | 18.1k | std::vector<QPDFObjectHandle> part7; |
2568 | 18.1k | std::vector<QPDFObjectHandle> part8; |
2569 | 18.1k | std::vector<QPDFObjectHandle> part9; |
2570 | 18.1k | QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); |
2571 | | |
2572 | | // Object number sequence: |
2573 | | // |
2574 | | // second half |
2575 | | // second half uncompressed objects |
2576 | | // second half xref stream, if any |
2577 | | // second half compressed objects |
2578 | | // first half |
2579 | | // linearization dictionary |
2580 | | // first half xref stream, if any |
2581 | | // part 4 uncompresesd objects |
2582 | | // encryption dictionary, if any |
2583 | | // hint stream |
2584 | | // part 6 uncompressed objects |
2585 | | // first half compressed objects |
2586 | | // |
2587 | | |
2588 | | // Second half objects |
2589 | 18.1k | int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size()); |
2590 | 18.1k | int second_half_first_obj = 1; |
2591 | 18.1k | int after_second_half = 1 + second_half_uncompressed; |
2592 | 18.1k | m->next_objid = after_second_half; |
2593 | 18.1k | int second_half_xref = 0; |
2594 | 18.1k | bool need_xref_stream = !m->obj.streams_empty; |
2595 | 18.1k | if (need_xref_stream) { |
2596 | 6.13k | second_half_xref = m->next_objid++; |
2597 | 6.13k | } |
2598 | | // Assign numbers to all compressed objects in the second half. |
2599 | 18.1k | std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9}; |
2600 | 67.4k | for (int i = 0; i < 3; ++i) { |
2601 | 268k | for (auto const& oh: *vecs2[i]) { |
2602 | 268k | assignCompressedObjectNumbers(oh.getObjGen()); |
2603 | 268k | } |
2604 | 49.3k | } |
2605 | 18.1k | int second_half_end = m->next_objid - 1; |
2606 | 18.1k | int second_trailer_size = m->next_objid; |
2607 | | |
2608 | | // First half objects |
2609 | 18.1k | int first_half_start = m->next_objid; |
2610 | 18.1k | int lindict_id = m->next_objid++; |
2611 | 18.1k | int first_half_xref = 0; |
2612 | 18.1k | if (need_xref_stream) { |
2613 | 6.13k | first_half_xref = m->next_objid++; |
2614 | 6.13k | } |
2615 | 18.1k | int part4_first_obj = m->next_objid; |
2616 | 18.1k | m->next_objid += QIntC::to_int(part4.size()); |
2617 | 18.1k | int after_part4 = m->next_objid; |
2618 | 18.1k | if (m->encrypted) { |
2619 | 11.7k | m->encryption_dict_objid = m->next_objid++; |
2620 | 11.7k | } |
2621 | 18.1k | int hint_id = m->next_objid++; |
2622 | 18.1k | int part6_first_obj = m->next_objid; |
2623 | 18.1k | m->next_objid += QIntC::to_int(part6.size()); |
2624 | 18.1k | int after_part6 = m->next_objid; |
2625 | | // Assign numbers to all compressed objects in the first half |
2626 | 18.1k | std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6}; |
2627 | 51.0k | for (int i = 0; i < 2; ++i) { |
2628 | 186k | for (auto const& oh: *vecs1[i]) { |
2629 | 186k | assignCompressedObjectNumbers(oh.getObjGen()); |
2630 | 186k | } |
2631 | 32.8k | } |
2632 | 18.1k | int first_half_end = m->next_objid - 1; |
2633 | 18.1k | int first_trailer_size = m->next_objid; |
2634 | | |
2635 | 18.1k | int part4_end_marker = part4.back().getObjectID(); |
2636 | 18.1k | int part6_end_marker = part6.back().getObjectID(); |
2637 | 18.1k | qpdf_offset_t space_before_zero = 0; |
2638 | 18.1k | qpdf_offset_t file_size = 0; |
2639 | 18.1k | qpdf_offset_t part6_end_offset = 0; |
2640 | 18.1k | qpdf_offset_t first_half_max_obj_offset = 0; |
2641 | 18.1k | qpdf_offset_t second_xref_offset = 0; |
2642 | 18.1k | qpdf_offset_t first_xref_end = 0; |
2643 | 18.1k | qpdf_offset_t second_xref_end = 0; |
2644 | | |
2645 | 18.1k | m->next_objid = part4_first_obj; |
2646 | 18.1k | enqueuePart(part4); |
2647 | 18.1k | if (m->next_objid != after_part4) { |
2648 | | // This can happen with very botched files as in the fuzzer test. There are likely some |
2649 | | // faulty assumptions in calculateLinearizationData |
2650 | 266 | throw std::runtime_error("error encountered after writing part 4 of linearized data"); |
2651 | 266 | } |
2652 | 17.8k | m->next_objid = part6_first_obj; |
2653 | 17.8k | enqueuePart(part6); |
2654 | 17.8k | if (m->next_objid != after_part6) { |
2655 | 88 | throw std::runtime_error("error encountered after writing part 6 of linearized data"); |
2656 | 88 | } |
2657 | 17.7k | m->next_objid = second_half_first_obj; |
2658 | 17.7k | enqueuePart(part7); |
2659 | 17.7k | enqueuePart(part8); |
2660 | 17.7k | enqueuePart(part9); |
2661 | 17.7k | if (m->next_objid != after_second_half) { |
2662 | 178 | throw std::runtime_error("error encountered after writing part 9 of linearized data"); |
2663 | 178 | } |
2664 | | |
2665 | 17.6k | qpdf_offset_t hint_length = 0; |
2666 | 17.6k | std::shared_ptr<Buffer> hint_buffer; |
2667 | | |
2668 | | // Write file in two passes. Part numbers refer to PDF spec 1.4. |
2669 | | |
2670 | 17.6k | FILE* lin_pass1_file = nullptr; |
2671 | 17.6k | auto pp_pass1 = std::make_shared<PipelinePopper>(this); |
2672 | 17.6k | auto pp_md5 = std::make_shared<PipelinePopper>(this); |
2673 | 48.5k | for (int pass = 1; pass <= 2; ++pass) { |
2674 | 30.9k | if (pass == 1) { |
2675 | 15.9k | if (!m->lin_pass1_filename.empty()) { |
2676 | 0 | lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb"); |
2677 | 0 | pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file)); |
2678 | 0 | activatePipelineStack(*pp_pass1); |
2679 | 15.9k | } else { |
2680 | 15.9k | pushDiscardFilter(*pp_pass1); |
2681 | 15.9k | } |
2682 | 15.9k | if (m->deterministic_id) { |
2683 | 4.65k | pushMD5Pipeline(*pp_md5); |
2684 | 4.65k | } |
2685 | 15.9k | } |
2686 | | |
2687 | | // Part 1: header |
2688 | | |
2689 | 30.9k | writeHeader(); |
2690 | | |
2691 | | // Part 2: linearization parameter dictionary. Save enough space to write real dictionary. |
2692 | | // 200 characters is enough space if all numerical values in the parameter dictionary that |
2693 | | // contain offsets are 20 digits long plus a few extra characters for safety. The entire |
2694 | | // linearization parameter dictionary must appear within the first 1024 characters of the |
2695 | | // file. |
2696 | | |
2697 | 30.9k | qpdf_offset_t pos = m->pipeline->getCount(); |
2698 | 30.9k | openObject(lindict_id); |
2699 | 30.9k | writeString("<<"); |
2700 | 30.9k | if (pass == 2) { |
2701 | 14.9k | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); |
2702 | 14.9k | int first_page_object = m->obj[pages.at(0)].renumber; |
2703 | 14.9k | int npages = QIntC::to_int(pages.size()); |
2704 | | |
2705 | 14.9k | writeString(" /Linearized 1 /L "); |
2706 | 14.9k | writeString(std::to_string(file_size + hint_length)); |
2707 | | // Implementation note 121 states that a space is mandatory after this open bracket. |
2708 | 14.9k | writeString(" /H [ "); |
2709 | 14.9k | writeString(std::to_string(m->new_obj[hint_id].xref.getOffset())); |
2710 | 14.9k | writeString(" "); |
2711 | 14.9k | writeString(std::to_string(hint_length)); |
2712 | 14.9k | writeString(" ] /O "); |
2713 | 14.9k | writeString(std::to_string(first_page_object)); |
2714 | 14.9k | writeString(" /E "); |
2715 | 14.9k | writeString(std::to_string(part6_end_offset + hint_length)); |
2716 | 14.9k | writeString(" /N "); |
2717 | 14.9k | writeString(std::to_string(npages)); |
2718 | 14.9k | writeString(" /T "); |
2719 | 14.9k | writeString(std::to_string(space_before_zero + hint_length)); |
2720 | 14.9k | } |
2721 | 30.9k | writeString(" >>"); |
2722 | 30.9k | closeObject(lindict_id); |
2723 | 30.9k | static int const pad = 200; |
2724 | 30.9k | writePad(QIntC::to_size(pos - m->pipeline->getCount() + pad)); |
2725 | 30.9k | writeString("\n"); |
2726 | | |
2727 | | // If the user supplied any additional header text, write it here after the linearization |
2728 | | // parameter dictionary. |
2729 | 30.9k | writeString(m->extra_header_text); |
2730 | | |
2731 | | // Part 3: first page cross reference table and trailer. |
2732 | | |
2733 | 30.9k | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); |
2734 | 30.9k | qpdf_offset_t hint_offset = 0; |
2735 | 30.9k | if (pass == 2) { |
2736 | 14.9k | hint_offset = m->new_obj[hint_id].xref.getOffset(); |
2737 | 14.9k | } |
2738 | 30.9k | if (need_xref_stream) { |
2739 | | // Must pad here too. |
2740 | 11.1k | if (pass == 1) { |
2741 | | // Set first_half_max_obj_offset to a value large enough to force four bytes to be |
2742 | | // reserved for each file offset. This would provide adequate space for the xref |
2743 | | // stream as long as the last object in page 1 starts with in the first 4 GB of the |
2744 | | // file, which is extremely likely. In the second pass, we will know the actual |
2745 | | // value for this, but it's okay if it's smaller. |
2746 | 6.01k | first_half_max_obj_offset = 1 << 25; |
2747 | 6.01k | } |
2748 | 11.1k | pos = m->pipeline->getCount(); |
2749 | 11.1k | writeXRefStream( |
2750 | 11.1k | first_half_xref, |
2751 | 11.1k | first_half_end, |
2752 | 11.1k | first_half_max_obj_offset, |
2753 | 11.1k | t_lin_first, |
2754 | 11.1k | first_half_start, |
2755 | 11.1k | first_half_end, |
2756 | 11.1k | first_trailer_size, |
2757 | 11.1k | hint_length + second_xref_offset, |
2758 | 11.1k | hint_id, |
2759 | 11.1k | hint_offset, |
2760 | 11.1k | hint_length, |
2761 | 11.1k | (pass == 1), |
2762 | 11.1k | pass); |
2763 | 11.1k | qpdf_offset_t endpos = m->pipeline->getCount(); |
2764 | 11.1k | if (pass == 1) { |
2765 | | // Pad so we have enough room for the real xref stream. |
2766 | 5.26k | writePad(calculateXrefStreamPadding(endpos - pos)); |
2767 | 5.26k | first_xref_end = m->pipeline->getCount(); |
2768 | 5.90k | } else { |
2769 | | // Pad so that the next object starts at the same place as in pass 1. |
2770 | 5.90k | writePad(QIntC::to_size(first_xref_end - endpos)); |
2771 | | |
2772 | 5.90k | if (m->pipeline->getCount() != first_xref_end) { |
2773 | 0 | throw std::logic_error( |
2774 | 0 | "insufficient padding for first pass xref stream; " |
2775 | 0 | "first_xref_end=" + |
2776 | 0 | std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos)); |
2777 | 0 | } |
2778 | 5.90k | } |
2779 | 11.1k | writeString("\n"); |
2780 | 19.7k | } else { |
2781 | 19.7k | writeXRefTable( |
2782 | 19.7k | t_lin_first, |
2783 | 19.7k | first_half_start, |
2784 | 19.7k | first_half_end, |
2785 | 19.7k | first_trailer_size, |
2786 | 19.7k | hint_length + second_xref_offset, |
2787 | 19.7k | (pass == 1), |
2788 | 19.7k | hint_id, |
2789 | 19.7k | hint_offset, |
2790 | 19.7k | hint_length, |
2791 | 19.7k | pass); |
2792 | 19.7k | writeString("startxref\n0\n%%EOF\n"); |
2793 | 19.7k | } |
2794 | | |
2795 | | // Parts 4 through 9 |
2796 | | |
2797 | 862k | for (auto const& cur_object: m->object_queue) { |
2798 | 862k | if (cur_object.getObjectID() == part6_end_marker) { |
2799 | 30.1k | first_half_max_obj_offset = m->pipeline->getCount(); |
2800 | 30.1k | } |
2801 | 862k | writeObject(cur_object); |
2802 | 862k | if (cur_object.getObjectID() == part4_end_marker) { |
2803 | 30.1k | if (m->encrypted) { |
2804 | 22.3k | writeEncryptionDictionary(); |
2805 | 22.3k | } |
2806 | 30.1k | if (pass == 1) { |
2807 | 15.1k | m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2808 | 15.1k | } else { |
2809 | | // Part 5: hint stream |
2810 | 14.9k | writeBuffer(hint_buffer); |
2811 | 14.9k | } |
2812 | 30.1k | } |
2813 | 862k | if (cur_object.getObjectID() == part6_end_marker) { |
2814 | 30.0k | part6_end_offset = m->pipeline->getCount(); |
2815 | 30.0k | } |
2816 | 862k | } |
2817 | | |
2818 | | // Part 10: overflow hint stream -- not used |
2819 | | |
2820 | | // Part 11: main cross reference table and trailer |
2821 | | |
2822 | 30.9k | second_xref_offset = m->pipeline->getCount(); |
2823 | 30.9k | if (need_xref_stream) { |
2824 | 10.3k | pos = m->pipeline->getCount(); |
2825 | 10.3k | space_before_zero = writeXRefStream( |
2826 | 10.3k | second_half_xref, |
2827 | 10.3k | second_half_end, |
2828 | 10.3k | second_xref_offset, |
2829 | 10.3k | t_lin_second, |
2830 | 10.3k | 0, |
2831 | 10.3k | second_half_end, |
2832 | 10.3k | second_trailer_size, |
2833 | 10.3k | 0, |
2834 | 10.3k | 0, |
2835 | 10.3k | 0, |
2836 | 10.3k | 0, |
2837 | 10.3k | (pass == 1), |
2838 | 10.3k | pass); |
2839 | 10.3k | qpdf_offset_t endpos = m->pipeline->getCount(); |
2840 | | |
2841 | 10.3k | if (pass == 1) { |
2842 | | // Pad so we have enough room for the real xref stream. See comments for previous |
2843 | | // xref stream on how we calculate the padding. |
2844 | 5.16k | writePad(calculateXrefStreamPadding(endpos - pos)); |
2845 | 5.16k | writeString("\n"); |
2846 | 5.16k | second_xref_end = m->pipeline->getCount(); |
2847 | 5.16k | } else { |
2848 | | // Make the file size the same. |
2849 | 5.14k | writePad( |
2850 | 5.14k | QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount())); |
2851 | 5.14k | writeString("\n"); |
2852 | | |
2853 | | // If this assertion fails, maybe we didn't have enough padding above. |
2854 | 5.14k | if (m->pipeline->getCount() != second_xref_end + hint_length) { |
2855 | 0 | throw std::logic_error( |
2856 | 0 | "count mismatch after xref stream; possible insufficient padding?"); |
2857 | 0 | } |
2858 | 5.14k | } |
2859 | 20.6k | } else { |
2860 | 20.6k | space_before_zero = writeXRefTable( |
2861 | 20.6k | t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass); |
2862 | 20.6k | } |
2863 | 30.9k | writeString("startxref\n"); |
2864 | 30.9k | writeString(std::to_string(first_xref_offset)); |
2865 | 30.9k | writeString("\n%%EOF\n"); |
2866 | | |
2867 | 30.9k | if (pass == 1) { |
2868 | 14.9k | if (m->deterministic_id) { |
2869 | 3.85k | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
2870 | 3.85k | computeDeterministicIDData(); |
2871 | 3.85k | pp_md5 = nullptr; |
2872 | 3.85k | qpdf_assert_debug(m->md5_pipeline == nullptr); |
2873 | 3.85k | } |
2874 | | |
2875 | | // Close first pass pipeline |
2876 | 14.9k | file_size = m->pipeline->getCount(); |
2877 | 14.9k | pp_pass1 = nullptr; |
2878 | | |
2879 | | // Save hint offset since it will be set to zero by calling openObject. |
2880 | 14.9k | qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); |
2881 | | |
2882 | | // Write hint stream to a buffer |
2883 | 14.9k | { |
2884 | 14.9k | pushPipeline(new Pl_Buffer("hint buffer")); |
2885 | 14.9k | PipelinePopper pp_hint(this, &hint_buffer); |
2886 | 14.9k | activatePipelineStack(pp_hint); |
2887 | 14.9k | writeHintStream(hint_id); |
2888 | 14.9k | } |
2889 | 14.9k | hint_length = QIntC::to_offset(hint_buffer->getSize()); |
2890 | | |
2891 | | // Restore hint offset |
2892 | 14.9k | m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); |
2893 | 14.9k | if (lin_pass1_file) { |
2894 | | // Write some debugging information |
2895 | 0 | fprintf( |
2896 | 0 | lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str()); |
2897 | 0 | fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str()); |
2898 | 0 | fprintf( |
2899 | 0 | lin_pass1_file, |
2900 | 0 | "%% second_xref_offset=%s\n", |
2901 | 0 | std::to_string(second_xref_offset).c_str()); |
2902 | 0 | fprintf( |
2903 | 0 | lin_pass1_file, |
2904 | 0 | "%% second_xref_end=%s\n", |
2905 | 0 | std::to_string(second_xref_end).c_str()); |
2906 | 0 | fclose(lin_pass1_file); |
2907 | 0 | lin_pass1_file = nullptr; |
2908 | 0 | } |
2909 | 14.9k | } |
2910 | 30.9k | } |
2911 | 17.6k | } |
2912 | | |
2913 | | void |
2914 | | QPDFWriter::enqueueObjectsStandard() |
2915 | 14.1k | { |
2916 | 14.1k | if (m->preserve_unreferenced_objects) { |
2917 | 0 | QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); |
2918 | 0 | for (auto const& oh: m->pdf.getAllObjects()) { |
2919 | 0 | enqueueObject(oh); |
2920 | 0 | } |
2921 | 0 | } |
2922 | | |
2923 | | // Put root first on queue. |
2924 | 14.1k | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2925 | 14.1k | enqueueObject(trailer.getKey("/Root")); |
2926 | | |
2927 | | // Next place any other objects referenced from the trailer dictionary into the queue, handling |
2928 | | // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op. |
2929 | 61.0k | for (auto const& key: trailer.getKeys()) { |
2930 | 61.0k | enqueueObject(trailer.getKey(key)); |
2931 | 61.0k | } |
2932 | 14.1k | } |
2933 | | |
2934 | | void |
2935 | | QPDFWriter::enqueueObjectsPCLm() |
2936 | 0 | { |
2937 | | // Image transform stream content for page strip images. Each of this new stream has to come |
2938 | | // after every page image strip written in the pclm file. |
2939 | 0 | std::string image_transform_content = "q /image Do Q\n"; |
2940 | | |
2941 | | // enqueue all pages first |
2942 | 0 | std::vector<QPDFObjectHandle> all = m->pdf.getAllPages(); |
2943 | 0 | for (auto& page: all) { |
2944 | | // enqueue page |
2945 | 0 | enqueueObject(page); |
2946 | | |
2947 | | // enqueue page contents stream |
2948 | 0 | enqueueObject(page.getKey("/Contents")); |
2949 | | |
2950 | | // enqueue all the strips for each page |
2951 | 0 | QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject"); |
2952 | 0 | for (auto const& image: strips.getKeys()) { |
2953 | 0 | enqueueObject(strips.getKey(image)); |
2954 | 0 | enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content)); |
2955 | 0 | } |
2956 | 0 | } |
2957 | | |
2958 | | // Put root in queue. |
2959 | 0 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2960 | 0 | enqueueObject(trailer.getKey("/Root")); |
2961 | 0 | } |
2962 | | |
2963 | | void |
2964 | | QPDFWriter::indicateProgress(bool decrement, bool finished) |
2965 | 2.17M | { |
2966 | 2.17M | if (decrement) { |
2967 | 311k | --m->events_seen; |
2968 | 311k | return; |
2969 | 311k | } |
2970 | | |
2971 | 1.86M | ++m->events_seen; |
2972 | | |
2973 | 1.86M | if (!m->progress_reporter.get()) { |
2974 | 1.86M | return; |
2975 | 1.86M | } |
2976 | | |
2977 | 0 | if (finished || (m->events_seen >= m->next_progress_report)) { |
2978 | 0 | int percentage = |
2979 | 0 | (finished ? 100 |
2980 | 0 | : m->next_progress_report == 0 |
2981 | 0 | ? 0 |
2982 | 0 | : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected))); |
2983 | 0 | m->progress_reporter->reportProgress(percentage); |
2984 | 0 | } |
2985 | 0 | int increment = std::max(1, (m->events_expected / 100)); |
2986 | 0 | while (m->events_seen >= m->next_progress_report) { |
2987 | 0 | m->next_progress_report += increment; |
2988 | 0 | } |
2989 | 0 | } |
2990 | | |
2991 | | void |
2992 | | QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr) |
2993 | 0 | { |
2994 | 0 | m->progress_reporter = pr; |
2995 | 0 | } |
2996 | | |
2997 | | void |
2998 | | QPDFWriter::writeStandard() |
2999 | 14.1k | { |
3000 | 14.1k | auto pp_md5 = std::make_shared<PipelinePopper>(this); |
3001 | 14.1k | if (m->deterministic_id) { |
3002 | 8.28k | pushMD5Pipeline(*pp_md5); |
3003 | 8.28k | } |
3004 | | |
3005 | | // Start writing |
3006 | | |
3007 | 14.1k | writeHeader(); |
3008 | 14.1k | writeString(m->extra_header_text); |
3009 | | |
3010 | 14.1k | if (m->pclm) { |
3011 | 0 | enqueueObjectsPCLm(); |
3012 | 14.1k | } else { |
3013 | 14.1k | enqueueObjectsStandard(); |
3014 | 14.1k | } |
3015 | | |
3016 | | // Now start walking queue, outputting each object. |
3017 | 386k | while (m->object_queue_front < m->object_queue.size()) { |
3018 | 372k | QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front); |
3019 | 372k | ++m->object_queue_front; |
3020 | 372k | writeObject(cur_object); |
3021 | 372k | } |
3022 | | |
3023 | | // Write out the encryption dictionary, if any |
3024 | 14.1k | if (m->encrypted) { |
3025 | 5.75k | writeEncryptionDictionary(); |
3026 | 5.75k | } |
3027 | | |
3028 | | // Now write out xref. next_objid is now the number of objects. |
3029 | 14.1k | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
3030 | 14.1k | if (m->object_stream_to_objects.empty()) { |
3031 | | // Write regular cross-reference table |
3032 | 13.2k | writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid); |
3033 | 13.2k | } else { |
3034 | | // Write cross-reference stream. |
3035 | 888 | int xref_id = m->next_objid++; |
3036 | 888 | writeXRefStream( |
3037 | 888 | xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid); |
3038 | 888 | } |
3039 | 14.1k | writeString("startxref\n"); |
3040 | 14.1k | writeString(std::to_string(xref_offset)); |
3041 | 14.1k | writeString("\n%%EOF\n"); |
3042 | | |
3043 | 14.1k | if (m->deterministic_id) { |
3044 | 7.22k | QTC::TC( |
3045 | 7.22k | "qpdf", |
3046 | 7.22k | "QPDFWriter standard deterministic ID", |
3047 | 7.22k | m->object_stream_to_objects.empty() ? 0 : 1); |
3048 | 7.22k | pp_md5 = nullptr; |
3049 | 7.22k | qpdf_assert_debug(m->md5_pipeline == nullptr); |
3050 | 7.22k | } |
3051 | 14.1k | } |