/src/qpdf/libqpdf/QPDFWriter.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/assert_debug.h> |
2 | | |
3 | | #include <qpdf/qpdf-config.h> // include early for large file support |
4 | | |
5 | | #include <qpdf/QPDFWriter_private.hh> |
6 | | |
7 | | #include <qpdf/MD5.hh> |
8 | | #include <qpdf/Pl_AES_PDF.hh> |
9 | | #include <qpdf/Pl_Flate.hh> |
10 | | #include <qpdf/Pl_MD5.hh> |
11 | | #include <qpdf/Pl_PNGFilter.hh> |
12 | | #include <qpdf/Pl_RC4.hh> |
13 | | #include <qpdf/Pl_StdioFile.hh> |
14 | | #include <qpdf/Pl_String.hh> |
15 | | #include <qpdf/QIntC.hh> |
16 | | #include <qpdf/QPDFObjectHandle_private.hh> |
17 | | #include <qpdf/QPDFObject_private.hh> |
18 | | #include <qpdf/QPDF_private.hh> |
19 | | #include <qpdf/QTC.hh> |
20 | | #include <qpdf/QUtil.hh> |
21 | | #include <qpdf/RC4.hh> |
22 | | #include <qpdf/Util.hh> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <cstdlib> |
26 | | #include <stdexcept> |
27 | | |
28 | | using namespace std::literals; |
29 | | using namespace qpdf; |
30 | | |
31 | | QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) |
32 | 0 | { |
33 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
34 | 0 | } |
35 | | |
36 | | QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) : |
37 | 0 | handler(handler) |
38 | 0 | { |
39 | 0 | } |
40 | | |
41 | | QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT |
42 | | // (modernize-use-equals-default) |
43 | 0 | { |
44 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
45 | 0 | } |
46 | | |
47 | | void |
48 | | QPDFWriter::FunctionProgressReporter::reportProgress(int progress) |
49 | 0 | { |
50 | 0 | this->handler(progress); |
51 | 0 | } |
52 | | |
// Private state of a QPDFWriter. Only QPDFWriter (a friend) can construct one; instances are not
// copyable. Declaration order matters here because it determines the order in which the members
// are initialized and destroyed.
class QPDFWriter::Members
{
    friend class QPDFWriter;

  public:
    ~Members();

  private:
    Members(QPDF& pdf);
    Members(Members const&) = delete;

    // The document being written.
    QPDF& pdf;
    // Object/generation of the document root; the constructor leaves this at (-1, 0) when the root
    // is not an indirect object.
    QPDFObjGen root_og{-1, 0};

    // Output destination. "filename" is a description set by the setOutput* methods. "file" is
    // closed by the destructor only when close_file is true.
    char const* filename{"unspecified"};
    FILE* file{nullptr};
    bool close_file{false};
    // Created by setOutputMemory(); its lifetime is managed through to_delete.
    Pl_Buffer* buffer_pipeline{nullptr};
    // Deleted by the destructor unless getBuffer() has already handed it to the caller.
    Buffer* output_buffer{nullptr};

    // Output options. The "*_set" flags record that the caller explicitly supplied the
    // corresponding value through one of the setters.
    bool normalize_content_set{false};
    bool normalize_content{false};
    bool compress_streams{true};
    bool compress_streams_set{false};
    qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
    bool stream_decode_level_set{false};
    bool recompress_flate{false};
    bool qdf_mode{false};
    bool preserve_unreferenced_objects{false};
    bool newline_before_endstream{false};
    bool static_id{false};
    bool suppress_original_object_ids{false};
    bool direct_stream_lengths{true};
    bool preserve_encryption{true};
    // linearized and pclm are kept mutually exclusive by setLinearization() and setPCLm():
    // enabling one clears the other.
    bool linearized{false};
    bool pclm{false};
    qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};

    // Encryption state. "encryption" is null when no encryption parameters are in effect.
    std::unique_ptr<QPDF::EncryptionData> encryption;
    std::string encryption_key;
    bool encrypt_use_aes{false};

    std::string id1; // for /ID key of
    std::string id2; // trailer dictionary
    std::string final_pdf_version;
    int final_extension_level{0};
    // Lowest version/extension level required so far; raised by setMinimumPDFVersion().
    std::string min_pdf_version;
    int min_extension_level{0};
    // Set by forcePDFVersion().
    std::string forced_pdf_version;
    int forced_extension_level{0};
    // Always stored with a trailing newline when non-empty (see setExtraHeaderText()).
    std::string extra_header_text;
    int encryption_dict_objid{0};
    // Per-object key computed by setDataKey().
    std::string cur_data_key;
    // Keeps pipelines created by this writer alive for the writer's lifetime.
    std::list<std::shared_ptr<Pipeline>> to_delete;
    // The pl::Count currently at the top of pipeline_stack.
    qpdf::pl::Count* pipeline{nullptr};
    std::vector<QPDFObjectHandle> object_queue;
    size_t object_queue_front{0};
    QPDFWriter::ObjTable obj;
    QPDFWriter::NewObjTable new_obj;
    int next_objid{1};
    int cur_stream_length_id{0};
    size_t cur_stream_length{0};
    bool added_newline{false};
    size_t max_ostream_index{0};
    std::set<QPDFObjGen> normalized_streams;
    std::map<QPDFObjGen, int> page_object_to_seq;
    std::map<QPDFObjGen, int> contents_to_page_seq;
    std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
    // Stack of pipelines; the entries above the base are pushed by pushPipeline() and
    // activatePipelineStack() and deleted by PipelinePopper's destructor.
    std::vector<Pipeline*> pipeline_stack;
    // Id given to the next pl::Count created by activatePipelineStack().
    // NOTE(review): starts at 2, presumably because initializePipelineStack() uses 1 for the base
    // pl::Count -- confirm against pl::Count's constructor.
    unsigned long next_stack_id{2};
    // Cleared and used as the capture target by the pl::Link overload of activatePipelineStack().
    std::string count_buffer;
    bool deterministic_id{false};
    // Reset to nullptr by PipelinePopper's destructor when the matching Pl_MD5 is popped.
    Pl_MD5* md5_pipeline{nullptr};
    std::string deterministic_id_data;
    bool did_write_setup{false};

    // For linearization only
    std::string lin_pass1_filename;

    // For progress reporting
    std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
    int events_expected{0};
    int events_seen{0};
    int next_progress_report{0};
};
136 | | |
137 | | QPDFWriter::Members::Members(QPDF& pdf) : |
138 | 7.98k | pdf(pdf), |
139 | 7.98k | root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)) |
140 | 7.98k | { |
141 | 7.98k | } |
142 | | |
143 | | QPDFWriter::Members::~Members() |
144 | 7.75k | { |
145 | 7.75k | if (file && close_file) { |
146 | 0 | fclose(file); |
147 | 0 | } |
148 | 7.75k | delete output_buffer; |
149 | 7.75k | } |
150 | | |
151 | | QPDFWriter::QPDFWriter(QPDF& pdf) : |
152 | 7.98k | m(new Members(pdf)) |
153 | 7.98k | { |
154 | 7.98k | } |
155 | | |
156 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) : |
157 | 0 | m(new Members(pdf)) |
158 | 0 | { |
159 | 0 | setOutputFilename(filename); |
160 | 0 | } |
161 | | |
162 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) : |
163 | 0 | m(new Members(pdf)) |
164 | 0 | { |
165 | 0 | setOutputFile(description, file, close_file); |
166 | 0 | } |
167 | | |
168 | | void |
169 | | QPDFWriter::setOutputFilename(char const* filename) |
170 | 0 | { |
171 | 0 | char const* description = filename; |
172 | 0 | FILE* f = nullptr; |
173 | 0 | bool close_file = false; |
174 | 0 | if (filename == nullptr) { |
175 | 0 | description = "standard output"; |
176 | 0 | QTC::TC("qpdf", "QPDFWriter write to stdout"); |
177 | 0 | f = stdout; |
178 | 0 | QUtil::binary_stdout(); |
179 | 0 | } else { |
180 | 0 | QTC::TC("qpdf", "QPDFWriter write to file"); |
181 | 0 | f = QUtil::safe_fopen(filename, "wb+"); |
182 | 0 | close_file = true; |
183 | 0 | } |
184 | 0 | setOutputFile(description, f, close_file); |
185 | 0 | } |
186 | | |
187 | | void |
188 | | QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file) |
189 | 0 | { |
190 | 0 | m->filename = description; |
191 | 0 | m->file = file; |
192 | 0 | m->close_file = close_file; |
193 | 0 | std::shared_ptr<Pipeline> p = std::make_shared<Pl_StdioFile>("qpdf output", file); |
194 | 0 | m->to_delete.push_back(p); |
195 | 0 | initializePipelineStack(p.get()); |
196 | 0 | } |
197 | | |
198 | | void |
199 | | QPDFWriter::setOutputMemory() |
200 | 0 | { |
201 | 0 | m->filename = "memory buffer"; |
202 | 0 | m->buffer_pipeline = new Pl_Buffer("qpdf output"); |
203 | 0 | m->to_delete.push_back(std::shared_ptr<Pipeline>(m->buffer_pipeline)); |
204 | 0 | initializePipelineStack(m->buffer_pipeline); |
205 | 0 | } |
206 | | |
207 | | Buffer* |
208 | | QPDFWriter::getBuffer() |
209 | 0 | { |
210 | 0 | Buffer* result = m->output_buffer; |
211 | 0 | m->output_buffer = nullptr; |
212 | 0 | return result; |
213 | 0 | } |
214 | | |
215 | | std::shared_ptr<Buffer> |
216 | | QPDFWriter::getBufferSharedPointer() |
217 | 0 | { |
218 | 0 | return std::shared_ptr<Buffer>(getBuffer()); |
219 | 0 | } |
220 | | |
221 | | void |
222 | | QPDFWriter::setOutputPipeline(Pipeline* p) |
223 | 7.75k | { |
224 | 7.75k | m->filename = "custom pipeline"; |
225 | 7.75k | initializePipelineStack(p); |
226 | 7.75k | } |
227 | | |
228 | | void |
229 | | QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) |
230 | 7.75k | { |
231 | 7.75k | m->object_stream_mode = mode; |
232 | 7.75k | } |
233 | | |
234 | | void |
235 | | QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) |
236 | 0 | { |
237 | 0 | switch (mode) { |
238 | 0 | case qpdf_s_uncompress: |
239 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
240 | 0 | m->compress_streams = false; |
241 | 0 | break; |
242 | | |
243 | 0 | case qpdf_s_preserve: |
244 | 0 | m->stream_decode_level = qpdf_dl_none; |
245 | 0 | m->compress_streams = false; |
246 | 0 | break; |
247 | | |
248 | 0 | case qpdf_s_compress: |
249 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
250 | 0 | m->compress_streams = true; |
251 | 0 | break; |
252 | 0 | } |
253 | 0 | m->stream_decode_level_set = true; |
254 | 0 | m->compress_streams_set = true; |
255 | 0 | } |
256 | | |
257 | | void |
258 | | QPDFWriter::setCompressStreams(bool val) |
259 | 0 | { |
260 | 0 | m->compress_streams = val; |
261 | 0 | m->compress_streams_set = true; |
262 | 0 | } |
263 | | |
264 | | void |
265 | | QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) |
266 | 7.75k | { |
267 | 7.75k | m->stream_decode_level = val; |
268 | 7.75k | m->stream_decode_level_set = true; |
269 | 7.75k | } |
270 | | |
271 | | void |
272 | | QPDFWriter::setRecompressFlate(bool val) |
273 | 0 | { |
274 | 0 | m->recompress_flate = val; |
275 | 0 | } |
276 | | |
277 | | void |
278 | | QPDFWriter::setContentNormalization(bool val) |
279 | 0 | { |
280 | 0 | m->normalize_content_set = true; |
281 | 0 | m->normalize_content = val; |
282 | 0 | } |
283 | | |
284 | | void |
285 | | QPDFWriter::setQDFMode(bool val) |
286 | 0 | { |
287 | 0 | m->qdf_mode = val; |
288 | 0 | } |
289 | | |
290 | | void |
291 | | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
292 | 0 | { |
293 | 0 | m->preserve_unreferenced_objects = val; |
294 | 0 | } |
295 | | |
296 | | void |
297 | | QPDFWriter::setNewlineBeforeEndstream(bool val) |
298 | 0 | { |
299 | 0 | m->newline_before_endstream = val; |
300 | 0 | } |
301 | | |
302 | | void |
303 | | QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level) |
304 | 15.4k | { |
305 | 15.4k | bool set_version = false; |
306 | 15.4k | bool set_extension_level = false; |
307 | 15.4k | if (m->min_pdf_version.empty()) { |
308 | 7.74k | set_version = true; |
309 | 7.74k | set_extension_level = true; |
310 | 7.74k | } else { |
311 | 7.70k | int old_major = 0; |
312 | 7.70k | int old_minor = 0; |
313 | 7.70k | int min_major = 0; |
314 | 7.70k | int min_minor = 0; |
315 | 7.70k | parseVersion(version, old_major, old_minor); |
316 | 7.70k | parseVersion(m->min_pdf_version, min_major, min_minor); |
317 | 7.70k | int compare = compareVersions(old_major, old_minor, min_major, min_minor); |
318 | 7.70k | if (compare > 0) { |
319 | 701 | QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1); |
320 | 701 | set_version = true; |
321 | 701 | set_extension_level = true; |
322 | 7.00k | } else if (compare == 0) { |
323 | 454 | if (extension_level > m->min_extension_level) { |
324 | 1 | QTC::TC("qpdf", "QPDFWriter increasing extension level"); |
325 | 1 | set_extension_level = true; |
326 | 1 | } |
327 | 454 | } |
328 | 7.70k | } |
329 | | |
330 | 15.4k | if (set_version) { |
331 | 8.44k | m->min_pdf_version = version; |
332 | 8.44k | } |
333 | 15.4k | if (set_extension_level) { |
334 | 8.44k | m->min_extension_level = extension_level; |
335 | 8.44k | } |
336 | 15.4k | } |
337 | | |
338 | | void |
339 | | QPDFWriter::setMinimumPDFVersion(PDFVersion const& v) |
340 | 0 | { |
341 | 0 | std::string version; |
342 | 0 | int extension_level; |
343 | 0 | v.getVersion(version, extension_level); |
344 | 0 | setMinimumPDFVersion(version, extension_level); |
345 | 0 | } |
346 | | |
347 | | void |
348 | | QPDFWriter::forcePDFVersion(std::string const& version, int extension_level) |
349 | 0 | { |
350 | 0 | m->forced_pdf_version = version; |
351 | 0 | m->forced_extension_level = extension_level; |
352 | 0 | } |
353 | | |
354 | | void |
355 | | QPDFWriter::setExtraHeaderText(std::string const& text) |
356 | 0 | { |
357 | 0 | m->extra_header_text = text; |
358 | 0 | if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') { |
359 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text add newline"); |
360 | 0 | m->extra_header_text += "\n"; |
361 | 0 | } else { |
362 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text no newline"); |
363 | 0 | } |
364 | 0 | } |
365 | | |
366 | | void |
367 | | QPDFWriter::setStaticID(bool val) |
368 | 7.75k | { |
369 | 7.75k | m->static_id = val; |
370 | 7.75k | } |
371 | | |
372 | | void |
373 | | QPDFWriter::setDeterministicID(bool val) |
374 | 0 | { |
375 | 0 | m->deterministic_id = val; |
376 | 0 | } |
377 | | |
378 | | void |
379 | | QPDFWriter::setStaticAesIV(bool val) |
380 | 0 | { |
381 | 0 | if (val) { |
382 | 0 | Pl_AES_PDF::useStaticIV(); |
383 | 0 | } |
384 | 0 | } |
385 | | |
386 | | void |
387 | | QPDFWriter::setSuppressOriginalObjectIDs(bool val) |
388 | 0 | { |
389 | 0 | m->suppress_original_object_ids = val; |
390 | 0 | } |
391 | | |
392 | | void |
393 | | QPDFWriter::setPreserveEncryption(bool val) |
394 | 0 | { |
395 | 0 | m->preserve_encryption = val; |
396 | 0 | } |
397 | | |
398 | | void |
399 | | QPDFWriter::setLinearization(bool val) |
400 | 0 | { |
401 | 0 | m->linearized = val; |
402 | 0 | if (val) { |
403 | 0 | m->pclm = false; |
404 | 0 | } |
405 | 0 | } |
406 | | |
407 | | void |
408 | | QPDFWriter::setLinearizationPass1Filename(std::string const& filename) |
409 | 0 | { |
410 | 0 | m->lin_pass1_filename = filename; |
411 | 0 | } |
412 | | |
413 | | void |
414 | | QPDFWriter::setPCLm(bool val) |
415 | 0 | { |
416 | 0 | m->pclm = val; |
417 | 0 | if (val) { |
418 | 0 | m->linearized = false; |
419 | 0 | } |
420 | 0 | } |
421 | | |
422 | | void |
423 | | QPDFWriter::setR2EncryptionParametersInsecure( |
424 | | char const* user_password, |
425 | | char const* owner_password, |
426 | | bool allow_print, |
427 | | bool allow_modify, |
428 | | bool allow_extract, |
429 | | bool allow_annotate) |
430 | 0 | { |
431 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true); |
432 | 0 | if (!allow_print) { |
433 | 0 | m->encryption->setP(3, false); |
434 | 0 | } |
435 | 0 | if (!allow_modify) { |
436 | 0 | m->encryption->setP(4, false); |
437 | 0 | } |
438 | 0 | if (!allow_extract) { |
439 | 0 | m->encryption->setP(5, false); |
440 | 0 | } |
441 | 0 | if (!allow_annotate) { |
442 | 0 | m->encryption->setP(6, false); |
443 | 0 | } |
444 | 0 | setEncryptionParameters(user_password, owner_password); |
445 | 0 | } |
446 | | |
447 | | void |
448 | | QPDFWriter::setR3EncryptionParametersInsecure( |
449 | | char const* user_password, |
450 | | char const* owner_password, |
451 | | bool allow_accessibility, |
452 | | bool allow_extract, |
453 | | bool allow_assemble, |
454 | | bool allow_annotate_and_form, |
455 | | bool allow_form_filling, |
456 | | bool allow_modify_other, |
457 | | qpdf_r3_print_e print) |
458 | 7.75k | { |
459 | 7.75k | m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true); |
460 | 7.75k | interpretR3EncryptionParameters( |
461 | 7.75k | allow_accessibility, |
462 | 7.75k | allow_extract, |
463 | 7.75k | allow_assemble, |
464 | 7.75k | allow_annotate_and_form, |
465 | 7.75k | allow_form_filling, |
466 | 7.75k | allow_modify_other, |
467 | 7.75k | print, |
468 | 7.75k | qpdf_r3m_all); |
469 | 7.75k | setEncryptionParameters(user_password, owner_password); |
470 | 7.75k | } |
471 | | |
472 | | void |
473 | | QPDFWriter::setR4EncryptionParametersInsecure( |
474 | | char const* user_password, |
475 | | char const* owner_password, |
476 | | bool allow_accessibility, |
477 | | bool allow_extract, |
478 | | bool allow_assemble, |
479 | | bool allow_annotate_and_form, |
480 | | bool allow_form_filling, |
481 | | bool allow_modify_other, |
482 | | qpdf_r3_print_e print, |
483 | | bool encrypt_metadata, |
484 | | bool use_aes) |
485 | 0 | { |
486 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata); |
487 | 0 | m->encrypt_use_aes = use_aes; |
488 | 0 | interpretR3EncryptionParameters( |
489 | 0 | allow_accessibility, |
490 | 0 | allow_extract, |
491 | 0 | allow_assemble, |
492 | 0 | allow_annotate_and_form, |
493 | 0 | allow_form_filling, |
494 | 0 | allow_modify_other, |
495 | 0 | print, |
496 | 0 | qpdf_r3m_all); |
497 | 0 | setEncryptionParameters(user_password, owner_password); |
498 | 0 | } |
499 | | |
500 | | void |
501 | | QPDFWriter::setR5EncryptionParameters( |
502 | | char const* user_password, |
503 | | char const* owner_password, |
504 | | bool allow_accessibility, |
505 | | bool allow_extract, |
506 | | bool allow_assemble, |
507 | | bool allow_annotate_and_form, |
508 | | bool allow_form_filling, |
509 | | bool allow_modify_other, |
510 | | qpdf_r3_print_e print, |
511 | | bool encrypt_metadata) |
512 | 0 | { |
513 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata); |
514 | 0 | m->encrypt_use_aes = true; |
515 | 0 | interpretR3EncryptionParameters( |
516 | 0 | allow_accessibility, |
517 | 0 | allow_extract, |
518 | 0 | allow_assemble, |
519 | 0 | allow_annotate_and_form, |
520 | 0 | allow_form_filling, |
521 | 0 | allow_modify_other, |
522 | 0 | print, |
523 | 0 | qpdf_r3m_all); |
524 | 0 | setEncryptionParameters(user_password, owner_password); |
525 | 0 | } |
526 | | |
527 | | void |
528 | | QPDFWriter::setR6EncryptionParameters( |
529 | | char const* user_password, |
530 | | char const* owner_password, |
531 | | bool allow_accessibility, |
532 | | bool allow_extract, |
533 | | bool allow_assemble, |
534 | | bool allow_annotate_and_form, |
535 | | bool allow_form_filling, |
536 | | bool allow_modify_other, |
537 | | qpdf_r3_print_e print, |
538 | | bool encrypt_metadata) |
539 | 0 | { |
540 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata); |
541 | 0 | interpretR3EncryptionParameters( |
542 | 0 | allow_accessibility, |
543 | 0 | allow_extract, |
544 | 0 | allow_assemble, |
545 | 0 | allow_annotate_and_form, |
546 | 0 | allow_form_filling, |
547 | 0 | allow_modify_other, |
548 | 0 | print, |
549 | 0 | qpdf_r3m_all); |
550 | 0 | m->encrypt_use_aes = true; |
551 | 0 | setEncryptionParameters(user_password, owner_password); |
552 | 0 | } |
553 | | |
554 | | void |
555 | | QPDFWriter::interpretR3EncryptionParameters( |
556 | | bool allow_accessibility, |
557 | | bool allow_extract, |
558 | | bool allow_assemble, |
559 | | bool allow_annotate_and_form, |
560 | | bool allow_form_filling, |
561 | | bool allow_modify_other, |
562 | | qpdf_r3_print_e print, |
563 | | qpdf_r3_modify_e modify) |
564 | 7.75k | { |
565 | | // Acrobat 5 security options: |
566 | | |
567 | | // Checkboxes: |
568 | | // Enable Content Access for the Visually Impaired |
569 | | // Allow Content Copying and Extraction |
570 | | |
571 | | // Allowed changes menu: |
572 | | // None |
573 | | // Only Document Assembly |
574 | | // Only Form Field Fill-in or Signing |
575 | | // Comment Authoring, Form Field Fill-in or Signing |
576 | | // General Editing, Comment and Form Field Authoring |
577 | | |
578 | | // Allowed printing menu: |
579 | | // None |
580 | | // Low Resolution |
581 | | // Full printing |
582 | | |
583 | | // Meanings of bits in P when R >= 3 |
584 | | // |
585 | | // 3: low-resolution printing |
586 | | // 4: document modification except as controlled by 6, 9, and 11 |
587 | | // 5: extraction |
588 | | // 6: add/modify annotations (comment), fill in forms |
589 | | // if 4+6 are set, also allows modification of form fields |
590 | | // 9: fill in forms even if 6 is clear |
591 | | // 10: accessibility; ignored by readers, should always be set |
592 | | // 11: document assembly even if 4 is clear |
593 | | // 12: high-resolution printing |
594 | 7.75k | if (!allow_accessibility && m->encryption->getR() <= 3) { |
595 | | // Bit 10 is deprecated and should always be set. This used to mean accessibility. There |
596 | | // is no way to disable accessibility with R > 3. |
597 | 0 | m->encryption->setP(10, false); |
598 | 0 | } |
599 | 7.75k | if (!allow_extract) { |
600 | 0 | m->encryption->setP(5, false); |
601 | 0 | } |
602 | | |
603 | 7.75k | switch (print) { |
604 | 0 | case qpdf_r3p_none: |
605 | 0 | m->encryption->setP(3, false); // any printing |
606 | 0 | [[fallthrough]]; |
607 | 0 | case qpdf_r3p_low: |
608 | 0 | m->encryption->setP(12, false); // high resolution printing |
609 | 0 | [[fallthrough]]; |
610 | 7.75k | case qpdf_r3p_full: |
611 | 7.75k | break; |
612 | | // no default so gcc warns for missing cases |
613 | 7.75k | } |
614 | | |
615 | | // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full |
616 | | // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're |
617 | | // stuck with it. See also allow checks below to control the bits individually. |
618 | | |
619 | | // NOT EXERCISED IN TEST SUITE |
620 | 7.75k | switch (modify) { |
621 | 0 | case qpdf_r3m_none: |
622 | 0 | m->encryption->setP(11, false); // document assembly |
623 | 0 | [[fallthrough]]; |
624 | 0 | case qpdf_r3m_assembly: |
625 | 0 | m->encryption->setP(9, false); // filling in form fields |
626 | 0 | [[fallthrough]]; |
627 | 0 | case qpdf_r3m_form: |
628 | 0 | m->encryption->setP(6, false); // modify annotations, fill in form fields |
629 | 0 | [[fallthrough]]; |
630 | 0 | case qpdf_r3m_annotate: |
631 | 0 | m->encryption->setP(4, false); // other modifications |
632 | 0 | [[fallthrough]]; |
633 | 7.75k | case qpdf_r3m_all: |
634 | 7.75k | break; |
635 | | // no default so gcc warns for missing cases |
636 | 7.75k | } |
637 | | // END NOT EXERCISED IN TEST SUITE |
638 | | |
639 | 7.75k | if (!allow_assemble) { |
640 | 0 | m->encryption->setP(11, false); |
641 | 0 | } |
642 | 7.75k | if (!allow_annotate_and_form) { |
643 | 0 | m->encryption->setP(6, false); |
644 | 0 | } |
645 | 7.75k | if (!allow_form_filling) { |
646 | 0 | m->encryption->setP(9, false); |
647 | 0 | } |
648 | 7.75k | if (!allow_modify_other) { |
649 | 0 | m->encryption->setP(4, false); |
650 | 0 | } |
651 | 7.75k | } |
652 | | |
653 | | void |
654 | | QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password) |
655 | 7.75k | { |
656 | 7.75k | generateID(); |
657 | 7.75k | m->encryption->setId1(m->id1); |
658 | 7.75k | m->encryption_key = m->encryption->compute_parameters(user_password, owner_password); |
659 | 7.75k | setEncryptionMinimumVersion(); |
660 | 7.75k | } |
661 | | |
662 | | void |
663 | | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
664 | 0 | { |
665 | 0 | m->preserve_encryption = false; |
666 | 0 | QPDFObjectHandle trailer = qpdf.getTrailer(); |
667 | 0 | if (trailer.hasKey("/Encrypt")) { |
668 | 0 | generateID(); |
669 | 0 | m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
670 | 0 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
671 | 0 | int V = encrypt.getKey("/V").getIntValueAsInt(); |
672 | 0 | int key_len = 5; |
673 | 0 | if (V > 1) { |
674 | 0 | key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8; |
675 | 0 | } |
676 | 0 | const bool encrypt_metadata = |
677 | 0 | encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool() |
678 | 0 | ? encrypt.getKey("/EncryptMetadata").getBoolValue() |
679 | 0 | : true; |
680 | 0 | if (V >= 4) { |
681 | | // When copying encryption parameters, use AES even if the original file did not. |
682 | | // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of |
683 | | // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF |
684 | | // all potentially having different values. |
685 | 0 | m->encrypt_use_aes = true; |
686 | 0 | } |
687 | 0 | QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1); |
688 | 0 | QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1); |
689 | |
|
690 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>( |
691 | 0 | V, |
692 | 0 | encrypt.getKey("/R").getIntValueAsInt(), |
693 | 0 | key_len, |
694 | 0 | static_cast<int>(encrypt.getKey("/P").getIntValue()), |
695 | 0 | encrypt.getKey("/O").getStringValue(), |
696 | 0 | encrypt.getKey("/U").getStringValue(), |
697 | 0 | V < 5 ? "" : encrypt.getKey("/OE").getStringValue(), |
698 | 0 | V < 5 ? "" : encrypt.getKey("/UE").getStringValue(), |
699 | 0 | V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(), |
700 | 0 | m->id1, // m->id1 == the other file's id1 |
701 | 0 | encrypt_metadata); |
702 | 0 | m->encryption_key = V >= 5 |
703 | 0 | ? qpdf.getEncryptionKey() |
704 | 0 | : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword()); |
705 | 0 | setEncryptionMinimumVersion(); |
706 | 0 | } |
707 | 0 | } |
708 | | |
709 | | void |
710 | | QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level) |
711 | 0 | { |
712 | 0 | if (!m->encryption) { |
713 | 0 | return; |
714 | 0 | } |
715 | 0 | if (compareVersions(major, minor, 1, 3) < 0) { |
716 | 0 | m->encryption = nullptr; |
717 | 0 | return; |
718 | 0 | } |
719 | 0 | int V = m->encryption->getV(); |
720 | 0 | int R = m->encryption->getR(); |
721 | 0 | if (compareVersions(major, minor, 1, 4) < 0) { |
722 | 0 | if (V > 1 || R > 2) { |
723 | 0 | m->encryption = nullptr; |
724 | 0 | } |
725 | 0 | } else if (compareVersions(major, minor, 1, 5) < 0) { |
726 | 0 | if (V > 2 || R > 3) { |
727 | 0 | m->encryption = nullptr; |
728 | 0 | } |
729 | 0 | } else if (compareVersions(major, minor, 1, 6) < 0) { |
730 | 0 | if (m->encrypt_use_aes) { |
731 | 0 | m->encryption = nullptr; |
732 | 0 | } |
733 | 0 | } else if ( |
734 | 0 | (compareVersions(major, minor, 1, 7) < 0) || |
735 | 0 | ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) { |
736 | 0 | if (V >= 5 || R >= 5) { |
737 | 0 | m->encryption = nullptr; |
738 | 0 | } |
739 | 0 | } |
740 | |
|
741 | 0 | if (!m->encryption) { |
742 | 0 | QTC::TC("qpdf", "QPDFWriter forced version disabled encryption"); |
743 | 0 | } |
744 | 0 | } |
745 | | |
746 | | void |
747 | | QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const |
748 | 15.4k | { |
749 | 15.4k | major = QUtil::string_to_int(version.c_str()); |
750 | 15.4k | minor = 0; |
751 | 15.4k | size_t p = version.find('.'); |
752 | 15.4k | if ((p != std::string::npos) && (version.length() > p)) { |
753 | 15.4k | minor = QUtil::string_to_int(version.substr(p + 1).c_str()); |
754 | 15.4k | } |
755 | 15.4k | std::string tmp = std::to_string(major) + "." + std::to_string(minor); |
756 | 15.4k | if (tmp != version) { |
757 | | // The version number in the input is probably invalid. This happens with some files that |
758 | | // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately |
759 | | // QPDFWriter doesn't have a way to give a warning, so we just ignore this case. |
760 | 30 | } |
761 | 15.4k | } |
762 | | |
763 | | int |
764 | | QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const |
765 | 7.69k | { |
766 | 7.69k | if (major1 < major2) { |
767 | 116 | return -1; |
768 | 7.57k | } else if (major1 > major2) { |
769 | 125 | return 1; |
770 | 7.45k | } else if (minor1 < minor2) { |
771 | 6.42k | return -1; |
772 | 6.42k | } else if (minor1 > minor2) { |
773 | 576 | return 1; |
774 | 576 | } else { |
775 | 454 | return 0; |
776 | 454 | } |
777 | 7.69k | } |
778 | | |
779 | | void |
780 | | QPDFWriter::setEncryptionMinimumVersion() |
781 | 7.74k | { |
782 | 7.74k | auto const R = m->encryption->getR(); |
783 | 7.74k | if (R >= 6) { |
784 | 0 | setMinimumPDFVersion("1.7", 8); |
785 | 7.74k | } else if (R == 5) { |
786 | 0 | setMinimumPDFVersion("1.7", 3); |
787 | 7.74k | } else if (R == 4) { |
788 | 0 | setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5"); |
789 | 7.74k | } else if (R == 3) { |
790 | 7.74k | setMinimumPDFVersion("1.4"); |
791 | 7.74k | } else { |
792 | 0 | setMinimumPDFVersion("1.3"); |
793 | 0 | } |
794 | 7.74k | } |
795 | | |
796 | | void |
797 | | QPDFWriter::setDataKey(int objid) |
798 | 80.4k | { |
799 | 80.4k | if (m->encryption) { |
800 | 80.4k | m->cur_data_key = QPDF::compute_data_key( |
801 | 80.4k | m->encryption_key, |
802 | 80.4k | objid, |
803 | 80.4k | 0, |
804 | 80.4k | m->encrypt_use_aes, |
805 | 80.4k | m->encryption->getV(), |
806 | 80.4k | m->encryption->getR()); |
807 | 80.4k | } |
808 | 80.4k | } |
809 | | |
810 | | unsigned int |
811 | | QPDFWriter::bytesNeeded(long long n) |
812 | 0 | { |
813 | 0 | unsigned int bytes = 0; |
814 | 0 | while (n) { |
815 | 0 | ++bytes; |
816 | 0 | n >>= 8; |
817 | 0 | } |
818 | 0 | return bytes; |
819 | 0 | } |
820 | | |
821 | | void |
822 | | QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes) |
823 | 0 | { |
824 | 0 | if (bytes > sizeof(unsigned long long)) { |
825 | 0 | throw std::logic_error("QPDFWriter::writeBinary called with too many bytes"); |
826 | 0 | } |
827 | 0 | unsigned char data[sizeof(unsigned long long)]; |
828 | 0 | for (unsigned int i = 0; i < bytes; ++i) { |
829 | 0 | data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff); |
830 | 0 | val >>= 8; |
831 | 0 | } |
832 | 0 | m->pipeline->write(data, bytes); |
833 | 0 | } |
834 | | |
835 | | void |
836 | | QPDFWriter::writeString(std::string_view str) |
837 | 3.50M | { |
838 | 3.50M | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
839 | 3.50M | } |
840 | | |
841 | | void |
842 | | QPDFWriter::writeStringQDF(std::string_view str) |
843 | 902k | { |
844 | 902k | if (m->qdf_mode) { |
845 | 0 | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
846 | 0 | } |
847 | 902k | } |
848 | | |
849 | | void |
850 | | QPDFWriter::writeStringNoQDF(std::string_view str) |
851 | 21.3k | { |
852 | 21.3k | if (!m->qdf_mode) { |
853 | 21.3k | m->pipeline->write(reinterpret_cast<unsigned char const*>(str.data()), str.size()); |
854 | 21.3k | } |
855 | 21.3k | } |
856 | | |
857 | | void |
858 | | QPDFWriter::writePad(size_t nspaces) |
859 | 0 | { |
860 | 0 | writeString(std::string(nspaces, ' ')); |
861 | 0 | } |
862 | | |
863 | | Pipeline* |
864 | | QPDFWriter::pushPipeline(Pipeline* p) |
865 | 21.8k | { |
866 | 21.8k | qpdf_assert_debug(!dynamic_cast<pl::Count*>(p)); |
867 | 21.8k | m->pipeline_stack.emplace_back(p); |
868 | 21.8k | return p; |
869 | 21.8k | } |
870 | | |
871 | | void |
872 | | QPDFWriter::initializePipelineStack(Pipeline* p) |
873 | 7.75k | { |
874 | 7.75k | m->pipeline = new pl::Count(1, p); |
875 | 7.75k | m->to_delete.emplace_back(std::shared_ptr<Pipeline>(m->pipeline)); |
876 | 7.75k | m->pipeline_stack.emplace_back(m->pipeline); |
877 | 7.75k | } |
878 | | |
879 | | void |
880 | | QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str) |
881 | 30.9k | { |
882 | 30.9k | activatePipelineStack(pp, false, &str, nullptr); |
883 | 30.9k | } |
884 | | |
885 | | void |
886 | | QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link) |
887 | 0 | { |
888 | 0 | m->count_buffer.clear(); |
889 | 0 | activatePipelineStack(pp, false, &m->count_buffer, std::move(link)); |
890 | 0 | } |
891 | | |
892 | | void |
893 | | QPDFWriter::activatePipelineStack( |
894 | | PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link) |
895 | 52.8k | { |
896 | 52.8k | pl::Count* c; |
897 | 52.8k | if (link) { |
898 | 0 | c = new pl::Count(m->next_stack_id, m->count_buffer, std::move(link)); |
899 | 52.8k | } else if (discard) { |
900 | 0 | c = new pl::Count(m->next_stack_id, nullptr); |
901 | 52.8k | } else if (!str) { |
902 | 21.8k | c = new pl::Count(m->next_stack_id, m->pipeline_stack.back()); |
903 | 30.9k | } else { |
904 | 30.9k | c = new pl::Count(m->next_stack_id, *str); |
905 | 30.9k | } |
906 | 52.8k | pp.stack_id = m->next_stack_id; |
907 | 52.8k | m->pipeline_stack.emplace_back(c); |
908 | 52.8k | m->pipeline = c; |
909 | 52.8k | ++m->next_stack_id; |
910 | 52.8k | } |
911 | | |
912 | | QPDFWriter::PipelinePopper::~PipelinePopper() |
913 | 60.4k | { |
914 | 60.4k | if (!stack_id) { |
915 | 7.63k | return; |
916 | 7.63k | } |
917 | 52.8k | qpdf_assert_debug(qw->m->pipeline_stack.size() >= 2); |
918 | 52.8k | qw->m->pipeline->finish(); |
919 | 52.8k | qpdf_assert_debug(dynamic_cast<pl::Count*>(qw->m->pipeline_stack.back()) == qw->m->pipeline); |
920 | | // It might be possible for this assertion to fail if writeLinearized exits by exception when |
921 | | // deterministic ID, but I don't think so. As of this writing, this is the only case in which |
922 | | // two dynamically allocated PipelinePopper objects ever exist at the same time, so the |
923 | | // assertion will fail if they get popped out of order from automatic destruction. |
924 | 52.8k | qpdf_assert_debug(qw->m->pipeline->id() == stack_id); |
925 | 52.8k | delete qw->m->pipeline_stack.back(); |
926 | 52.8k | qw->m->pipeline_stack.pop_back(); |
927 | 74.6k | while (!dynamic_cast<pl::Count*>(qw->m->pipeline_stack.back())) { |
928 | 21.8k | Pipeline* p = qw->m->pipeline_stack.back(); |
929 | 21.8k | if (dynamic_cast<Pl_MD5*>(p) == qw->m->md5_pipeline) { |
930 | 21.8k | qw->m->md5_pipeline = nullptr; |
931 | 21.8k | } |
932 | 21.8k | qw->m->pipeline_stack.pop_back(); |
933 | 21.8k | delete p; |
934 | 21.8k | } |
935 | 52.8k | qw->m->pipeline = dynamic_cast<pl::Count*>(qw->m->pipeline_stack.back()); |
936 | 52.8k | } |
937 | | |
938 | | void |
939 | | QPDFWriter::adjustAESStreamLength(size_t& length) |
940 | 21.9k | { |
941 | 21.9k | if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) { |
942 | | // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16. It will |
943 | | // also be prepended by 16 bits of random data. |
944 | 0 | length += 32 - (length & 0xf); |
945 | 0 | } |
946 | 21.9k | } |
947 | | |
948 | | void |
949 | | QPDFWriter::pushEncryptionFilter(PipelinePopper& pp) |
950 | 21.8k | { |
951 | 21.8k | if (m->encryption && !m->cur_data_key.empty()) { |
952 | 21.8k | Pipeline* p = nullptr; |
953 | 21.8k | if (m->encrypt_use_aes) { |
954 | 0 | p = new Pl_AES_PDF( |
955 | 0 | "aes stream encryption", |
956 | 0 | m->pipeline, |
957 | 0 | true, |
958 | 0 | QUtil::unsigned_char_pointer(m->cur_data_key), |
959 | 0 | m->cur_data_key.length()); |
960 | 21.8k | } else { |
961 | 21.8k | p = new Pl_RC4( |
962 | 21.8k | "rc4 stream encryption", |
963 | 21.8k | m->pipeline, |
964 | 21.8k | QUtil::unsigned_char_pointer(m->cur_data_key), |
965 | 21.8k | QIntC::to_int(m->cur_data_key.length())); |
966 | 21.8k | } |
967 | 21.8k | pushPipeline(p); |
968 | 21.8k | } |
969 | | // Must call this unconditionally so we can call popPipelineStack to balance |
970 | | // pushEncryptionFilter(). |
971 | 21.8k | activatePipelineStack(pp); |
972 | 21.8k | } |
973 | | |
974 | | void |
975 | | QPDFWriter::pushMD5Pipeline(PipelinePopper& pp) |
976 | 0 | { |
977 | 0 | if (!m->id2.empty()) { |
978 | | // Can't happen in the code |
979 | 0 | throw std::logic_error( |
980 | 0 | "Deterministic ID computation enabled after ID generation has already occurred."); |
981 | 0 | } |
982 | 0 | qpdf_assert_debug(m->deterministic_id); |
983 | 0 | qpdf_assert_debug(m->md5_pipeline == nullptr); |
984 | 0 | qpdf_assert_debug(m->pipeline->getCount() == 0); |
985 | 0 | m->md5_pipeline = new Pl_MD5("qpdf md5", m->pipeline); |
986 | 0 | m->md5_pipeline->persistAcrossFinish(true); |
987 | | // Special case code in popPipelineStack clears m->md5_pipeline upon deletion. |
988 | 0 | pushPipeline(m->md5_pipeline); |
989 | 0 | activatePipelineStack(pp); |
990 | 0 | } |
991 | | |
992 | | void |
993 | | QPDFWriter::computeDeterministicIDData() |
994 | 0 | { |
995 | 0 | qpdf_assert_debug(m->md5_pipeline != nullptr); |
996 | 0 | qpdf_assert_debug(m->deterministic_id_data.empty()); |
997 | 0 | m->deterministic_id_data = m->md5_pipeline->getHexDigest(); |
998 | 0 | m->md5_pipeline->enable(false); |
999 | 0 | } |
1000 | | |
1001 | | int |
1002 | | QPDFWriter::openObject(int objid) |
1003 | 87.9k | { |
1004 | 87.9k | if (objid == 0) { |
1005 | 7.50k | objid = m->next_objid++; |
1006 | 7.50k | } |
1007 | 87.9k | m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); |
1008 | 87.9k | writeString(std::to_string(objid)); |
1009 | 87.9k | writeString(" 0 obj\n"); |
1010 | 87.9k | return objid; |
1011 | 87.9k | } |
1012 | | |
1013 | | void |
1014 | | QPDFWriter::closeObject(int objid) |
1015 | 87.8k | { |
1016 | | // Write a newline before endobj as it makes the file easier to repair. |
1017 | 87.8k | writeString("\nendobj\n"); |
1018 | 87.8k | writeStringQDF("\n"); |
1019 | 87.8k | auto& new_obj = m->new_obj[objid]; |
1020 | 87.8k | new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); |
1021 | 87.8k | } |
1022 | | |
1023 | | void |
1024 | | QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og) |
1025 | 0 | { |
1026 | 0 | int objid = og.getObj(); |
1027 | 0 | if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) { |
1028 | | // This is not an object stream. |
1029 | 0 | return; |
1030 | 0 | } |
1031 | | |
1032 | | // Reserve numbers for the objects that belong to this object stream. |
1033 | 0 | for (auto const& iter: m->object_stream_to_objects[objid]) { |
1034 | 0 | m->obj[iter].renumber = m->next_objid++; |
1035 | 0 | } |
1036 | 0 | } |
1037 | | |
1038 | | void |
1039 | | QPDFWriter::enqueueObject(QPDFObjectHandle object) |
1040 | 2.52M | { |
1041 | 2.52M | if (object.isIndirect()) { |
1042 | | // This owner check can only be done for indirect objects. It is possible for a direct |
1043 | | // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from |
1044 | | // one file was insert into another file without copying. Doing that is safe even if the |
1045 | | // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner. |
1046 | 225k | if (object.getOwningQPDF() != &(m->pdf)) { |
1047 | 0 | QTC::TC("qpdf", "QPDFWriter foreign object"); |
1048 | 0 | throw std::logic_error( |
1049 | 0 | "QPDFObjectHandle from different QPDF found while writing. Use " |
1050 | 0 | "QPDF::copyForeignObject to add objects from another file."); |
1051 | 0 | } |
1052 | | |
1053 | 225k | if (m->qdf_mode && object.isStreamOfType("/XRef")) { |
1054 | | // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so |
1055 | | // will confuse fix-qdf, which expects to see only one XRef stream at the end of the |
1056 | | // file. This case can occur when creating a QDF from a file with object streams when |
1057 | | // preserving unreferenced objects since the old cross reference streams are not |
1058 | | // actually referenced by object number. |
1059 | 0 | QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode"); |
1060 | 0 | return; |
1061 | 0 | } |
1062 | | |
1063 | 225k | QPDFObjGen og = object.getObjGen(); |
1064 | 225k | auto& obj = m->obj[og]; |
1065 | | |
1066 | 225k | if (obj.renumber == 0) { |
1067 | 80.6k | if (obj.object_stream > 0) { |
1068 | | // This is in an object stream. Don't process it here. Instead, enqueue the object |
1069 | | // stream. Object streams always have generation 0. |
1070 | | // Detect loops by storing invalid object ID -1, which will get overwritten later. |
1071 | 0 | obj.renumber = -1; |
1072 | 0 | enqueueObject(m->pdf.getObject(obj.object_stream, 0)); |
1073 | 80.6k | } else { |
1074 | 80.6k | m->object_queue.push_back(object); |
1075 | 80.6k | obj.renumber = m->next_objid++; |
1076 | | |
1077 | 80.6k | if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) { |
1078 | | // For linearized files, uncompressed objects go at end, and we take care of |
1079 | | // assigning numbers to them elsewhere. |
1080 | 0 | if (!m->linearized) { |
1081 | 0 | assignCompressedObjectNumbers(og); |
1082 | 0 | } |
1083 | 80.6k | } else if ((!m->direct_stream_lengths) && object.isStream()) { |
1084 | | // reserve next object ID for length |
1085 | 0 | ++m->next_objid; |
1086 | 0 | } |
1087 | 80.6k | } |
1088 | 144k | } else if (obj.renumber == -1) { |
1089 | | // This can happen if a specially constructed file indicates that an object stream is |
1090 | | // inside itself. |
1091 | 0 | } |
1092 | 225k | return; |
1093 | 2.29M | } else if (!m->linearized) { |
1094 | 2.29M | if (object.isArray()) { |
1095 | 1.61M | for (auto& item: object.as_array()) { |
1096 | 1.61M | enqueueObject(item); |
1097 | 1.61M | } |
1098 | 1.29M | } else if (auto d = object.as_dictionary()) { |
1099 | 1.29M | for (auto const& item: d) { |
1100 | 199k | if (!item.second.null()) { |
1101 | 150k | enqueueObject(item.second); |
1102 | 150k | } |
1103 | 199k | } |
1104 | 1.29M | } |
1105 | 2.29M | } else { |
1106 | | // ignore |
1107 | 0 | } |
1108 | 2.52M | } |
1109 | | |
1110 | | void |
1111 | | QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) |
1112 | 737k | { |
1113 | 737k | if (!m->linearized) { |
1114 | 737k | enqueueObject(child); |
1115 | 737k | } |
1116 | 737k | if (child.isIndirect()) { |
1117 | 122k | writeString(std::to_string(m->obj[child].renumber)); |
1118 | 122k | writeString(" 0 R"); |
1119 | 614k | } else { |
1120 | 614k | unparseObject(child, level, flags); |
1121 | 614k | } |
1122 | 737k | } |
1123 | | |
1124 | | void |
1125 | | QPDFWriter::writeTrailer( |
1126 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass) |
1127 | 7.50k | { |
1128 | 7.50k | QPDFObjectHandle trailer = getTrimmedTrailer(); |
1129 | 7.50k | if (xref_stream) { |
1130 | 0 | m->cur_data_key.clear(); |
1131 | 7.50k | } else { |
1132 | 7.50k | writeString("trailer <<"); |
1133 | 7.50k | } |
1134 | 7.50k | writeStringQDF("\n"); |
1135 | 7.50k | if (which == t_lin_second) { |
1136 | 0 | writeString(" /Size "); |
1137 | 0 | writeString(std::to_string(size)); |
1138 | 7.50k | } else { |
1139 | 16.6k | for (auto const& [key, value]: trailer.as_dictionary()) { |
1140 | 16.6k | if (value.null()) { |
1141 | 2.80k | continue; |
1142 | 2.80k | } |
1143 | 13.8k | writeStringQDF(" "); |
1144 | 13.8k | writeStringNoQDF(" "); |
1145 | 13.8k | writeString(Name::normalize(key)); |
1146 | 13.8k | writeString(" "); |
1147 | 13.8k | if (key == "/Size") { |
1148 | 1.88k | writeString(std::to_string(size)); |
1149 | 1.88k | if (which == t_lin_first) { |
1150 | 0 | writeString(" /Prev "); |
1151 | 0 | qpdf_offset_t pos = m->pipeline->getCount(); |
1152 | 0 | writeString(std::to_string(prev)); |
1153 | 0 | writePad(QIntC::to_size(pos - m->pipeline->getCount() + 21)); |
1154 | 0 | } |
1155 | 11.9k | } else { |
1156 | 11.9k | unparseChild(value, 1, 0); |
1157 | 11.9k | } |
1158 | 13.8k | writeStringQDF("\n"); |
1159 | 13.8k | } |
1160 | 7.50k | } |
1161 | | |
1162 | | // Write ID |
1163 | 7.50k | writeStringQDF(" "); |
1164 | 7.50k | writeString(" /ID ["); |
1165 | 7.50k | if (linearization_pass == 1) { |
1166 | 0 | std::string original_id1 = getOriginalID1(); |
1167 | 0 | if (original_id1.empty()) { |
1168 | 0 | writeString("<00000000000000000000000000000000>"); |
1169 | 0 | } else { |
1170 | | // Write a string of zeroes equal in length to the representation of the original ID. |
1171 | | // While writing the original ID would have the same number of bytes, it would cause a |
1172 | | // change to the deterministic ID generated by older versions of the software that |
1173 | | // hard-coded the length of the ID to 16 bytes. |
1174 | 0 | writeString("<"); |
1175 | 0 | size_t len = QPDF_String(original_id1).unparse(true).length() - 2; |
1176 | 0 | for (size_t i = 0; i < len; ++i) { |
1177 | 0 | writeString("0"); |
1178 | 0 | } |
1179 | 0 | writeString(">"); |
1180 | 0 | } |
1181 | 0 | writeString("<00000000000000000000000000000000>"); |
1182 | 7.50k | } else { |
1183 | 7.50k | if ((linearization_pass == 0) && (m->deterministic_id)) { |
1184 | 0 | computeDeterministicIDData(); |
1185 | 0 | } |
1186 | 7.50k | generateID(); |
1187 | 7.50k | writeString(QPDF_String(m->id1).unparse(true)); |
1188 | 7.50k | writeString(QPDF_String(m->id2).unparse(true)); |
1189 | 7.50k | } |
1190 | 7.50k | writeString("]"); |
1191 | | |
1192 | 7.50k | if (which != t_lin_second) { |
1193 | | // Write reference to encryption dictionary |
1194 | 7.50k | if (m->encryption) { |
1195 | 7.50k | writeString(" /Encrypt "); |
1196 | 7.50k | writeString(std::to_string(m->encryption_dict_objid)); |
1197 | 7.50k | writeString(" 0 R"); |
1198 | 7.50k | } |
1199 | 7.50k | } |
1200 | | |
1201 | 7.50k | writeStringQDF("\n"); |
1202 | 7.50k | writeStringNoQDF(" "); |
1203 | 7.50k | writeString(">>"); |
1204 | 7.50k | } |
1205 | | |
1206 | | bool |
1207 | | QPDFWriter::willFilterStream( |
1208 | | QPDFObjectHandle stream, |
1209 | | bool& compress_stream, // out only |
1210 | | bool& is_root_metadata, // out only |
1211 | | std::string* stream_data) |
1212 | 21.9k | { |
1213 | 21.9k | compress_stream = false; |
1214 | 21.9k | is_root_metadata = false; |
1215 | | |
1216 | 21.9k | QPDFObjGen old_og = stream.getObjGen(); |
1217 | 21.9k | QPDFObjectHandle stream_dict = stream.getDict(); |
1218 | | |
1219 | 21.9k | if (stream.isRootMetadata()) { |
1220 | 61 | is_root_metadata = true; |
1221 | 61 | } |
1222 | 21.9k | bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level; |
1223 | 21.9k | bool filter_on_write = stream.getFilterOnWrite(); |
1224 | 21.9k | if (!filter_on_write) { |
1225 | 0 | QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false"); |
1226 | 0 | filter = false; |
1227 | 0 | } |
1228 | 21.9k | if (filter_on_write && m->compress_streams) { |
1229 | | // Don't filter if the stream is already compressed with FlateDecode. This way we don't make |
1230 | | // it worse if the original file used a better Flate algorithm, and we don't spend time and |
1231 | | // CPU cycles uncompressing and recompressing stuff. This can be overridden with |
1232 | | // setRecompressFlate(true). |
1233 | 21.9k | QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); |
1234 | 21.9k | if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() && |
1235 | 21.9k | (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) { |
1236 | 3.15k | QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); |
1237 | 3.15k | filter = false; |
1238 | 3.15k | } |
1239 | 21.9k | } |
1240 | 21.9k | bool normalize = false; |
1241 | 21.9k | bool uncompress = false; |
1242 | 21.9k | if (filter_on_write && is_root_metadata && |
1243 | 21.9k | (!m->encryption || !m->encryption->getEncryptMetadata())) { |
1244 | 0 | QTC::TC("qpdf", "QPDFWriter not compressing metadata"); |
1245 | 0 | filter = true; |
1246 | 0 | compress_stream = false; |
1247 | 0 | uncompress = true; |
1248 | 21.9k | } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) { |
1249 | 0 | normalize = true; |
1250 | 0 | filter = true; |
1251 | 21.9k | } else if (filter_on_write && filter && m->compress_streams) { |
1252 | 18.7k | compress_stream = true; |
1253 | 18.7k | QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); |
1254 | 18.7k | } |
1255 | | |
1256 | | // Disable compression for empty streams to improve compatibility |
1257 | 21.9k | if (stream_dict.getKey("/Length").isInteger() && |
1258 | 21.9k | stream_dict.getKey("/Length").getIntValue() == 0) { |
1259 | 27 | filter = true; |
1260 | 27 | compress_stream = false; |
1261 | 27 | } |
1262 | | |
1263 | 21.9k | bool filtered = false; |
1264 | 30.9k | for (bool first_attempt: {true, false}) { |
1265 | 30.9k | PipelinePopper pp_stream_data(this); |
1266 | 30.9k | if (stream_data != nullptr) { |
1267 | 30.9k | activatePipelineStack(pp_stream_data, *stream_data); |
1268 | 30.9k | } else { |
1269 | 0 | activatePipelineStack(pp_stream_data, true); |
1270 | 0 | } |
1271 | 30.9k | try { |
1272 | 30.9k | filtered = stream.pipeStreamData( |
1273 | 30.9k | m->pipeline, |
1274 | 30.9k | !filter ? 0 |
1275 | 30.9k | : ((normalize ? qpdf_ef_normalize : 0) | |
1276 | 18.7k | (compress_stream ? qpdf_ef_compress : 0)), |
1277 | 30.9k | !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level), |
1278 | 30.9k | false, |
1279 | 30.9k | first_attempt); |
1280 | 30.9k | if (filter && !filtered) { |
1281 | | // Try again |
1282 | 9.04k | filter = false; |
1283 | 9.04k | stream.setFilterOnWrite(false); |
1284 | 21.9k | } else { |
1285 | 21.9k | break; |
1286 | 21.9k | } |
1287 | 30.9k | } catch (std::runtime_error& e) { |
1288 | 32 | if (filter && first_attempt) { |
1289 | 26 | stream.warnIfPossible("error while getting stream data: "s + e.what()); |
1290 | 26 | stream.warnIfPossible("qpdf will attempt to write the damaged stream unchanged"); |
1291 | 26 | filter = false; |
1292 | 26 | stream.setFilterOnWrite(false); |
1293 | 26 | continue; |
1294 | 26 | } |
1295 | 6 | throw std::runtime_error( |
1296 | 6 | "error while getting stream data for " + stream.unparse() + ": " + e.what()); |
1297 | 32 | } |
1298 | 9.04k | if (stream_data) { |
1299 | 9.04k | stream_data->clear(); |
1300 | 9.04k | } |
1301 | 9.04k | } |
1302 | 21.9k | if (!filtered) { |
1303 | 12.1k | compress_stream = false; |
1304 | 12.1k | } |
1305 | 21.9k | return filtered; |
1306 | 21.9k | } |
1307 | | |
// Serialize object to the current pipeline according to its type code.
//
//   object        - the object to write
//   level         - nesting depth, used for QDF indentation; a negative value throws
//                   std::logic_error
//   flags         - bit set of f_* values (f_stream, f_filtered, f_hex_string, f_no_encryption,
//                   f_in_ostream are consulted here)
//   stream_length - value written for /Length when f_stream is set and stream lengths are direct
//   compress      - when set together with f_filtered, "/Filter /FlateDecode" is written into a
//                   stream dictionary
//
// Behavior by type:
//   * array      - "[" ... "]" with each item written through unparseChild at level + 1
//   * dictionary - "<<" ... ">>" with non-null values written through unparseChild. The document
//                  root gets its /Extensions /ADBE entry adjusted on a shallow copy. With
//                  f_stream the dictionary is treated as a stream dictionary: /Length is rewritten
//                  and /Filter, /DecodeParms and /Crypt entries are adjusted on a shallow copy.
//   * stream     - the data is obtained through willFilterStream, the dictionary is written by a
//                  recursive call, and the data is written between "stream" and "endstream"
//                  through pushEncryptionFilter
//   * string     - encrypted with m->cur_data_key (AES or RC4) when encryption applies, written in
//                  hex form when f_hex_string is set, otherwise written via unparseResolved
//   * other      - written via unparseResolved
1308 | | void
1309 | | QPDFWriter::unparseObject(
1310 | | QPDFObjectHandle object, int level, int flags, size_t stream_length, bool compress)
1311 | 716k | {
1312 | 716k | QPDFObjGen old_og = object.getObjGen();
              // Children never inherit f_stream; only the dictionary passed directly by the stream
              // branch below is treated as a stream dictionary.
1313 | 716k | int child_flags = flags & ~f_stream;
1314 | 716k | if (level < 0) {
1315 | 0 | throw std::logic_error("invalid level in QPDFWriter::unparseObject");
1316 | 0 | }
1317 | | // For non-qdf, "indent" is a single space between tokens. For qdf, indent includes the
1318 | | // preceding newline.
1319 | 716k | std::string indent = " ";
1320 | 716k | if (m->qdf_mode) {
1321 | 0 | indent.append(static_cast<size_t>(2 * level), ' ');
1322 | 0 | indent[0] = '\n';
1323 | 0 | }
1324 | |
1325 | 716k | if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
1326 | | // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the
1327 | | // [ in the /H key of the linearization parameter dictionary. We'll do this unconditionally
1328 | | // for all arrays because it looks nicer and doesn't make the files that much bigger.
1329 | 61.4k | writeString("[");
1330 | 408k | for (auto const& item: object.as_array()) {
1331 | 408k | writeString(indent);
1332 | 408k | writeStringQDF(" ");
1333 | 408k | unparseChild(item, level + 1, child_flags);
1334 | 408k | }
1335 | 61.4k | writeString(indent);
1336 | 61.4k | writeString("]");
1337 | 655k | } else if (tc == ::ot_dictionary) {
1338 | | // Handle special cases for specific dictionaries.
1339 | |
1340 | 96.8k | if (old_og == m->root_og) {
1341 | | // Extensions dictionaries.
1342 | |
1343 | | // We have one of several cases:
1344 | | //
1345 | | // * We need ADBE
1346 | | // - We already have Extensions
1347 | | // - If it has the right ADBE, preserve it
1348 | | // - Otherwise, replace ADBE
1349 | | // - We don't have Extensions: create one from scratch
1350 | | // * We don't want ADBE
1351 | | // - We already have Extensions
1352 | | // - If it only has ADBE, remove it
1353 | | // - If it has other things, keep those and remove ADBE
1354 | | // - We have no extensions: no action required
1355 | | //
1356 | | // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE
1357 | | // dictionary, so we can modify in place.
1358 | |
1359 | 7.48k | auto extensions = object.getKey("/Extensions");
1360 | 7.48k | const bool has_extensions = extensions.isDictionary();
1361 | 7.48k | const bool need_extensions_adbe = m->final_extension_level > 0;
1362 | |
1363 | 7.48k | if (has_extensions || need_extensions_adbe) {
1364 | | // Make a shallow copy of this object so we can modify it safely without affecting
1365 | | // the original. This code has logic to skip certain keys in agreement with
1366 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them
1367 | | // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy
1368 | | // here because all we are doing is removing or replacing top-level keys.
1369 | 235 | object = object.unsafeShallowCopy();
1370 | 235 | if (!has_extensions) {
1371 | 0 | extensions = QPDFObjectHandle();
1372 | 0 | }
1373 | |
1374 | 235 | const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE");
1375 | 235 | const bool have_extensions_other =
1376 | 235 | extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u);
1377 | |
1378 | 235 | if (need_extensions_adbe) {
1379 | 28 | if (!(have_extensions_other || have_extensions_adbe)) {
1380 | | // We need Extensions and don't have it. Create it here.
1381 | 0 | QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1);
1382 | 0 | extensions = object.replaceKeyAndGetNew(
1383 | 0 | "/Extensions", QPDFObjectHandle::newDictionary());
1384 | 0 | }
1385 | 207 | } else if (!have_extensions_other) {
1386 | | // We have Extensions dictionary and don't want one.
1387 | 66 | if (have_extensions_adbe) {
1388 | 64 | QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1389 | 64 | object.removeKey("/Extensions");
1390 | 64 | extensions = QPDFObjectHandle(); // uninitialized
1391 | 64 | }
1392 | 66 | }
1393 | |
1394 | 235 | if (extensions) {
1395 | 171 | QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1396 | 171 | QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1397 | 171 | if (adbe.isDictionary() &&
1398 | 171 | adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) &&
1399 | 171 | adbe.getKey("/ExtensionLevel").isInteger() &&
1400 | 171 | (adbe.getKey("/ExtensionLevel").getIntValue() ==
1401 | 15 | m->final_extension_level)) {
1402 | 11 | QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1403 | 160 | } else {
1404 | 160 | if (need_extensions_adbe) {
1405 | 17 | extensions.replaceKey(
1406 | 17 | "/ADBE",
1407 | 17 | QPDFObjectHandle::parse(
1408 | 17 | "<< /BaseVersion /" + m->final_pdf_version +
1409 | 17 | " /ExtensionLevel " + std::to_string(m->final_extension_level) +
1410 | 17 | " >>"));
1411 | 143 | } else {
1412 | 143 | QTC::TC("qpdf", "QPDFWriter remove ADBE");
1413 | 143 | extensions.removeKey("/ADBE");
1414 | 143 | }
1415 | 160 | }
1416 | 171 | }
1417 | 235 | }
1418 | 7.48k | }
1419 | |
1420 | | // Stream dictionaries.
1421 | |
1422 | 96.8k | if (flags & f_stream) {
1423 | | // Suppress /Length since we will write it manually
1424 | |
1425 | | // Make a shallow copy of this object so we can modify it safely without affecting the
1426 | | // original. This code has logic to skip certain keys in agreement with
1427 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't
1428 | | // leave unreferenced objects in the output. We can use unsafeShallowCopy here because
1429 | | // all we are doing is removing or replacing top-level keys.
1430 | 21.9k | object = object.unsafeShallowCopy();
1431 | |
1432 | 21.9k | object.removeKey("/Length");
1433 | |
1434 | | // If /DecodeParms is an empty list, remove it.
1435 | 21.9k | if (object.getKey("/DecodeParms").isArray() &&
1436 | 21.9k | (0 == object.getKey("/DecodeParms").getArrayNItems())) {
1437 | 1 | QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1438 | 1 | object.removeKey("/DecodeParms");
1439 | 1 | }
1440 | |
1441 | 21.9k | if (flags & f_filtered) {
1442 | | // We will supply our own filter and decode parameters.
1443 | 9.79k | object.removeKey("/Filter");
1444 | 9.79k | object.removeKey("/DecodeParms");
1445 | 12.1k | } else {
1446 | | // Make sure, no matter what else we have, that we don't have /Crypt in the output
1447 | | // filters.
1448 | 12.1k | QPDFObjectHandle filter = object.getKey("/Filter");
1449 | 12.1k | QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1450 | 12.1k | if (filter.isOrHasName("/Crypt")) {
1451 | 121 | if (filter.isName()) {
1452 | 17 | object.removeKey("/Filter");
1453 | 17 | object.removeKey("/DecodeParms");
1454 | 104 | } else {
1455 | 104 | int idx = -1;
1456 | 4.93k | for (int i = 0; i < filter.getArrayNItems(); ++i) {
1457 | 4.93k | QPDFObjectHandle item = filter.getArrayItem(i);
1458 | 4.93k | if (item.isNameAndEquals("/Crypt")) {
1459 | 104 | idx = i;
1460 | 104 | break;
1461 | 104 | }
1462 | 4.93k | }
1463 | 104 | if (idx >= 0) {
1464 | | // If filter is an array, then the code in QPDF_Stream has already
1465 | | // verified that DecodeParms and Filters are arrays of the same length,
1466 | | // but if they weren't for some reason, eraseItem does type and bounds
1467 | | // checking.
1468 | 104 | QTC::TC("qpdf", "QPDFWriter remove Crypt");
1469 | 104 | filter.eraseItem(idx);
1470 | 104 | decode_parms.eraseItem(idx);
1471 | 104 | }
1472 | 104 | }
1473 | 121 | }
1474 | 12.1k | }
1475 | 21.9k | }
1476 | |
1477 | 96.8k | writeString("<<");
1478 | |
1479 | 370k | for (auto const& [key, value]: object.as_dictionary()) {
1480 | 370k | if (!value.null()) {
1481 | 316k | writeString(indent);
1482 | 316k | writeStringQDF(" ");
1483 | 316k | writeString(Name::normalize(key));
1484 | 316k | writeString(" ");
                  // The /Contents value of a /Sig dictionary that has /ByteRange is written as a
                  // hex string and is never encrypted.
1485 | 316k | if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
1486 | 316k | object.hasKey("/ByteRange")) {
1487 | 10 | QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1488 | 10 | unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption);
1489 | 316k | } else {
1490 | 316k | unparseChild(value, level + 1, child_flags);
1491 | 316k | }
1492 | 316k | }
1493 | 370k | }
1494 | |
1495 | 96.8k | if (flags & f_stream) {
1496 | 21.8k | writeString(indent);
1497 | 21.8k | writeStringQDF(" ");
1498 | 21.8k | writeString("/Length ");
1499 | |
              // /Length is either the literal stream_length or a reference to the separate length
              // object whose id was stored in m->cur_stream_length_id by the stream branch.
1500 | 21.8k | if (m->direct_stream_lengths) {
1501 | 21.8k | writeString(std::to_string(stream_length));
1502 | 21.8k | } else {
1503 | 0 | writeString(std::to_string(m->cur_stream_length_id));
1504 | 0 | writeString(" 0 R");
1505 | 0 | }
1506 | 21.8k | if (compress && (flags & f_filtered)) {
1507 | 9.70k | writeString(indent);
1508 | 9.70k | writeStringQDF(" ");
1509 | 9.70k | writeString("/Filter /FlateDecode");
1510 | 9.70k | }
1511 | 21.8k | }
1512 | |
1513 | 96.8k | writeString(indent);
1514 | 96.8k | writeString(">>");
1515 | 558k | } else if (tc == ::ot_stream) {
1516 | | // Write stream data to a buffer.
              // With indirect stream lengths, the length object uses the id immediately after the
              // stream's own new id (enqueueObject reserves that id for streams).
1517 | 21.9k | if (!m->direct_stream_lengths) {
1518 | 0 | m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1519 | 0 | }
1520 | |
1521 | 21.9k | flags |= f_stream;
1522 | 21.9k | bool compress_stream = false;
1523 | 21.9k | bool is_metadata = false;
1524 | 21.9k | std::string stream_data;
1525 | 21.9k | if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) {
1526 | 9.79k | flags |= f_filtered;
1527 | 9.79k | }
1528 | 21.9k | QPDFObjectHandle stream_dict = object.getDict();
1529 | |
1530 | 21.9k | m->cur_stream_length = stream_data.size();
1531 | 21.9k | if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) {
1532 | | // Don't encrypt stream data for the metadata stream
1533 | 0 | m->cur_data_key.clear();
1534 | 0 | }
1535 | 21.9k | adjustAESStreamLength(m->cur_stream_length);
1536 | 21.9k | unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream);
              // Remember the last data byte; in QDF mode it decides whether a newline is added
              // before endstream.
1537 | 21.9k | char last_char = stream_data.empty() ? '\0' : stream_data.back();
1538 | 21.9k | writeString("\nstream\n");
              // The nested scope ends the encryption pipeline (via pp_enc's destructor) before
              // "endstream" is written.
1539 | 21.9k | {
1540 | 21.9k | PipelinePopper pp_enc(this);
1541 | 21.9k | pushEncryptionFilter(pp_enc);
1542 | 21.9k | writeString(stream_data);
1543 | 21.9k | }
1544 | |
1545 | 21.9k | if ((m->added_newline =
1546 | 21.9k | m->newline_before_endstream || (m->qdf_mode && last_char != '\n'))) {
1547 | 0 | writeString("\nendstream");
1548 | 21.9k | } else {
1549 | 21.9k | writeString("endstream");
1550 | 21.9k | }
1551 | 536k | } else if (tc == ::ot_string) {
1552 | 18.9k | std::string val;
              // Strings are encrypted only when encryption is on, a data key is set, and neither
              // f_in_ostream nor f_no_encryption is present in flags.
1553 | 18.9k | if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) &&
1554 | 18.9k | !m->cur_data_key.empty()) {
1555 | 14.5k | val = object.getStringValue();
1556 | 14.5k | if (m->encrypt_use_aes) {
1557 | 0 | Pl_Buffer bufpl("encrypted string");
1558 | 0 | Pl_AES_PDF pl(
1559 | 0 | "aes encrypt string",
1560 | 0 | &bufpl,
1561 | 0 | true,
1562 | 0 | QUtil::unsigned_char_pointer(m->cur_data_key),
1563 | 0 | m->cur_data_key.length());
1564 | 0 | pl.writeString(val);
1565 | 0 | pl.finish();
1566 | 0 | val = QPDF_String(bufpl.getString()).unparse(true);
1567 | 14.5k | } else {
                  // RC4 is applied in place on a mutable copy of the string's bytes.
1568 | 14.5k | auto tmp_ph = QUtil::make_unique_cstr(val);
1569 | 14.5k | char* tmp = tmp_ph.get();
1570 | 14.5k | size_t vlen = val.length();
1571 | 14.5k | RC4 rc4(
1572 | 14.5k | QUtil::unsigned_char_pointer(m->cur_data_key),
1573 | 14.5k | QIntC::to_int(m->cur_data_key.length()));
1574 | 14.5k | auto data = QUtil::unsigned_char_pointer(tmp);
1575 | 14.5k | rc4.process(data, vlen, data);
1576 | 14.5k | val = QPDF_String(std::string(tmp, vlen)).unparse();
1577 | 14.5k | }
1578 | 14.5k | } else if (flags & f_hex_string) {
1579 | 10 | val = QPDF_String(object.getStringValue()).unparse(true);
1580 | 4.44k | } else {
1581 | 4.44k | val = object.unparseResolved();
1582 | 4.44k | }
1583 | 18.9k | writeString(val);
1584 | 517k | } else {
1585 | 517k | writeString(object.unparseResolved());
1586 | 517k | }
1587 | 716k | }
1588 | | |
1589 | | void |
1590 | | QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj) |
1591 | 0 | { |
1592 | 0 | qpdf_assert_debug(first_obj > 0); |
1593 | 0 | bool is_first = true; |
1594 | 0 | auto id = std::to_string(first_obj) + ' '; |
1595 | 0 | for (auto& offset: offsets) { |
1596 | 0 | if (is_first) { |
1597 | 0 | is_first = false; |
1598 | 0 | } else { |
1599 | 0 | writeStringQDF("\n"); |
1600 | 0 | writeStringNoQDF(" "); |
1601 | 0 | } |
1602 | 0 | writeString(id); |
1603 | 0 | util::increment(id, 1); |
1604 | 0 | writeString(std::to_string(offset)); |
1605 | 0 | } |
1606 | 0 | writeString("\n"); |
1607 | 0 | } |
1608 | | |
// Write one object stream. `object` is the original object stream (or a null place-holder when the
// stream is being generated from scratch); the objects it will contain are looked up in
// m->object_stream_to_objects under the original object id.
//
// The stream body is produced in two passes: pass 1 serializes the member objects into a buffer
// while recording where each one starts; pass 2 writes the id/offset table followed by the pass 1
// buffer, optionally through a Flate compressor. Finally the stream object itself (dictionary plus
// data) is written to the real output.
void
QPDFWriter::writeObjectStream(QPDFObjectHandle object)
{
    // Note: object might be null if this is a place-holder for an object stream that we are
    // generating from scratch.

    QPDFObjGen old_og = object.getObjGen();
    qpdf_assert_debug(old_og.getGen() == 0);
    int old_id = old_og.getObj();
    int new_stream_id = m->obj[old_og].renumber;

    std::vector<qpdf_offset_t> offsets;
    qpdf_offset_t first = 0;

    // Generate stream itself. We have to do this in two passes so we can calculate offsets in the
    // first pass.
    std::string stream_buffer_pass1;
    std::string stream_buffer_pass2;
    int first_obj = -1;
    // QDF mode always leaves the object stream uncompressed.
    const bool compressed = m->compress_streams && !m->qdf_mode;
    {
        // Pass 1
        PipelinePopper pp_ostream_pass1(this);
        activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1);

        // Zero-based index of the current object within the stream.
        int count = -1;
        for (auto const& obj: m->object_stream_to_objects[old_id]) {
            ++count;
            int new_obj = m->obj[obj].renumber;
            if (first_obj == -1) {
                // Remember the new id of the first member; the offset table starts from it.
                first_obj = new_obj;
            }
            if (m->qdf_mode) {
                writeString(
                    "%% Object stream: object " + std::to_string(new_obj) + ", index " +
                    std::to_string(count));
                if (!m->suppress_original_object_ids) {
                    writeString("; original object ID: " + std::to_string(obj.getObj()));
                    // For compatibility, only write the generation if non-zero. While object
                    // streams only allow objects with generation 0, if we are generating object
                    // streams, the old object could have a non-zero generation.
                    if (obj.getGen() != 0) {
                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
                        writeString(" " + std::to_string(obj.getGen()));
                    }
                }
                writeString("\n");
            }

            // Record the position at which this object starts (after any QDF comment).
            offsets.push_back(m->pipeline->getCount());
            // To avoid double-counting objects being written in object streams for progress
            // reporting, decrement in pass 1.
            indicateProgress(true, false);

            QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
            if (obj_to_write.isStream()) {
                // This condition occurred in a fuzz input. Ideally we should block it at parse
                // time, but it's not clear to me how to construct a case for this.
                obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
                obj_to_write = QPDFObjectHandle::newNull();
            }
            writeObject(obj_to_write, count);

            // The new xref entry for the member refers to the containing stream and its index.
            m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
        }
    }
    {
        PipelinePopper pp_ostream(this);
        // Adjust offsets to skip over comment before first object
        first = offsets.at(0);
        for (auto& iter: offsets) {
            iter -= first;
        }

        // Take one pass at writing pairs of numbers so we can get their size information
        {
            PipelinePopper pp_discard(this);
            activatePipelineStack(pp_discard, true);
            writeObjectStreamOffsets(offsets, first_obj);
            // `first` becomes the size of the offset table plus whatever preceded the first object.
            first += m->pipeline->getCount();
        }

        // Set up a stream to write the stream data into a buffer.
        if (compressed) {
            activatePipelineStack(
                pp_ostream,
                pl::create<Pl_Flate>(
                    pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate));
        } else {
            activatePipelineStack(pp_ostream, stream_buffer_pass2);
        }
        writeObjectStreamOffsets(offsets, first_obj);
        writeString(stream_buffer_pass1);
        // The pass 1 buffer is no longer needed; release its memory now.
        stream_buffer_pass1.clear();
        stream_buffer_pass1.shrink_to_fit();
    }

    // Write the object
    openObject(new_stream_id);
    setDataKey(new_stream_id);
    writeString("<<");
    writeStringQDF("\n ");
    writeString(" /Type /ObjStm");
    writeStringQDF("\n ");
    size_t length = stream_buffer_pass2.size();
    // adjustAESStreamLength takes `length` by reference and may change it before it is written.
    adjustAESStreamLength(length);
    writeString(" /Length " + std::to_string(length));
    writeStringQDF("\n ");
    if (compressed) {
        writeString(" /Filter /FlateDecode");
    }
    writeString(" /N " + std::to_string(offsets.size()));
    writeStringQDF("\n ");
    writeString(" /First " + std::to_string(first));
    if (!object.isNull()) {
        // If the original object has an /Extends key, preserve it.
        QPDFObjectHandle dict = object.getDict();
        QPDFObjectHandle extends = dict.getKey("/Extends");
        if (extends.isIndirect()) {
            QTC::TC("qpdf", "QPDFWriter copy Extends");
            writeStringQDF("\n ");
            writeString(" /Extends ");
            unparseChild(extends, 1, f_in_ostream);
        }
    }
    writeStringQDF("\n");
    writeStringNoQDF(" ");
    writeString(">>\nstream\n");
    if (m->encryption) {
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
    }
    {
        // The stream data goes through the encryption filter pushed here; the filter is popped at
        // the end of this scope.
        PipelinePopper pp_enc(this);
        pushEncryptionFilter(pp_enc);
        writeString(stream_buffer_pass2);
    }
    if (m->newline_before_endstream) {
        writeString("\n");
    }
    writeString("endstream");
    m->cur_data_key.clear();
    closeObject(new_stream_id);
}
1752 | | |
1753 | | void |
1754 | | QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) |
1755 | 80.4k | { |
1756 | 80.4k | QPDFObjGen old_og = object.getObjGen(); |
1757 | | |
1758 | 80.4k | if ((object_stream_index == -1) && (old_og.getGen() == 0) && |
1759 | 80.4k | (m->object_stream_to_objects.count(old_og.getObj()))) { |
1760 | 0 | writeObjectStream(object); |
1761 | 0 | return; |
1762 | 0 | } |
1763 | | |
1764 | 80.4k | indicateProgress(false, false); |
1765 | 80.4k | auto new_id = m->obj[old_og].renumber; |
1766 | 80.4k | if (m->qdf_mode) { |
1767 | 0 | if (m->page_object_to_seq.contains(old_og)) { |
1768 | 0 | writeString("%% Page "); |
1769 | 0 | writeString(std::to_string(m->page_object_to_seq[old_og])); |
1770 | 0 | writeString("\n"); |
1771 | 0 | } |
1772 | 0 | if (m->contents_to_page_seq.contains(old_og)) { |
1773 | 0 | writeString("%% Contents for page "); |
1774 | 0 | writeString(std::to_string(m->contents_to_page_seq[old_og])); |
1775 | 0 | writeString("\n"); |
1776 | 0 | } |
1777 | 0 | } |
1778 | 80.4k | if (object_stream_index == -1) { |
1779 | 80.4k | if (m->qdf_mode && (!m->suppress_original_object_ids)) { |
1780 | 0 | writeString("%% Original object ID: " + object.getObjGen().unparse(' ') + "\n"); |
1781 | 0 | } |
1782 | 80.4k | openObject(new_id); |
1783 | 80.4k | setDataKey(new_id); |
1784 | 80.4k | unparseObject(object, 0, 0); |
1785 | 80.4k | m->cur_data_key.clear(); |
1786 | 80.4k | closeObject(new_id); |
1787 | 80.4k | } else { |
1788 | 0 | unparseObject(object, 0, f_in_ostream); |
1789 | 0 | writeString("\n"); |
1790 | 0 | } |
1791 | | |
1792 | 80.4k | if ((!m->direct_stream_lengths) && object.isStream()) { |
1793 | 0 | if (m->qdf_mode) { |
1794 | 0 | if (m->added_newline) { |
1795 | 0 | writeString("%QDF: ignore_newline\n"); |
1796 | 0 | } |
1797 | 0 | } |
1798 | 0 | openObject(new_id + 1); |
1799 | 0 | writeString(std::to_string(m->cur_stream_length)); |
1800 | 0 | closeObject(new_id + 1); |
1801 | 0 | } |
1802 | 80.4k | } |
1803 | | |
1804 | | std::string |
1805 | | QPDFWriter::getOriginalID1() |
1806 | 7.75k | { |
1807 | 7.75k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1808 | 7.75k | if (trailer.hasKey("/ID")) { |
1809 | 1.01k | return trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
1810 | 6.73k | } else { |
1811 | 6.73k | return ""; |
1812 | 6.73k | } |
1813 | 7.75k | } |
1814 | | |
1815 | | void |
1816 | | QPDFWriter::generateID() |
1817 | 15.2k | { |
1818 | | // Generate the ID lazily so that we can handle the user's preference to use static or |
1819 | | // deterministic ID generation. |
1820 | | |
1821 | 15.2k | if (!m->id2.empty()) { |
1822 | 7.50k | return; |
1823 | 7.50k | } |
1824 | | |
1825 | 7.75k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1826 | | |
1827 | 7.75k | std::string result; |
1828 | | |
1829 | 7.75k | if (m->static_id) { |
1830 | | // For test suite use only... |
1831 | 7.75k | static unsigned char tmp[] = { |
1832 | 7.75k | 0x31, |
1833 | 7.75k | 0x41, |
1834 | 7.75k | 0x59, |
1835 | 7.75k | 0x26, |
1836 | 7.75k | 0x53, |
1837 | 7.75k | 0x58, |
1838 | 7.75k | 0x97, |
1839 | 7.75k | 0x93, |
1840 | 7.75k | 0x23, |
1841 | 7.75k | 0x84, |
1842 | 7.75k | 0x62, |
1843 | 7.75k | 0x64, |
1844 | 7.75k | 0x33, |
1845 | 7.75k | 0x83, |
1846 | 7.75k | 0x27, |
1847 | 7.75k | 0x95, |
1848 | 7.75k | 0x00}; |
1849 | 7.75k | result = reinterpret_cast<char*>(tmp); |
1850 | 7.75k | } else { |
1851 | | // The PDF specification has guidelines for creating IDs, but it states clearly that the |
1852 | | // only thing that's really important is that it is very likely to be unique. We can't |
1853 | | // really follow the guidelines in the spec exactly because we haven't written the file yet. |
1854 | | // This scheme should be fine though. The deterministic ID case uses a digest of a |
1855 | | // sufficient portion of the file's contents such no two non-matching files would match in |
1856 | | // the subsets used for this computation. Note that we explicitly omit the filename from |
1857 | | // the digest calculation for deterministic ID so that the same file converted with qpdf, in |
1858 | | // that case, would have the same ID regardless of the output file's name. |
1859 | |
|
1860 | 0 | std::string seed; |
1861 | 0 | if (m->deterministic_id) { |
1862 | 0 | if (m->deterministic_id_data.empty()) { |
1863 | 0 | QTC::TC("qpdf", "QPDFWriter deterministic with no data"); |
1864 | 0 | throw std::runtime_error( |
1865 | 0 | "INTERNAL ERROR: QPDFWriter::generateID has no data for " |
1866 | 0 | "deterministic ID. This may happen if deterministic ID " |
1867 | 0 | "and file encryption are requested together."); |
1868 | 0 | } |
1869 | 0 | seed += m->deterministic_id_data; |
1870 | 0 | } else { |
1871 | 0 | seed += std::to_string(QUtil::get_current_time()); |
1872 | 0 | seed += m->filename; |
1873 | 0 | seed += " "; |
1874 | 0 | } |
1875 | 0 | seed += " QPDF "; |
1876 | 0 | if (trailer.hasKey("/Info")) { |
1877 | 0 | for (auto const& item: trailer.getKey("/Info").as_dictionary()) { |
1878 | 0 | if (item.second.isString()) { |
1879 | 0 | seed += " "; |
1880 | 0 | seed += item.second.getStringValue(); |
1881 | 0 | } |
1882 | 0 | } |
1883 | 0 | } |
1884 | |
|
1885 | 0 | MD5 m; |
1886 | 0 | m.encodeString(seed.c_str()); |
1887 | 0 | MD5::Digest digest; |
1888 | 0 | m.digest(digest); |
1889 | 0 | result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest)); |
1890 | 0 | } |
1891 | | |
1892 | | // If /ID already exists, follow the spec: use the original first word and generate a new second |
1893 | | // word. Otherwise, we'll use the generated ID for both. |
1894 | | |
1895 | 7.75k | m->id2 = result; |
1896 | | // Note: keep /ID from old file even if --static-id was given. |
1897 | 7.75k | m->id1 = getOriginalID1(); |
1898 | 7.75k | if (m->id1.empty()) { |
1899 | 6.79k | m->id1 = m->id2; |
1900 | 6.79k | } |
1901 | 7.75k | } |
1902 | | |
1903 | | void |
1904 | | QPDFWriter::initializeSpecialStreams() |
1905 | 7.74k | { |
1906 | | // Mark all page content streams in case we are filtering or normalizing. |
1907 | 7.74k | std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages(); |
1908 | 7.74k | int num = 0; |
1909 | 12.1k | for (auto& page: pages) { |
1910 | 12.1k | m->page_object_to_seq[page.getObjGen()] = ++num; |
1911 | 12.1k | QPDFObjectHandle contents = page.getKey("/Contents"); |
1912 | 12.1k | std::vector<QPDFObjGen> contents_objects; |
1913 | 12.1k | if (contents.isArray()) { |
1914 | 272 | int n = contents.getArrayNItems(); |
1915 | 7.90k | for (int i = 0; i < n; ++i) { |
1916 | 7.63k | contents_objects.push_back(contents.getArrayItem(i).getObjGen()); |
1917 | 7.63k | } |
1918 | 11.8k | } else if (contents.isStream()) { |
1919 | 1.92k | contents_objects.push_back(contents.getObjGen()); |
1920 | 1.92k | } |
1921 | | |
1922 | 12.1k | for (auto const& c: contents_objects) { |
1923 | 9.56k | m->contents_to_page_seq[c] = num; |
1924 | 9.56k | m->normalized_streams.insert(c); |
1925 | 9.56k | } |
1926 | 12.1k | } |
1927 | 7.74k | } |
1928 | | |
// Populate m->obj[...].object_stream from the input file's cross-reference table so that objects
// which were in object streams in the input stay in the same object streams in the output.
// m->obj.streams_empty is left true if the xref table contains no type 2 entries.
void
QPDFWriter::preserveObjectStreams()
{
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
    // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
    // streams out of old objects that have generation numbers greater than zero. However in an
    // existing PDF, all object stream objects and all objects in them must have generation 0
    // because the PDF spec does not provide any way to do otherwise. This code filters out objects
    // that are not allowed to be in object streams. In addition to removing objects that were
    // erroneously included in object streams in the source PDF, it also prevents unreferenced
    // objects from being included.
    auto end = xref.cend();
    m->obj.streams_empty = true;
    if (m->preserve_unreferenced_objects) {
        // No filtering in this mode: every type 2 xref entry keeps its object stream.
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
            if (iter->second.getType() == 2) {
                // Pdf contains object streams.
                QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
                m->obj.streams_empty = false;
                m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
            }
        }
    } else {
        // Start by scanning for first compressed object in case we don't have any object streams to
        // process.
        for (auto iter = xref.cbegin(); iter != end; ++iter) {
            if (iter->second.getType() == 2) {
                // Pdf contains object streams.
                QTC::TC("qpdf", "QPDFWriter preserve object streams");
                m->obj.streams_empty = false;
                // Computed only once a compressed object has been found. It is indexed by object
                // id below.
                auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
                // The object pointed to by iter may be a previous generation, in which case it is
                // removed by getCompressibleObjSet. We need to restart the loop (while the object
                // table may contain multiple generations of an object).
                for (iter = xref.cbegin(); iter != end; ++iter) {
                    if (iter->second.getType() == 2) {
                        auto id = static_cast<size_t>(iter->first.getObj());
                        if (id < eligible.size() && eligible[id]) {
                            m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
                        } else {
                            QTC::TC("qpdf", "QPDFWriter exclude from object stream");
                        }
                    }
                }
                // The inner loop has processed the whole table, so there is nothing left for the
                // outer loop to do.
                return;
            }
        }
    }
}
1978 | | |
1979 | | void |
1980 | | QPDFWriter::generateObjectStreams() |
1981 | 0 | { |
1982 | | // Basic strategy: make a list of objects that can go into an object stream. Then figure out |
1983 | | // how many object streams are needed so that we can distribute objects approximately evenly |
1984 | | // without having any object stream exceed 100 members. We don't have to worry about linearized |
1985 | | // files here -- if the file is linearized, we take care of excluding things that aren't allowed |
1986 | | // here later. |
1987 | | |
1988 | | // This code doesn't do anything with /Extends. |
1989 | |
|
1990 | 0 | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); |
1991 | 0 | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
1992 | |
|
1993 | 0 | initializeTables(2U * n_object_streams); |
1994 | 0 | if (n_object_streams == 0) { |
1995 | 0 | m->obj.streams_empty = true; |
1996 | 0 | return; |
1997 | 0 | } |
1998 | 0 | size_t n_per = eligible.size() / n_object_streams; |
1999 | 0 | if (n_per * n_object_streams < eligible.size()) { |
2000 | 0 | ++n_per; |
2001 | 0 | } |
2002 | 0 | unsigned int n = 0; |
2003 | 0 | int cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2004 | 0 | for (auto const& item: eligible) { |
2005 | 0 | if (n == n_per) { |
2006 | 0 | QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); |
2007 | 0 | n = 0; |
2008 | | // Construct a new null object as the "original" object stream. The rest of the code |
2009 | | // knows that this means we're creating the object stream from scratch. |
2010 | 0 | cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2011 | 0 | } |
2012 | 0 | auto& obj = m->obj[item]; |
2013 | 0 | obj.object_stream = cur_ostream; |
2014 | 0 | obj.gen = item.getGen(); |
2015 | 0 | ++n; |
2016 | 0 | } |
2017 | 0 | } |
2018 | | |
2019 | | QPDFObjectHandle |
2020 | | QPDFWriter::getTrimmedTrailer() |
2021 | 15.1k | { |
2022 | | // Remove keys from the trailer that necessarily have to be replaced when writing the file. |
2023 | | |
2024 | 15.1k | QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy(); |
2025 | | |
2026 | | // Remove encryption keys |
2027 | 15.1k | trailer.removeKey("/ID"); |
2028 | 15.1k | trailer.removeKey("/Encrypt"); |
2029 | | |
2030 | | // Remove modification information |
2031 | 15.1k | trailer.removeKey("/Prev"); |
2032 | | |
2033 | | // Remove all trailer keys that potentially come from a cross-reference stream |
2034 | 15.1k | trailer.removeKey("/Index"); |
2035 | 15.1k | trailer.removeKey("/W"); |
2036 | 15.1k | trailer.removeKey("/Length"); |
2037 | 15.1k | trailer.removeKey("/Filter"); |
2038 | 15.1k | trailer.removeKey("/DecodeParms"); |
2039 | 15.1k | trailer.removeKey("/Type"); |
2040 | 15.1k | trailer.removeKey("/XRefStm"); |
2041 | | |
2042 | 15.1k | return trailer; |
2043 | 15.1k | } |
2044 | | |
2045 | | // Make document extension level information direct as required by the spec. |
2046 | | void |
2047 | | QPDFWriter::prepareFileForWrite() |
2048 | 7.66k | { |
2049 | 7.66k | m->pdf.fixDanglingReferences(); |
2050 | 7.66k | auto root = m->pdf.getRoot(); |
2051 | 7.66k | auto oh = root.getKey("/Extensions"); |
2052 | 7.66k | if (oh.isDictionary()) { |
2053 | 275 | const bool extensions_indirect = oh.isIndirect(); |
2054 | 275 | if (extensions_indirect) { |
2055 | 24 | QTC::TC("qpdf", "QPDFWriter make Extensions direct"); |
2056 | 24 | oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy()); |
2057 | 24 | } |
2058 | 275 | if (oh.hasKey("/ADBE")) { |
2059 | 230 | auto adbe = oh.getKey("/ADBE"); |
2060 | 230 | if (adbe.isIndirect()) { |
2061 | 142 | QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1); |
2062 | 142 | adbe.makeDirect(); |
2063 | 142 | oh.replaceKey("/ADBE", adbe); |
2064 | 142 | } |
2065 | 230 | } |
2066 | 275 | } |
2067 | 7.66k | } |
2068 | | |
2069 | | void |
2070 | | QPDFWriter::initializeTables(size_t extra) |
2071 | 7.72k | { |
2072 | 7.72k | auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; |
2073 | 7.72k | m->obj.resize(size); |
2074 | 7.72k | m->new_obj.resize(size); |
2075 | 7.72k | } |
2076 | | |
2077 | | void |
2078 | | QPDFWriter::doWriteSetup() |
2079 | 7.74k | { |
2080 | 7.74k | if (m->did_write_setup) { |
2081 | 0 | return; |
2082 | 0 | } |
2083 | 7.74k | m->did_write_setup = true; |
2084 | | |
2085 | | // Do preliminary setup |
2086 | | |
2087 | 7.74k | if (m->linearized) { |
2088 | 0 | m->qdf_mode = false; |
2089 | 0 | } |
2090 | | |
2091 | 7.74k | if (m->pclm) { |
2092 | 0 | m->stream_decode_level = qpdf_dl_none; |
2093 | 0 | m->compress_streams = false; |
2094 | 0 | m->encryption = nullptr; |
2095 | 0 | } |
2096 | | |
2097 | 7.74k | if (m->qdf_mode) { |
2098 | 0 | if (!m->normalize_content_set) { |
2099 | 0 | m->normalize_content = true; |
2100 | 0 | } |
2101 | 0 | if (!m->compress_streams_set) { |
2102 | 0 | m->compress_streams = false; |
2103 | 0 | } |
2104 | 0 | if (!m->stream_decode_level_set) { |
2105 | 0 | m->stream_decode_level = qpdf_dl_generalized; |
2106 | 0 | } |
2107 | 0 | } |
2108 | | |
2109 | 7.74k | if (m->encryption) { |
2110 | | // Encryption has been explicitly set |
2111 | 7.74k | m->preserve_encryption = false; |
2112 | 7.74k | } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) { |
2113 | | // Encryption makes looking at contents pretty useless. If the user explicitly encrypted |
2114 | | // though, we still obey that. |
2115 | 0 | m->preserve_encryption = false; |
2116 | 0 | } |
2117 | | |
2118 | 7.74k | if (m->preserve_encryption) { |
2119 | 0 | copyEncryptionParameters(m->pdf); |
2120 | 0 | } |
2121 | | |
2122 | 7.74k | if (!m->forced_pdf_version.empty()) { |
2123 | 0 | int major = 0; |
2124 | 0 | int minor = 0; |
2125 | 0 | parseVersion(m->forced_pdf_version, major, minor); |
2126 | 0 | disableIncompatibleEncryption(major, minor, m->forced_extension_level); |
2127 | 0 | if (compareVersions(major, minor, 1, 5) < 0) { |
2128 | 0 | QTC::TC("qpdf", "QPDFWriter forcing object stream disable"); |
2129 | 0 | m->object_stream_mode = qpdf_o_disable; |
2130 | 0 | } |
2131 | 0 | } |
2132 | | |
2133 | 7.74k | if (m->qdf_mode || m->normalize_content || m->stream_decode_level) { |
2134 | 7.74k | initializeSpecialStreams(); |
2135 | 7.74k | } |
2136 | | |
2137 | 7.74k | if (m->qdf_mode) { |
2138 | | // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing |
2139 | | // recomputed stream length data. Certain streams such as object streams, xref streams, and |
2140 | | // hint streams always get direct stream lengths. |
2141 | 0 | m->direct_stream_lengths = false; |
2142 | 0 | } |
2143 | | |
2144 | 7.74k | switch (m->object_stream_mode) { |
2145 | 7.72k | case qpdf_o_disable: |
2146 | 7.72k | initializeTables(); |
2147 | 7.72k | m->obj.streams_empty = true; |
2148 | 7.72k | break; |
2149 | | |
2150 | 0 | case qpdf_o_preserve: |
2151 | 0 | initializeTables(); |
2152 | 0 | preserveObjectStreams(); |
2153 | 0 | break; |
2154 | | |
2155 | 0 | case qpdf_o_generate: |
2156 | 0 | generateObjectStreams(); |
2157 | 0 | break; |
2158 | | |
2159 | | // no default so gcc will warn for missing case tag |
2160 | 7.74k | } |
2161 | | |
2162 | 7.71k | if (!m->obj.streams_empty) { |
2163 | 0 | if (m->linearized) { |
2164 | | // Page dictionaries are not allowed to be compressed objects. |
2165 | 0 | for (auto& page: m->pdf.getAllPages()) { |
2166 | 0 | if (m->obj[page].object_stream > 0) { |
2167 | 0 | QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); |
2168 | 0 | m->obj[page].object_stream = 0; |
2169 | 0 | } |
2170 | 0 | } |
2171 | 0 | } |
2172 | |
|
2173 | 0 | if (m->linearized || m->encryption) { |
2174 | | // The document catalog is not allowed to be compressed in linearized files either. It |
2175 | | // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to |
2176 | | // handle encrypted files with compressed document catalogs, so we disable them in that |
2177 | | // case as well. |
2178 | 0 | if (m->obj[m->root_og].object_stream > 0) { |
2179 | 0 | QTC::TC("qpdf", "QPDFWriter uncompressing root"); |
2180 | 0 | m->obj[m->root_og].object_stream = 0; |
2181 | 0 | } |
2182 | 0 | } |
2183 | | |
2184 | | // Generate reverse mapping from object stream to objects |
2185 | 0 | m->obj.forEach([this](auto id, auto const& item) -> void { |
2186 | 0 | if (item.object_stream > 0) { |
2187 | 0 | auto& vec = m->object_stream_to_objects[item.object_stream]; |
2188 | 0 | vec.emplace_back(id, item.gen); |
2189 | 0 | if (m->max_ostream_index < vec.size()) { |
2190 | 0 | ++m->max_ostream_index; |
2191 | 0 | } |
2192 | 0 | } |
2193 | 0 | }); |
2194 | 0 | --m->max_ostream_index; |
2195 | |
|
2196 | 0 | if (m->object_stream_to_objects.empty()) { |
2197 | 0 | m->obj.streams_empty = true; |
2198 | 0 | } else { |
2199 | 0 | setMinimumPDFVersion("1.5"); |
2200 | 0 | } |
2201 | 0 | } |
2202 | | |
2203 | 7.71k | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
2204 | 7.71k | m->final_pdf_version = m->min_pdf_version; |
2205 | 7.71k | m->final_extension_level = m->min_extension_level; |
2206 | 7.71k | if (!m->forced_pdf_version.empty()) { |
2207 | 0 | QTC::TC("qpdf", "QPDFWriter using forced PDF version"); |
2208 | 0 | m->final_pdf_version = m->forced_pdf_version; |
2209 | 0 | m->final_extension_level = m->forced_extension_level; |
2210 | 0 | } |
2211 | 7.71k | } |
2212 | | |
2213 | | void |
2214 | | QPDFWriter::write() |
2215 | 7.74k | { |
2216 | 7.74k | doWriteSetup(); |
2217 | | |
2218 | | // Set up progress reporting. For linearized files, we write two passes. events_expected is an |
2219 | | // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. |
2220 | 7.74k | m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1)); |
2221 | | |
2222 | 7.74k | prepareFileForWrite(); |
2223 | | |
2224 | 7.74k | if (m->linearized) { |
2225 | 0 | writeLinearized(); |
2226 | 7.74k | } else { |
2227 | 7.74k | writeStandard(); |
2228 | 7.74k | } |
2229 | | |
2230 | 7.74k | m->pipeline->finish(); |
2231 | 7.74k | if (m->close_file) { |
2232 | 0 | fclose(m->file); |
2233 | 0 | } |
2234 | 7.74k | m->file = nullptr; |
2235 | 7.74k | if (m->buffer_pipeline) { |
2236 | 0 | m->output_buffer = m->buffer_pipeline->getBuffer(); |
2237 | 0 | m->buffer_pipeline = nullptr; |
2238 | 0 | } |
2239 | 7.74k | indicateProgress(false, true); |
2240 | 7.74k | } |
2241 | | |
2242 | | QPDFObjGen |
2243 | | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
2244 | 0 | { |
2245 | 0 | return {m->obj[og].renumber, 0}; |
2246 | 0 | } |
2247 | | |
2248 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2249 | | QPDFWriter::getWrittenXRefTable() |
2250 | 0 | { |
2251 | 0 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
2252 | |
|
2253 | 0 | auto it = result.begin(); |
2254 | 0 | m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { |
2255 | 0 | if (item.xref.getType() != 0) { |
2256 | 0 | it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); |
2257 | 0 | } |
2258 | 0 | }); |
2259 | 0 | return result; |
2260 | 0 | } |
2261 | | |
2262 | | void |
2263 | | QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part) |
2264 | 0 | { |
2265 | 0 | for (auto const& oh: part) { |
2266 | 0 | enqueueObject(oh); |
2267 | 0 | } |
2268 | 0 | } |
2269 | | |
2270 | | void |
2271 | | QPDFWriter::writeEncryptionDictionary() |
2272 | 7.50k | { |
2273 | 7.50k | m->encryption_dict_objid = openObject(m->encryption_dict_objid); |
2274 | 7.50k | auto& enc = *m->encryption; |
2275 | 7.50k | auto const V = enc.getV(); |
2276 | | |
2277 | 7.50k | writeString("<<"); |
2278 | 7.50k | if (V >= 4) { |
2279 | 0 | writeString(" /CF << /StdCF << /AuthEvent /DocOpen /CFM "); |
2280 | 0 | writeString(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2"); |
2281 | | // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of |
2282 | | // MacOS won't open encrypted files without it. |
2283 | 0 | writeString((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>"); |
2284 | 0 | if (!m->encryption->getEncryptMetadata()) { |
2285 | 0 | writeString(" /EncryptMetadata false"); |
2286 | 0 | } |
2287 | 0 | } |
2288 | 7.50k | writeString(" /Filter /Standard /Length "); |
2289 | 7.50k | writeString(std::to_string(enc.getLengthBytes() * 8)); |
2290 | 7.50k | writeString(" /O "); |
2291 | 7.50k | writeString(QPDF_String(enc.getO()).unparse(true)); |
2292 | 7.50k | if (V >= 4) { |
2293 | 0 | writeString(" /OE "); |
2294 | 0 | writeString(QPDF_String(enc.getOE()).unparse(true)); |
2295 | 0 | } |
2296 | 7.50k | writeString(" /P "); |
2297 | 7.50k | writeString(std::to_string(enc.getP())); |
2298 | 7.50k | if (V >= 5) { |
2299 | 0 | writeString(" /Perms "); |
2300 | 0 | writeString(QPDF_String(enc.getPerms()).unparse(true)); |
2301 | 0 | } |
2302 | 7.50k | writeString(" /R "); |
2303 | 7.50k | writeString(std::to_string(enc.getR())); |
2304 | | |
2305 | 7.50k | if (V >= 4) { |
2306 | 0 | writeString(" /StmF /StdCF /StrF /StdCF"); |
2307 | 0 | } |
2308 | 7.50k | writeString(" /U "); |
2309 | 7.50k | writeString(QPDF_String(enc.getU()).unparse(true)); |
2310 | 7.50k | if (V >= 4) { |
2311 | 0 | writeString(" /UE "); |
2312 | 0 | writeString(QPDF_String(enc.getUE()).unparse(true)); |
2313 | 0 | } |
2314 | 7.50k | writeString(" /V "); |
2315 | 7.50k | writeString(std::to_string(enc.getV())); |
2316 | 7.50k | writeString(" >>"); |
2317 | 7.50k | closeObject(m->encryption_dict_objid); |
2318 | 7.50k | } |
2319 | | |
2320 | | std::string |
2321 | | QPDFWriter::getFinalVersion() |
2322 | 0 | { |
2323 | 0 | doWriteSetup(); |
2324 | 0 | return m->final_pdf_version; |
2325 | 0 | } |
2326 | | |
2327 | | void |
2328 | | QPDFWriter::writeHeader() |
2329 | 7.63k | { |
2330 | 7.63k | writeString("%PDF-"); |
2331 | 7.63k | writeString(m->final_pdf_version); |
2332 | 7.63k | if (m->pclm) { |
2333 | | // PCLm version |
2334 | 0 | writeString("\n%PCLm 1.0\n"); |
2335 | 7.63k | } else { |
2336 | | // This string of binary characters would not be valid UTF-8, so it really should be treated |
2337 | | // as binary. |
2338 | 7.63k | writeString("\n%\xbf\xf7\xa2\xfe\n"); |
2339 | 7.63k | } |
2340 | 7.63k | writeStringQDF("%QDF-1.0\n\n"); |
2341 | | |
2342 | | // Note: do not write extra header text here. Linearized PDFs must include the entire |
2343 | | // linearization parameter dictionary within the first 1024 characters of the PDF file, so for |
2344 | | // linearized files, we have to write extra header text after the linearization parameter |
2345 | | // dictionary. |
2346 | 7.63k | } |
2347 | | |
2348 | | void |
2349 | | QPDFWriter::writeHintStream(int hint_id) |
2350 | 0 | { |
2351 | 0 | std::string hint_buffer; |
2352 | 0 | int S = 0; |
2353 | 0 | int O = 0; |
2354 | 0 | bool compressed = (m->compress_streams && !m->qdf_mode); |
2355 | 0 | QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); |
2356 | |
|
2357 | 0 | openObject(hint_id); |
2358 | 0 | setDataKey(hint_id); |
2359 | |
|
2360 | 0 | size_t hlen = hint_buffer.size(); |
2361 | |
|
2362 | 0 | writeString("<< "); |
2363 | 0 | if (compressed) { |
2364 | 0 | writeString("/Filter /FlateDecode "); |
2365 | 0 | } |
2366 | 0 | writeString("/S "); |
2367 | 0 | writeString(std::to_string(S)); |
2368 | 0 | if (O) { |
2369 | 0 | writeString(" /O "); |
2370 | 0 | writeString(std::to_string(O)); |
2371 | 0 | } |
2372 | 0 | writeString(" /Length "); |
2373 | 0 | adjustAESStreamLength(hlen); |
2374 | 0 | writeString(std::to_string(hlen)); |
2375 | 0 | writeString(" >>\nstream\n"); |
2376 | |
|
2377 | 0 | if (m->encryption) { |
2378 | 0 | QTC::TC("qpdf", "QPDFWriter encrypted hint stream"); |
2379 | 0 | } |
2380 | 0 | char last_char = hint_buffer.empty() ? '\0' : hint_buffer.back(); |
2381 | 0 | { |
2382 | 0 | PipelinePopper pp_enc(this); |
2383 | 0 | pushEncryptionFilter(pp_enc); |
2384 | 0 | writeString(hint_buffer); |
2385 | 0 | } |
2386 | |
|
2387 | 0 | if (last_char != '\n') { |
2388 | 0 | writeString("\n"); |
2389 | 0 | } |
2390 | 0 | writeString("endstream"); |
2391 | 0 | closeObject(hint_id); |
2392 | 0 | } |
2393 | | |
2394 | | qpdf_offset_t |
2395 | | QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size) |
2396 | 7.50k | { |
2397 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2398 | | // file...too much risk of leaving something off. |
2399 | 7.50k | return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0); |
2400 | 7.50k | } |
2401 | | |
2402 | | qpdf_offset_t |
2403 | | QPDFWriter::writeXRefTable( |
2404 | | trailer_e which, |
2405 | | int first, |
2406 | | int last, |
2407 | | int size, |
2408 | | qpdf_offset_t prev, |
2409 | | bool suppress_offsets, |
2410 | | int hint_id, |
2411 | | qpdf_offset_t hint_offset, |
2412 | | qpdf_offset_t hint_length, |
2413 | | int linearization_pass) |
2414 | 7.50k | { |
2415 | 7.50k | writeString("xref\n"); |
2416 | 7.50k | writeString(std::to_string(first)); |
2417 | 7.50k | writeString(" "); |
2418 | 7.50k | writeString(std::to_string(last - first + 1)); |
2419 | 7.50k | qpdf_offset_t space_before_zero = m->pipeline->getCount(); |
2420 | 7.50k | writeString("\n"); |
2421 | 101k | for (int i = first; i <= last; ++i) { |
2422 | 93.4k | if (i == 0) { |
2423 | 7.50k | writeString("0000000000 65535 f \n"); |
2424 | 85.9k | } else { |
2425 | 85.9k | qpdf_offset_t offset = 0; |
2426 | 85.9k | if (!suppress_offsets) { |
2427 | 85.9k | offset = m->new_obj[i].xref.getOffset(); |
2428 | 85.9k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2429 | 0 | offset += hint_length; |
2430 | 0 | } |
2431 | 85.9k | } |
2432 | 85.9k | writeString(QUtil::int_to_string(offset, 10)); |
2433 | 85.9k | writeString(" 00000 n \n"); |
2434 | 85.9k | } |
2435 | 93.4k | } |
2436 | 7.50k | writeTrailer(which, size, false, prev, linearization_pass); |
2437 | 7.50k | writeString("\n"); |
2438 | 7.50k | return space_before_zero; |
2439 | 7.50k | } |
2440 | | |
2441 | | qpdf_offset_t |
2442 | | QPDFWriter::writeXRefStream( |
2443 | | int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size) |
2444 | 0 | { |
2445 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2446 | | // file...too much risk of leaving something off. |
2447 | 0 | return writeXRefStream( |
2448 | 0 | objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0); |
2449 | 0 | } |
2450 | | |
2451 | | qpdf_offset_t |
2452 | | QPDFWriter::writeXRefStream( |
2453 | | int xref_id, |
2454 | | int max_id, |
2455 | | qpdf_offset_t max_offset, |
2456 | | trailer_e which, |
2457 | | int first, |
2458 | | int last, |
2459 | | int size, |
2460 | | qpdf_offset_t prev, |
2461 | | int hint_id, |
2462 | | qpdf_offset_t hint_offset, |
2463 | | qpdf_offset_t hint_length, |
2464 | | bool skip_compression, |
2465 | | int linearization_pass) |
2466 | 0 | { |
2467 | 0 | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
2468 | 0 | qpdf_offset_t space_before_zero = xref_offset - 1; |
2469 | | |
2470 | | // field 1 contains offsets and object stream identifiers |
2471 | 0 | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
2472 | | |
2473 | | // field 2 contains object stream indices |
2474 | 0 | unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); |
2475 | |
|
2476 | 0 | unsigned int esize = 1 + f1_size + f2_size; |
2477 | | |
2478 | | // Must store in xref table in advance of writing the actual data rather than waiting for |
2479 | | // openObject to do it. |
2480 | 0 | m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2481 | |
|
2482 | 0 | std::string xref_data; |
2483 | 0 | const bool compressed = m->compress_streams && !m->qdf_mode; |
2484 | 0 | { |
2485 | 0 | PipelinePopper pp_xref(this); |
2486 | 0 | if (compressed) { |
2487 | 0 | m->count_buffer.clear(); |
2488 | 0 | auto link = pl::create<pl::String>(xref_data); |
2489 | 0 | if (!skip_compression) { |
2490 | | // Write the stream dictionary for compression but don't actually compress. This |
2491 | | // helps us with computation of padding for pass 1 of linearization. |
2492 | 0 | link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate); |
2493 | 0 | } |
2494 | 0 | activatePipelineStack( |
2495 | 0 | pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize)); |
2496 | 0 | } else { |
2497 | 0 | activatePipelineStack(pp_xref, xref_data); |
2498 | 0 | } |
2499 | |
|
2500 | 0 | for (int i = first; i <= last; ++i) { |
2501 | 0 | QPDFXRefEntry& e = m->new_obj[i].xref; |
2502 | 0 | switch (e.getType()) { |
2503 | 0 | case 0: |
2504 | 0 | writeBinary(0, 1); |
2505 | 0 | writeBinary(0, f1_size); |
2506 | 0 | writeBinary(0, f2_size); |
2507 | 0 | break; |
2508 | | |
2509 | 0 | case 1: |
2510 | 0 | { |
2511 | 0 | qpdf_offset_t offset = e.getOffset(); |
2512 | 0 | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2513 | 0 | offset += hint_length; |
2514 | 0 | } |
2515 | 0 | writeBinary(1, 1); |
2516 | 0 | writeBinary(QIntC::to_ulonglong(offset), f1_size); |
2517 | 0 | writeBinary(0, f2_size); |
2518 | 0 | } |
2519 | 0 | break; |
2520 | | |
2521 | 0 | case 2: |
2522 | 0 | writeBinary(2, 1); |
2523 | 0 | writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size); |
2524 | 0 | writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size); |
2525 | 0 | break; |
2526 | | |
2527 | 0 | default: |
2528 | 0 | throw std::logic_error("invalid type writing xref stream"); |
2529 | 0 | break; |
2530 | 0 | } |
2531 | 0 | } |
2532 | 0 | } |
2533 | | |
2534 | 0 | openObject(xref_id); |
2535 | 0 | writeString("<<"); |
2536 | 0 | writeStringQDF("\n "); |
2537 | 0 | writeString(" /Type /XRef"); |
2538 | 0 | writeStringQDF("\n "); |
2539 | 0 | writeString(" /Length " + std::to_string(xref_data.size())); |
2540 | 0 | if (compressed) { |
2541 | 0 | writeStringQDF("\n "); |
2542 | 0 | writeString(" /Filter /FlateDecode"); |
2543 | 0 | writeStringQDF("\n "); |
2544 | 0 | writeString(" /DecodeParms << /Columns " + std::to_string(esize) + " /Predictor 12 >>"); |
2545 | 0 | } |
2546 | 0 | writeStringQDF("\n "); |
2547 | 0 | writeString(" /W [ 1 " + std::to_string(f1_size) + " " + std::to_string(f2_size) + " ]"); |
2548 | 0 | if (!((first == 0) && (last == size - 1))) { |
2549 | 0 | writeString( |
2550 | 0 | " /Index [ " + std::to_string(first) + " " + std::to_string(last - first + 1) + " ]"); |
2551 | 0 | } |
2552 | 0 | writeTrailer(which, size, true, prev, linearization_pass); |
2553 | 0 | writeString("\nstream\n"); |
2554 | 0 | writeString(xref_data); |
2555 | 0 | writeString("\nendstream"); |
2556 | 0 | closeObject(xref_id); |
2557 | 0 | return space_before_zero; |
2558 | 0 | } |
2559 | | |
2560 | | size_t |
2561 | | QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
2562 | 0 | { |
2563 | | // This routine is called right after a linearization first pass xref stream has been written |
2564 | | // without compression. Calculate the amount of padding that would be required in the worst |
2565 | | // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is |
2566 | | // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add |
2567 | | // 10 extra bytes for number length increases. |
2568 | |
|
2569 | 0 | return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); |
2570 | 0 | } |
2571 | | |
2572 | | void |
2573 | | QPDFWriter::writeLinearized() |
2574 | 0 | { |
2575 | | // Optimize file and enqueue objects in order |
2576 | |
|
2577 | 0 | std::map<int, int> stream_cache; |
2578 | |
|
2579 | 0 | auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) { |
2580 | 0 | auto& result = stream_cache[stream.getObjectID()]; |
2581 | 0 | if (result == 0) { |
2582 | 0 | bool compress_stream; |
2583 | 0 | bool is_metadata; |
2584 | 0 | if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) { |
2585 | 0 | result = 2; |
2586 | 0 | } else { |
2587 | 0 | result = 1; |
2588 | 0 | } |
2589 | 0 | } |
2590 | 0 | return result; |
2591 | 0 | }; |
2592 | |
|
2593 | 0 | QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); |
2594 | |
|
2595 | 0 | std::vector<QPDFObjectHandle> part4; |
2596 | 0 | std::vector<QPDFObjectHandle> part6; |
2597 | 0 | std::vector<QPDFObjectHandle> part7; |
2598 | 0 | std::vector<QPDFObjectHandle> part8; |
2599 | 0 | std::vector<QPDFObjectHandle> part9; |
2600 | 0 | QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); |
2601 | | |
2602 | | // Object number sequence: |
2603 | | // |
2604 | | // second half |
2605 | | // second half uncompressed objects |
2606 | | // second half xref stream, if any |
2607 | | // second half compressed objects |
2608 | | // first half |
2609 | | // linearization dictionary |
2610 | | // first half xref stream, if any |
2611 | | // part 4 uncompresesd objects |
2612 | | // encryption dictionary, if any |
2613 | | // hint stream |
2614 | | // part 6 uncompressed objects |
2615 | | // first half compressed objects |
2616 | | // |
2617 | | |
2618 | | // Second half objects |
2619 | 0 | int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size()); |
2620 | 0 | int second_half_first_obj = 1; |
2621 | 0 | int after_second_half = 1 + second_half_uncompressed; |
2622 | 0 | m->next_objid = after_second_half; |
2623 | 0 | int second_half_xref = 0; |
2624 | 0 | bool need_xref_stream = !m->obj.streams_empty; |
2625 | 0 | if (need_xref_stream) { |
2626 | 0 | second_half_xref = m->next_objid++; |
2627 | 0 | } |
2628 | | // Assign numbers to all compressed objects in the second half. |
2629 | 0 | std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9}; |
2630 | 0 | for (int i = 0; i < 3; ++i) { |
2631 | 0 | for (auto const& oh: *vecs2[i]) { |
2632 | 0 | assignCompressedObjectNumbers(oh.getObjGen()); |
2633 | 0 | } |
2634 | 0 | } |
2635 | 0 | int second_half_end = m->next_objid - 1; |
2636 | 0 | int second_trailer_size = m->next_objid; |
2637 | | |
2638 | | // First half objects |
2639 | 0 | int first_half_start = m->next_objid; |
2640 | 0 | int lindict_id = m->next_objid++; |
2641 | 0 | int first_half_xref = 0; |
2642 | 0 | if (need_xref_stream) { |
2643 | 0 | first_half_xref = m->next_objid++; |
2644 | 0 | } |
2645 | 0 | int part4_first_obj = m->next_objid; |
2646 | 0 | m->next_objid += QIntC::to_int(part4.size()); |
2647 | 0 | int after_part4 = m->next_objid; |
2648 | 0 | if (m->encryption) { |
2649 | 0 | m->encryption_dict_objid = m->next_objid++; |
2650 | 0 | } |
2651 | 0 | int hint_id = m->next_objid++; |
2652 | 0 | int part6_first_obj = m->next_objid; |
2653 | 0 | m->next_objid += QIntC::to_int(part6.size()); |
2654 | 0 | int after_part6 = m->next_objid; |
2655 | | // Assign numbers to all compressed objects in the first half |
2656 | 0 | std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6}; |
2657 | 0 | for (int i = 0; i < 2; ++i) { |
2658 | 0 | for (auto const& oh: *vecs1[i]) { |
2659 | 0 | assignCompressedObjectNumbers(oh.getObjGen()); |
2660 | 0 | } |
2661 | 0 | } |
2662 | 0 | int first_half_end = m->next_objid - 1; |
2663 | 0 | int first_trailer_size = m->next_objid; |
2664 | |
|
2665 | 0 | int part4_end_marker = part4.back().getObjectID(); |
2666 | 0 | int part6_end_marker = part6.back().getObjectID(); |
2667 | 0 | qpdf_offset_t space_before_zero = 0; |
2668 | 0 | qpdf_offset_t file_size = 0; |
2669 | 0 | qpdf_offset_t part6_end_offset = 0; |
2670 | 0 | qpdf_offset_t first_half_max_obj_offset = 0; |
2671 | 0 | qpdf_offset_t second_xref_offset = 0; |
2672 | 0 | qpdf_offset_t first_xref_end = 0; |
2673 | 0 | qpdf_offset_t second_xref_end = 0; |
2674 | |
|
2675 | 0 | m->next_objid = part4_first_obj; |
2676 | 0 | enqueuePart(part4); |
2677 | 0 | if (m->next_objid != after_part4) { |
2678 | | // This can happen with very botched files as in the fuzzer test. There are likely some |
2679 | | // faulty assumptions in calculateLinearizationData |
2680 | 0 | throw std::runtime_error("error encountered after writing part 4 of linearized data"); |
2681 | 0 | } |
2682 | 0 | m->next_objid = part6_first_obj; |
2683 | 0 | enqueuePart(part6); |
2684 | 0 | if (m->next_objid != after_part6) { |
2685 | 0 | throw std::runtime_error("error encountered after writing part 6 of linearized data"); |
2686 | 0 | } |
2687 | 0 | m->next_objid = second_half_first_obj; |
2688 | 0 | enqueuePart(part7); |
2689 | 0 | enqueuePart(part8); |
2690 | 0 | enqueuePart(part9); |
2691 | 0 | if (m->next_objid != after_second_half) { |
2692 | 0 | throw std::runtime_error("error encountered after writing part 9 of linearized data"); |
2693 | 0 | } |
2694 | | |
2695 | 0 | qpdf_offset_t hint_length = 0; |
2696 | 0 | std::string hint_buffer; |
2697 | | |
2698 | | // Write file in two passes. Part numbers refer to PDF spec 1.4. |
2699 | |
|
2700 | 0 | FILE* lin_pass1_file = nullptr; |
2701 | 0 | auto pp_pass1 = std::make_unique<PipelinePopper>(this); |
2702 | 0 | auto pp_md5 = std::make_unique<PipelinePopper>(this); |
2703 | 0 | for (int pass: {1, 2}) { |
2704 | 0 | if (pass == 1) { |
2705 | 0 | if (!m->lin_pass1_filename.empty()) { |
2706 | 0 | lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb"); |
2707 | 0 | pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file)); |
2708 | 0 | activatePipelineStack(*pp_pass1); |
2709 | 0 | } else { |
2710 | 0 | activatePipelineStack(*pp_pass1, true); |
2711 | 0 | } |
2712 | 0 | if (m->deterministic_id) { |
2713 | 0 | pushMD5Pipeline(*pp_md5); |
2714 | 0 | } |
2715 | 0 | } |
2716 | | |
2717 | | // Part 1: header |
2718 | |
|
2719 | 0 | writeHeader(); |
2720 | | |
2721 | | // Part 2: linearization parameter dictionary. Save enough space to write real dictionary. |
2722 | | // 200 characters is enough space if all numerical values in the parameter dictionary that |
2723 | | // contain offsets are 20 digits long plus a few extra characters for safety. The entire |
2724 | | // linearization parameter dictionary must appear within the first 1024 characters of the |
2725 | | // file. |
2726 | |
|
2727 | 0 | qpdf_offset_t pos = m->pipeline->getCount(); |
2728 | 0 | openObject(lindict_id); |
2729 | 0 | writeString("<<"); |
2730 | 0 | if (pass == 2) { |
2731 | 0 | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); |
2732 | 0 | int first_page_object = m->obj[pages.at(0)].renumber; |
2733 | 0 | int npages = QIntC::to_int(pages.size()); |
2734 | |
|
2735 | 0 | writeString(" /Linearized 1 /L "); |
2736 | 0 | writeString(std::to_string(file_size + hint_length)); |
2737 | | // Implementation note 121 states that a space is mandatory after this open bracket. |
2738 | 0 | writeString(" /H [ "); |
2739 | 0 | writeString(std::to_string(m->new_obj[hint_id].xref.getOffset())); |
2740 | 0 | writeString(" "); |
2741 | 0 | writeString(std::to_string(hint_length)); |
2742 | 0 | writeString(" ] /O "); |
2743 | 0 | writeString(std::to_string(first_page_object)); |
2744 | 0 | writeString(" /E "); |
2745 | 0 | writeString(std::to_string(part6_end_offset + hint_length)); |
2746 | 0 | writeString(" /N "); |
2747 | 0 | writeString(std::to_string(npages)); |
2748 | 0 | writeString(" /T "); |
2749 | 0 | writeString(std::to_string(space_before_zero + hint_length)); |
2750 | 0 | } |
2751 | 0 | writeString(" >>"); |
2752 | 0 | closeObject(lindict_id); |
2753 | 0 | static int const pad = 200; |
2754 | 0 | writePad(QIntC::to_size(pos - m->pipeline->getCount() + pad)); |
2755 | 0 | writeString("\n"); |
2756 | | |
2757 | | // If the user supplied any additional header text, write it here after the linearization |
2758 | | // parameter dictionary. |
2759 | 0 | writeString(m->extra_header_text); |
2760 | | |
2761 | | // Part 3: first page cross reference table and trailer. |
2762 | |
|
2763 | 0 | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); |
2764 | 0 | qpdf_offset_t hint_offset = 0; |
2765 | 0 | if (pass == 2) { |
2766 | 0 | hint_offset = m->new_obj[hint_id].xref.getOffset(); |
2767 | 0 | } |
2768 | 0 | if (need_xref_stream) { |
2769 | | // Must pad here too. |
2770 | 0 | if (pass == 1) { |
2771 | | // Set first_half_max_obj_offset to a value large enough to force four bytes to be |
2772 | | // reserved for each file offset. This would provide adequate space for the xref |
2773 | | // stream as long as the last object in page 1 starts with in the first 4 GB of the |
2774 | | // file, which is extremely likely. In the second pass, we will know the actual |
2775 | | // value for this, but it's okay if it's smaller. |
2776 | 0 | first_half_max_obj_offset = 1 << 25; |
2777 | 0 | } |
2778 | 0 | pos = m->pipeline->getCount(); |
2779 | 0 | writeXRefStream( |
2780 | 0 | first_half_xref, |
2781 | 0 | first_half_end, |
2782 | 0 | first_half_max_obj_offset, |
2783 | 0 | t_lin_first, |
2784 | 0 | first_half_start, |
2785 | 0 | first_half_end, |
2786 | 0 | first_trailer_size, |
2787 | 0 | hint_length + second_xref_offset, |
2788 | 0 | hint_id, |
2789 | 0 | hint_offset, |
2790 | 0 | hint_length, |
2791 | 0 | (pass == 1), |
2792 | 0 | pass); |
2793 | 0 | qpdf_offset_t endpos = m->pipeline->getCount(); |
2794 | 0 | if (pass == 1) { |
2795 | | // Pad so we have enough room for the real xref stream. |
2796 | 0 | writePad(calculateXrefStreamPadding(endpos - pos)); |
2797 | 0 | first_xref_end = m->pipeline->getCount(); |
2798 | 0 | } else { |
2799 | | // Pad so that the next object starts at the same place as in pass 1. |
2800 | 0 | writePad(QIntC::to_size(first_xref_end - endpos)); |
2801 | |
|
2802 | 0 | if (m->pipeline->getCount() != first_xref_end) { |
2803 | 0 | throw std::logic_error( |
2804 | 0 | "insufficient padding for first pass xref stream; " |
2805 | 0 | "first_xref_end=" + |
2806 | 0 | std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos)); |
2807 | 0 | } |
2808 | 0 | } |
2809 | 0 | writeString("\n"); |
2810 | 0 | } else { |
2811 | 0 | writeXRefTable( |
2812 | 0 | t_lin_first, |
2813 | 0 | first_half_start, |
2814 | 0 | first_half_end, |
2815 | 0 | first_trailer_size, |
2816 | 0 | hint_length + second_xref_offset, |
2817 | 0 | (pass == 1), |
2818 | 0 | hint_id, |
2819 | 0 | hint_offset, |
2820 | 0 | hint_length, |
2821 | 0 | pass); |
2822 | 0 | writeString("startxref\n0\n%%EOF\n"); |
2823 | 0 | } |
2824 | | |
2825 | | // Parts 4 through 9 |
2826 | | |
2827 | 0 | for (auto const& cur_object: m->object_queue) { |
2828 | 0 | if (cur_object.getObjectID() == part6_end_marker) { |
2829 | 0 | first_half_max_obj_offset = m->pipeline->getCount(); |
2830 | 0 | } |
2831 | 0 | writeObject(cur_object); |
2832 | 0 | if (cur_object.getObjectID() == part4_end_marker) { |
2833 | 0 | if (m->encryption) { |
2834 | 0 | writeEncryptionDictionary(); |
2835 | 0 | } |
2836 | 0 | if (pass == 1) { |
2837 | 0 | m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2838 | 0 | } else { |
2839 | | // Part 5: hint stream |
2840 | 0 | writeString(hint_buffer); |
2841 | 0 | } |
2842 | 0 | } |
2843 | 0 | if (cur_object.getObjectID() == part6_end_marker) { |
2844 | 0 | part6_end_offset = m->pipeline->getCount(); |
2845 | 0 | } |
2846 | 0 | } |
2847 | | |
2848 | | // Part 10: overflow hint stream -- not used |
2849 | | |
2850 | | // Part 11: main cross reference table and trailer |
2851 | |
|
2852 | 0 | second_xref_offset = m->pipeline->getCount(); |
2853 | 0 | if (need_xref_stream) { |
2854 | 0 | pos = m->pipeline->getCount(); |
2855 | 0 | space_before_zero = writeXRefStream( |
2856 | 0 | second_half_xref, |
2857 | 0 | second_half_end, |
2858 | 0 | second_xref_offset, |
2859 | 0 | t_lin_second, |
2860 | 0 | 0, |
2861 | 0 | second_half_end, |
2862 | 0 | second_trailer_size, |
2863 | 0 | 0, |
2864 | 0 | 0, |
2865 | 0 | 0, |
2866 | 0 | 0, |
2867 | 0 | (pass == 1), |
2868 | 0 | pass); |
2869 | 0 | qpdf_offset_t endpos = m->pipeline->getCount(); |
2870 | |
|
2871 | 0 | if (pass == 1) { |
2872 | | // Pad so we have enough room for the real xref stream. See comments for previous |
2873 | | // xref stream on how we calculate the padding. |
2874 | 0 | writePad(calculateXrefStreamPadding(endpos - pos)); |
2875 | 0 | writeString("\n"); |
2876 | 0 | second_xref_end = m->pipeline->getCount(); |
2877 | 0 | } else { |
2878 | | // Make the file size the same. |
2879 | 0 | writePad( |
2880 | 0 | QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount())); |
2881 | 0 | writeString("\n"); |
2882 | | |
2883 | | // If this assertion fails, maybe we didn't have enough padding above. |
2884 | 0 | if (m->pipeline->getCount() != second_xref_end + hint_length) { |
2885 | 0 | throw std::logic_error( |
2886 | 0 | "count mismatch after xref stream; possible insufficient padding?"); |
2887 | 0 | } |
2888 | 0 | } |
2889 | 0 | } else { |
2890 | 0 | space_before_zero = writeXRefTable( |
2891 | 0 | t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass); |
2892 | 0 | } |
2893 | 0 | writeString("startxref\n"); |
2894 | 0 | writeString(std::to_string(first_xref_offset)); |
2895 | 0 | writeString("\n%%EOF\n"); |
2896 | |
|
2897 | 0 | if (pass == 1) { |
2898 | 0 | if (m->deterministic_id) { |
2899 | 0 | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
2900 | 0 | computeDeterministicIDData(); |
2901 | 0 | pp_md5 = nullptr; |
2902 | 0 | qpdf_assert_debug(m->md5_pipeline == nullptr); |
2903 | 0 | } |
2904 | | |
2905 | | // Close first pass pipeline |
2906 | 0 | file_size = m->pipeline->getCount(); |
2907 | 0 | pp_pass1 = nullptr; |
2908 | | |
2909 | | // Save hint offset since it will be set to zero by calling openObject. |
2910 | 0 | qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); |
2911 | | |
2912 | | // Write hint stream to a buffer |
2913 | 0 | { |
2914 | 0 | PipelinePopper pp_hint(this); |
2915 | 0 | activatePipelineStack(pp_hint, hint_buffer); |
2916 | 0 | writeHintStream(hint_id); |
2917 | 0 | } |
2918 | 0 | hint_length = QIntC::to_offset(hint_buffer.size()); |
2919 | | |
2920 | | // Restore hint offset |
2921 | 0 | m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); |
2922 | 0 | if (lin_pass1_file) { |
2923 | | // Write some debugging information |
2924 | 0 | fprintf( |
2925 | 0 | lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str()); |
2926 | 0 | fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str()); |
2927 | 0 | fprintf( |
2928 | 0 | lin_pass1_file, |
2929 | 0 | "%% second_xref_offset=%s\n", |
2930 | 0 | std::to_string(second_xref_offset).c_str()); |
2931 | 0 | fprintf( |
2932 | 0 | lin_pass1_file, |
2933 | 0 | "%% second_xref_end=%s\n", |
2934 | 0 | std::to_string(second_xref_end).c_str()); |
2935 | 0 | fclose(lin_pass1_file); |
2936 | 0 | lin_pass1_file = nullptr; |
2937 | 0 | } |
2938 | 0 | } |
2939 | 0 | } |
2940 | 0 | } |
2941 | | |
2942 | | void |
2943 | | QPDFWriter::enqueueObjectsStandard() |
2944 | 7.63k | { |
2945 | 7.63k | if (m->preserve_unreferenced_objects) { |
2946 | 0 | QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); |
2947 | 0 | for (auto const& oh: m->pdf.getAllObjects()) { |
2948 | 0 | enqueueObject(oh); |
2949 | 0 | } |
2950 | 0 | } |
2951 | | |
2952 | | // Put root first on queue. |
2953 | 7.63k | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2954 | 7.63k | enqueueObject(trailer.getKey("/Root")); |
2955 | | |
2956 | | // Next place any other objects referenced from the trailer dictionary into the queue, handling |
2957 | | // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op. |
2958 | 16.8k | for (auto& item: trailer.as_dictionary()) { |
2959 | 16.8k | if (!item.second.null()) { |
2960 | 14.0k | enqueueObject(item.second); |
2961 | 14.0k | } |
2962 | 16.8k | } |
2963 | 7.63k | } |
2964 | | |
2965 | | void |
2966 | | QPDFWriter::enqueueObjectsPCLm() |
2967 | 0 | { |
2968 | | // Image transform stream content for page strip images. Each of this new stream has to come |
2969 | | // after every page image strip written in the pclm file. |
2970 | 0 | std::string image_transform_content = "q /image Do Q\n"; |
2971 | | |
2972 | | // enqueue all pages first |
2973 | 0 | std::vector<QPDFObjectHandle> all = m->pdf.getAllPages(); |
2974 | 0 | for (auto& page: all) { |
2975 | | // enqueue page |
2976 | 0 | enqueueObject(page); |
2977 | | |
2978 | | // enqueue page contents stream |
2979 | 0 | enqueueObject(page.getKey("/Contents")); |
2980 | | |
2981 | | // enqueue all the strips for each page |
2982 | 0 | QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject"); |
2983 | 0 | for (auto& image: strips.as_dictionary()) { |
2984 | 0 | if (!image.second.null()) { |
2985 | 0 | enqueueObject(image.second); |
2986 | 0 | enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content)); |
2987 | 0 | } |
2988 | 0 | } |
2989 | 0 | } |
2990 | | |
2991 | | // Put root in queue. |
2992 | 0 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2993 | 0 | enqueueObject(trailer.getKey("/Root")); |
2994 | 0 | } |
2995 | | |
2996 | | void |
2997 | | QPDFWriter::indicateProgress(bool decrement, bool finished) |
2998 | 87.9k | { |
2999 | 87.9k | if (decrement) { |
3000 | 0 | --m->events_seen; |
3001 | 0 | return; |
3002 | 0 | } |
3003 | | |
3004 | 87.9k | ++m->events_seen; |
3005 | | |
3006 | 87.9k | if (!m->progress_reporter.get()) { |
3007 | 87.9k | return; |
3008 | 87.9k | } |
3009 | | |
3010 | 0 | if (finished || (m->events_seen >= m->next_progress_report)) { |
3011 | 0 | int percentage = |
3012 | 0 | (finished ? 100 |
3013 | 0 | : m->next_progress_report == 0 |
3014 | 0 | ? 0 |
3015 | 0 | : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected))); |
3016 | 0 | m->progress_reporter->reportProgress(percentage); |
3017 | 0 | } |
3018 | 0 | int increment = std::max(1, (m->events_expected / 100)); |
3019 | 0 | while (m->events_seen >= m->next_progress_report) { |
3020 | 0 | m->next_progress_report += increment; |
3021 | 0 | } |
3022 | 0 | } |
3023 | | |
3024 | | void |
3025 | | QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr) |
3026 | 0 | { |
3027 | 0 | m->progress_reporter = pr; |
3028 | 0 | } |
3029 | | |
3030 | | void |
3031 | | QPDFWriter::writeStandard() |
3032 | 7.63k | { |
3033 | 7.63k | auto pp_md5 = PipelinePopper(this); |
3034 | 7.63k | if (m->deterministic_id) { |
3035 | 0 | pushMD5Pipeline(pp_md5); |
3036 | 0 | } |
3037 | | |
3038 | | // Start writing |
3039 | | |
3040 | 7.63k | writeHeader(); |
3041 | 7.63k | writeString(m->extra_header_text); |
3042 | | |
3043 | 7.63k | if (m->pclm) { |
3044 | 0 | enqueueObjectsPCLm(); |
3045 | 7.63k | } else { |
3046 | 7.63k | enqueueObjectsStandard(); |
3047 | 7.63k | } |
3048 | | |
3049 | | // Now start walking queue, outputting each object. |
3050 | 88.0k | while (m->object_queue_front < m->object_queue.size()) { |
3051 | 80.4k | QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front); |
3052 | 80.4k | ++m->object_queue_front; |
3053 | 80.4k | writeObject(cur_object); |
3054 | 80.4k | } |
3055 | | |
3056 | | // Write out the encryption dictionary, if any |
3057 | 7.63k | if (m->encryption) { |
3058 | 7.50k | writeEncryptionDictionary(); |
3059 | 7.50k | } |
3060 | | |
3061 | | // Now write out xref. next_objid is now the number of objects. |
3062 | 7.63k | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
3063 | 7.63k | if (m->object_stream_to_objects.empty()) { |
3064 | | // Write regular cross-reference table |
3065 | 7.50k | writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid); |
3066 | 7.50k | } else { |
3067 | | // Write cross-reference stream. |
3068 | 130 | int xref_id = m->next_objid++; |
3069 | 130 | writeXRefStream( |
3070 | 130 | xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid); |
3071 | 130 | } |
3072 | 7.63k | writeString("startxref\n"); |
3073 | 7.63k | writeString(std::to_string(xref_offset)); |
3074 | 7.63k | writeString("\n%%EOF\n"); |
3075 | | |
3076 | 7.63k | if (m->deterministic_id) { |
3077 | 0 | QTC::TC( |
3078 | 0 | "qpdf", |
3079 | 0 | "QPDFWriter standard deterministic ID", |
3080 | 0 | m->object_stream_to_objects.empty() ? 0 : 1); |
3081 | 0 | } |
3082 | 7.63k | } |