/src/qpdf/libqpdf/QPDFWriter.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/assert_debug.h> |
2 | | |
3 | | #include <qpdf/qpdf-config.h> // include early for large file support |
4 | | |
5 | | #include <qpdf/QPDFWriter_private.hh> |
6 | | |
7 | | #include <qpdf/MD5.hh> |
8 | | #include <qpdf/Pl_AES_PDF.hh> |
9 | | #include <qpdf/Pl_Flate.hh> |
10 | | #include <qpdf/Pl_MD5.hh> |
11 | | #include <qpdf/Pl_PNGFilter.hh> |
12 | | #include <qpdf/Pl_RC4.hh> |
13 | | #include <qpdf/Pl_StdioFile.hh> |
14 | | #include <qpdf/Pl_String.hh> |
15 | | #include <qpdf/QIntC.hh> |
16 | | #include <qpdf/QPDFObjectHandle_private.hh> |
17 | | #include <qpdf/QPDFObject_private.hh> |
18 | | #include <qpdf/QPDF_private.hh> |
19 | | #include <qpdf/QTC.hh> |
20 | | #include <qpdf/QUtil.hh> |
21 | | #include <qpdf/RC4.hh> |
22 | | #include <qpdf/Util.hh> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <cstdlib> |
26 | | #include <stdexcept> |
27 | | |
28 | | using namespace std::literals; |
29 | | using namespace qpdf; |
30 | | |
31 | | QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) |
32 | 0 | { |
33 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
34 | 0 | } |
35 | | |
36 | | QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) : |
37 | 0 | handler(handler) |
38 | 0 | { |
39 | 0 | } |
40 | | |
41 | | QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT |
42 | | // (modernize-use-equals-default) |
43 | 0 | { |
44 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
45 | 0 | } |
46 | | |
47 | | void |
48 | | QPDFWriter::FunctionProgressReporter::reportProgress(int progress) |
49 | 0 | { |
50 | 0 | handler(progress); |
51 | 0 | } |
52 | | |
53 | | namespace |
54 | | { |
55 | | class Pl_stack |
56 | | { |
57 | | // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a |
58 | | // reference to a Popper instance can be passed into activate. When the Popper goes out of |
59 | | // scope, the pipeline stack is popped. This causes finish to be called on the current |
60 | | // pipeline and the pipeline stack to be popped until the top of stack is a previous active |
61 | | // top of stack and restores the pipeline to that point. It deletes any pipelines that it |
62 | | // pops. |
63 | | class Popper |
64 | | { |
65 | | friend class Pl_stack; |
66 | | |
67 | | public: |
68 | | Popper() = default; |
69 | | Popper(Popper const&) = delete; |
70 | | Popper(Popper&& other) noexcept |
71 | 0 | { |
72 | 0 | // For MSVC, default pops the stack |
73 | 0 | if (this != &other) { |
74 | 0 | stack = other.stack; |
75 | 0 | stack_id = other.stack_id; |
76 | 0 | other.stack = nullptr; |
77 | 0 | other.stack_id = 0; |
78 | 0 | }; |
79 | 0 | } |
80 | | Popper& operator=(Popper const&) = delete; |
81 | | Popper& |
82 | | operator=(Popper&& other) noexcept |
83 | 0 | { |
84 | 0 | // For MSVC, default pops the stack |
85 | 0 | if (this != &other) { |
86 | 0 | stack = other.stack; |
87 | 0 | stack_id = other.stack_id; |
88 | 0 | other.stack = nullptr; |
89 | 0 | other.stack_id = 0; |
90 | 0 | }; |
91 | 0 | return *this; |
92 | 0 | } |
93 | | |
94 | | ~Popper(); |
95 | | |
96 | | // Manually pop pipeline from the pipeline stack. |
97 | | void pop(); |
98 | | |
99 | | private: |
100 | | Popper(Pl_stack& stack) : |
101 | 167k | stack(&stack) |
102 | 167k | { |
103 | 167k | } |
104 | | |
105 | | Pl_stack* stack{nullptr}; |
106 | | unsigned long stack_id{0}; |
107 | | }; |
108 | | |
109 | | public: |
110 | | Pl_stack(pl::Count*& top) : |
111 | 9.75k | top(top) |
112 | 9.75k | { |
113 | 9.75k | } |
114 | | |
115 | | Popper |
116 | | popper() |
117 | 14.1k | { |
118 | 14.1k | return {*this}; |
119 | 14.1k | } |
120 | | |
121 | | void |
122 | | initialize(Pipeline* p) |
123 | 9.75k | { |
124 | 9.75k | auto c = std::make_unique<pl::Count>(++last_id, p); |
125 | 9.75k | top = c.get(); |
126 | 9.75k | stack.emplace_back(std::move(c)); |
127 | 9.75k | } |
128 | | |
129 | | Popper |
130 | | activate(std::string& str) |
131 | 106k | { |
132 | 106k | Popper pp{*this}; |
133 | 106k | activate(pp, str); |
134 | 106k | return pp; |
135 | 106k | } |
136 | | |
137 | | void |
138 | | activate(Popper& pp, std::string& str) |
139 | 106k | { |
140 | 106k | activate(pp, false, &str, nullptr); |
141 | 106k | } |
142 | | |
143 | | void |
144 | | activate(Popper& pp, std::unique_ptr<Pipeline> next) |
145 | 0 | { |
146 | 0 | count_buffer.clear(); |
147 | 0 | activate(pp, false, &count_buffer, std::move(next)); |
148 | 0 | } |
149 | | |
150 | | Popper |
151 | | activate( |
152 | | bool discard = false, |
153 | | std::string* str = nullptr, |
154 | | std::unique_ptr<Pipeline> next = nullptr) |
155 | 47.1k | { |
156 | 47.1k | Popper pp{*this}; |
157 | 47.1k | activate(pp, discard, str, std::move(next)); |
158 | 47.1k | return pp; |
159 | 47.1k | } |
160 | | |
161 | | void |
162 | | activate( |
163 | | Popper& pp, |
164 | | bool discard = false, |
165 | | std::string* str = nullptr, |
166 | | std::unique_ptr<Pipeline> next = nullptr) |
167 | 160k | { |
168 | 160k | std::unique_ptr<pl::Count> c; |
169 | 160k | if (next) { |
170 | 0 | c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next)); |
171 | 160k | } else if (discard) { |
172 | 54.2k | c = std::make_unique<pl::Count>(++last_id, nullptr); |
173 | 106k | } else if (!str) { |
174 | 0 | c = std::make_unique<pl::Count>(++last_id, top); |
175 | 106k | } else { |
176 | 106k | c = std::make_unique<pl::Count>(++last_id, *str); |
177 | 106k | } |
178 | 160k | pp.stack_id = last_id; |
179 | 160k | top = c.get(); |
180 | 160k | stack.emplace_back(std::move(c)); |
181 | 160k | } |
182 | | void |
183 | | activate_md5(Popper& pp) |
184 | 7.07k | { |
185 | 7.07k | qpdf_assert_debug(!md5_pipeline); |
186 | 7.07k | qpdf_assert_debug(md5_id == 0); |
187 | 7.07k | qpdf_assert_debug(top->getCount() == 0); |
188 | 7.07k | md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top); |
189 | 7.07k | md5_pipeline->persistAcrossFinish(true); |
190 | | // Special case code in pop clears m->md5_pipeline upon deletion. |
191 | 7.07k | auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get()); |
192 | 7.07k | pp.stack_id = last_id; |
193 | 7.07k | md5_id = last_id; |
194 | 7.07k | top = c.get(); |
195 | 7.07k | stack.emplace_back(std::move(c)); |
196 | 7.07k | } |
197 | | |
198 | | // Return the hex digest and disable the MD5 pipeline. |
199 | | std::string |
200 | | hex_digest() |
201 | 6.55k | { |
202 | 6.55k | qpdf_assert_debug(md5_pipeline); |
203 | 6.55k | auto digest = md5_pipeline->getHexDigest(); |
204 | 6.55k | md5_pipeline->enable(false); |
205 | 6.55k | return digest; |
206 | 6.55k | } |
207 | | |
208 | | void |
209 | | clear_buffer() |
210 | 0 | { |
211 | 0 | count_buffer.clear(); |
212 | 0 | } |
213 | | |
214 | | private: |
215 | | void |
216 | | pop(unsigned long stack_id) |
217 | 167k | { |
218 | 167k | if (!stack_id) { |
219 | 0 | return; |
220 | 0 | } |
221 | 167k | qpdf_assert_debug(stack.size() >= 2); |
222 | 167k | top->finish(); |
223 | 167k | qpdf_assert_debug(stack.back().get() == top); |
224 | | // It used to be possible for this assertion to fail if writeLinearized exits by |
225 | | // exception when deterministic ID. There are no longer any cases in which two |
226 | | // dynamically allocated pipeline Popper objects ever exist at the same time, so the |
227 | | // assertion will fail if they get popped out of order from automatic destruction. |
228 | 167k | qpdf_assert_debug(top->id() == stack_id); |
229 | 167k | if (stack_id == md5_id) { |
230 | 7.07k | md5_pipeline = nullptr; |
231 | 7.07k | md5_id = 0; |
232 | 7.07k | } |
233 | 167k | stack.pop_back(); |
234 | 167k | top = stack.back().get(); |
235 | 167k | } |
236 | | |
237 | | std::vector<std::unique_ptr<pl::Count>> stack; |
238 | | pl::Count*& top; |
239 | | std::unique_ptr<Pl_MD5> md5_pipeline{nullptr}; |
240 | | unsigned long last_id{0}; |
241 | | unsigned long md5_id{0}; |
242 | | std::string count_buffer; |
243 | | }; |
244 | | } // namespace |
245 | | |
246 | | Pl_stack::Popper::~Popper() |
247 | 167k | { |
248 | 167k | if (stack) { |
249 | 154k | stack->pop(stack_id); |
250 | 154k | } |
251 | 167k | } |
252 | | |
253 | | void |
254 | | Pl_stack::Popper::pop() |
255 | 13.1k | { |
256 | 13.1k | if (stack) { |
257 | 13.1k | stack->pop(stack_id); |
258 | 13.1k | } |
259 | 13.1k | stack_id = 0; |
260 | 13.1k | stack = nullptr; |
261 | 13.1k | } |
262 | | |
263 | | class QPDFWriter::Members |
264 | | { |
265 | | friend class QPDFWriter; |
266 | | |
267 | | public: |
268 | | ~Members(); |
269 | | |
270 | | private: |
271 | | Members(QPDF& pdf); |
272 | | Members(Members const&) = delete; |
273 | | |
274 | | QPDF& pdf; |
275 | | QPDFObjGen root_og{-1, 0}; |
276 | | char const* filename{"unspecified"}; |
277 | | FILE* file{nullptr}; |
278 | | bool close_file{false}; |
279 | | std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr}; |
280 | | Buffer* output_buffer{nullptr}; |
281 | | bool normalize_content_set{false}; |
282 | | bool normalize_content{false}; |
283 | | bool compress_streams{true}; |
284 | | bool compress_streams_set{false}; |
285 | | qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized}; |
286 | | bool stream_decode_level_set{false}; |
287 | | bool recompress_flate{false}; |
288 | | bool qdf_mode{false}; |
289 | | bool preserve_unreferenced_objects{false}; |
290 | | bool newline_before_endstream{false}; |
291 | | bool static_id{false}; |
292 | | bool suppress_original_object_ids{false}; |
293 | | bool direct_stream_lengths{true}; |
294 | | bool preserve_encryption{true}; |
295 | | bool linearized{false}; |
296 | | bool pclm{false}; |
297 | | qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; |
298 | | |
299 | | std::unique_ptr<QPDF::EncryptionData> encryption; |
300 | | std::string encryption_key; |
301 | | bool encrypt_use_aes{false}; |
302 | | |
303 | | std::string id1; // for /ID key of |
304 | | std::string id2; // trailer dictionary |
305 | | std::string final_pdf_version; |
306 | | int final_extension_level{0}; |
307 | | std::string min_pdf_version; |
308 | | int min_extension_level{0}; |
309 | | std::string forced_pdf_version; |
310 | | int forced_extension_level{0}; |
311 | | std::string extra_header_text; |
312 | | int encryption_dict_objid{0}; |
313 | | std::string cur_data_key; |
314 | | std::unique_ptr<Pipeline> file_pl; |
315 | | qpdf::pl::Count* pipeline{nullptr}; |
316 | | std::vector<QPDFObjectHandle> object_queue; |
317 | | size_t object_queue_front{0}; |
318 | | QPDFWriter::ObjTable obj; |
319 | | QPDFWriter::NewObjTable new_obj; |
320 | | int next_objid{1}; |
321 | | int cur_stream_length_id{0}; |
322 | | size_t cur_stream_length{0}; |
323 | | bool added_newline{false}; |
324 | | size_t max_ostream_index{0}; |
325 | | std::set<QPDFObjGen> normalized_streams; |
326 | | std::map<QPDFObjGen, int> page_object_to_seq; |
327 | | std::map<QPDFObjGen, int> contents_to_page_seq; |
328 | | std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; |
329 | | Pl_stack pipeline_stack; |
330 | | bool deterministic_id{false}; |
331 | | std::string deterministic_id_data; |
332 | | bool did_write_setup{false}; |
333 | | |
334 | | // For linearization only |
335 | | std::string lin_pass1_filename; |
336 | | |
337 | | // For progress reporting |
338 | | std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter; |
339 | | int events_expected{0}; |
340 | | int events_seen{0}; |
341 | | int next_progress_report{0}; |
342 | | }; |
343 | | |
344 | | QPDFWriter::Members::Members(QPDF& pdf) : |
345 | 9.87k | pdf(pdf), |
346 | 9.87k | root_og(pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)), |
347 | 9.87k | pipeline_stack(pipeline) |
348 | 9.87k | { |
349 | 9.87k | } |
350 | | |
351 | | QPDFWriter::Members::~Members() |
352 | 9.75k | { |
353 | 9.75k | if (file && close_file) { |
354 | 0 | fclose(file); |
355 | 0 | } |
356 | 9.75k | delete output_buffer; |
357 | 9.75k | } |
358 | | |
359 | | QPDFWriter::QPDFWriter(QPDF& pdf) : |
360 | 9.87k | m(new Members(pdf)) |
361 | 9.87k | { |
362 | 9.87k | } |
363 | | |
364 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) : |
365 | 0 | m(new Members(pdf)) |
366 | 0 | { |
367 | 0 | setOutputFilename(filename); |
368 | 0 | } |
369 | | |
370 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) : |
371 | 0 | m(new Members(pdf)) |
372 | 0 | { |
373 | 0 | setOutputFile(description, file, close_file); |
374 | 0 | } |
375 | | |
376 | | void |
377 | | QPDFWriter::setOutputFilename(char const* filename) |
378 | 0 | { |
379 | 0 | char const* description = filename; |
380 | 0 | FILE* f = nullptr; |
381 | 0 | bool close_file = false; |
382 | 0 | if (filename == nullptr) { |
383 | 0 | description = "standard output"; |
384 | 0 | QTC::TC("qpdf", "QPDFWriter write to stdout"); |
385 | 0 | f = stdout; |
386 | 0 | QUtil::binary_stdout(); |
387 | 0 | } else { |
388 | 0 | QTC::TC("qpdf", "QPDFWriter write to file"); |
389 | 0 | f = QUtil::safe_fopen(filename, "wb+"); |
390 | 0 | close_file = true; |
391 | 0 | } |
392 | 0 | setOutputFile(description, f, close_file); |
393 | 0 | } |
394 | | |
395 | | void |
396 | | QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file) |
397 | 0 | { |
398 | 0 | m->filename = description; |
399 | 0 | m->file = file; |
400 | 0 | m->close_file = close_file; |
401 | 0 | m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file); |
402 | 0 | m->pipeline_stack.initialize(m->file_pl.get()); |
403 | 0 | } |
404 | | |
405 | | void |
406 | | QPDFWriter::setOutputMemory() |
407 | 0 | { |
408 | 0 | m->filename = "memory buffer"; |
409 | 0 | m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output"); |
410 | 0 | m->pipeline_stack.initialize(m->buffer_pipeline.get()); |
411 | 0 | } |
412 | | |
413 | | Buffer* |
414 | | QPDFWriter::getBuffer() |
415 | 0 | { |
416 | 0 | Buffer* result = m->output_buffer; |
417 | 0 | m->output_buffer = nullptr; |
418 | 0 | return result; |
419 | 0 | } |
420 | | |
421 | | std::shared_ptr<Buffer> |
422 | | QPDFWriter::getBufferSharedPointer() |
423 | 0 | { |
424 | 0 | return std::shared_ptr<Buffer>(getBuffer()); |
425 | 0 | } |
426 | | |
427 | | void |
428 | | QPDFWriter::setOutputPipeline(Pipeline* p) |
429 | 9.75k | { |
430 | 9.75k | m->filename = "custom pipeline"; |
431 | 9.75k | m->pipeline_stack.initialize(p); |
432 | 9.75k | } |
433 | | |
434 | | void |
435 | | QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) |
436 | 9.75k | { |
437 | 9.75k | m->object_stream_mode = mode; |
438 | 9.75k | } |
439 | | |
440 | | void |
441 | | QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) |
442 | 0 | { |
443 | 0 | switch (mode) { |
444 | 0 | case qpdf_s_uncompress: |
445 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
446 | 0 | m->compress_streams = false; |
447 | 0 | break; |
448 | | |
449 | 0 | case qpdf_s_preserve: |
450 | 0 | m->stream_decode_level = qpdf_dl_none; |
451 | 0 | m->compress_streams = false; |
452 | 0 | break; |
453 | | |
454 | 0 | case qpdf_s_compress: |
455 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
456 | 0 | m->compress_streams = true; |
457 | 0 | break; |
458 | 0 | } |
459 | 0 | m->stream_decode_level_set = true; |
460 | 0 | m->compress_streams_set = true; |
461 | 0 | } |
462 | | |
463 | | void |
464 | | QPDFWriter::setCompressStreams(bool val) |
465 | 0 | { |
466 | 0 | m->compress_streams = val; |
467 | 0 | m->compress_streams_set = true; |
468 | 0 | } |
469 | | |
470 | | void |
471 | | QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) |
472 | 9.75k | { |
473 | 9.75k | m->stream_decode_level = val; |
474 | 9.75k | m->stream_decode_level_set = true; |
475 | 9.75k | } |
476 | | |
477 | | void |
478 | | QPDFWriter::setRecompressFlate(bool val) |
479 | 0 | { |
480 | 0 | m->recompress_flate = val; |
481 | 0 | } |
482 | | |
483 | | void |
484 | | QPDFWriter::setContentNormalization(bool val) |
485 | 0 | { |
486 | 0 | m->normalize_content_set = true; |
487 | 0 | m->normalize_content = val; |
488 | 0 | } |
489 | | |
490 | | void |
491 | | QPDFWriter::setQDFMode(bool val) |
492 | 0 | { |
493 | 0 | m->qdf_mode = val; |
494 | 0 | } |
495 | | |
496 | | void |
497 | | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
498 | 0 | { |
499 | 0 | m->preserve_unreferenced_objects = val; |
500 | 0 | } |
501 | | |
502 | | void |
503 | | QPDFWriter::setNewlineBeforeEndstream(bool val) |
504 | 0 | { |
505 | 0 | m->newline_before_endstream = val; |
506 | 0 | } |
507 | | |
508 | | void |
509 | | QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level) |
510 | 18.4k | { |
511 | 18.4k | bool set_version = false; |
512 | 18.4k | bool set_extension_level = false; |
513 | 18.4k | if (m->min_pdf_version.empty()) { |
514 | 9.57k | set_version = true; |
515 | 9.57k | set_extension_level = true; |
516 | 9.57k | } else { |
517 | 8.87k | int old_major = 0; |
518 | 8.87k | int old_minor = 0; |
519 | 8.87k | int min_major = 0; |
520 | 8.87k | int min_minor = 0; |
521 | 8.87k | parseVersion(version, old_major, old_minor); |
522 | 8.87k | parseVersion(m->min_pdf_version, min_major, min_minor); |
523 | 8.87k | int compare = compareVersions(old_major, old_minor, min_major, min_minor); |
524 | 8.87k | if (compare > 0) { |
525 | 557 | QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1); |
526 | 557 | set_version = true; |
527 | 557 | set_extension_level = true; |
528 | 8.31k | } else if (compare == 0) { |
529 | 60 | if (extension_level > m->min_extension_level) { |
530 | 1 | QTC::TC("qpdf", "QPDFWriter increasing extension level"); |
531 | 1 | set_extension_level = true; |
532 | 1 | } |
533 | 60 | } |
534 | 8.87k | } |
535 | | |
536 | 18.4k | if (set_version) { |
537 | 10.1k | m->min_pdf_version = version; |
538 | 10.1k | } |
539 | 18.4k | if (set_extension_level) { |
540 | 10.1k | m->min_extension_level = extension_level; |
541 | 10.1k | } |
542 | 18.4k | } |
543 | | |
544 | | void |
545 | | QPDFWriter::setMinimumPDFVersion(PDFVersion const& v) |
546 | 0 | { |
547 | 0 | std::string version; |
548 | 0 | int extension_level; |
549 | 0 | v.getVersion(version, extension_level); |
550 | 0 | setMinimumPDFVersion(version, extension_level); |
551 | 0 | } |
552 | | |
553 | | void |
554 | | QPDFWriter::forcePDFVersion(std::string const& version, int extension_level) |
555 | 0 | { |
556 | 0 | m->forced_pdf_version = version; |
557 | 0 | m->forced_extension_level = extension_level; |
558 | 0 | } |
559 | | |
560 | | void |
561 | | QPDFWriter::setExtraHeaderText(std::string const& text) |
562 | 0 | { |
563 | 0 | m->extra_header_text = text; |
564 | 0 | if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') { |
565 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text add newline"); |
566 | 0 | m->extra_header_text += "\n"; |
567 | 0 | } else { |
568 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text no newline"); |
569 | 0 | } |
570 | 0 | } |
571 | | |
572 | | void |
573 | | QPDFWriter::setStaticID(bool val) |
574 | 0 | { |
575 | 0 | m->static_id = val; |
576 | 0 | } |
577 | | |
578 | | void |
579 | | QPDFWriter::setDeterministicID(bool val) |
580 | 9.75k | { |
581 | 9.75k | m->deterministic_id = val; |
582 | 9.75k | } |
583 | | |
584 | | void |
585 | | QPDFWriter::setStaticAesIV(bool val) |
586 | 0 | { |
587 | 0 | if (val) { |
588 | 0 | Pl_AES_PDF::useStaticIV(); |
589 | 0 | } |
590 | 0 | } |
591 | | |
592 | | void |
593 | | QPDFWriter::setSuppressOriginalObjectIDs(bool val) |
594 | 0 | { |
595 | 0 | m->suppress_original_object_ids = val; |
596 | 0 | } |
597 | | |
598 | | void |
599 | | QPDFWriter::setPreserveEncryption(bool val) |
600 | 0 | { |
601 | 0 | m->preserve_encryption = val; |
602 | 0 | } |
603 | | |
604 | | void |
605 | | QPDFWriter::setLinearization(bool val) |
606 | 9.75k | { |
607 | 9.75k | m->linearized = val; |
608 | 9.75k | if (val) { |
609 | 9.75k | m->pclm = false; |
610 | 9.75k | } |
611 | 9.75k | } |
612 | | |
613 | | void |
614 | | QPDFWriter::setLinearizationPass1Filename(std::string const& filename) |
615 | 0 | { |
616 | 0 | m->lin_pass1_filename = filename; |
617 | 0 | } |
618 | | |
619 | | void |
620 | | QPDFWriter::setPCLm(bool val) |
621 | 0 | { |
622 | 0 | m->pclm = val; |
623 | 0 | if (val) { |
624 | 0 | m->linearized = false; |
625 | 0 | } |
626 | 0 | } |
627 | | |
628 | | void |
629 | | QPDFWriter::setR2EncryptionParametersInsecure( |
630 | | char const* user_password, |
631 | | char const* owner_password, |
632 | | bool allow_print, |
633 | | bool allow_modify, |
634 | | bool allow_extract, |
635 | | bool allow_annotate) |
636 | 0 | { |
637 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(1, 2, 5, true); |
638 | 0 | if (!allow_print) { |
639 | 0 | m->encryption->setP(3, false); |
640 | 0 | } |
641 | 0 | if (!allow_modify) { |
642 | 0 | m->encryption->setP(4, false); |
643 | 0 | } |
644 | 0 | if (!allow_extract) { |
645 | 0 | m->encryption->setP(5, false); |
646 | 0 | } |
647 | 0 | if (!allow_annotate) { |
648 | 0 | m->encryption->setP(6, false); |
649 | 0 | } |
650 | 0 | setEncryptionParameters(user_password, owner_password); |
651 | 0 | } |
652 | | |
653 | | void |
654 | | QPDFWriter::setR3EncryptionParametersInsecure( |
655 | | char const* user_password, |
656 | | char const* owner_password, |
657 | | bool allow_accessibility, |
658 | | bool allow_extract, |
659 | | bool allow_assemble, |
660 | | bool allow_annotate_and_form, |
661 | | bool allow_form_filling, |
662 | | bool allow_modify_other, |
663 | | qpdf_r3_print_e print) |
664 | 0 | { |
665 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(2, 3, 16, true); |
666 | 0 | interpretR3EncryptionParameters( |
667 | 0 | allow_accessibility, |
668 | 0 | allow_extract, |
669 | 0 | allow_assemble, |
670 | 0 | allow_annotate_and_form, |
671 | 0 | allow_form_filling, |
672 | 0 | allow_modify_other, |
673 | 0 | print, |
674 | 0 | qpdf_r3m_all); |
675 | 0 | setEncryptionParameters(user_password, owner_password); |
676 | 0 | } |
677 | | |
678 | | void |
679 | | QPDFWriter::setR4EncryptionParametersInsecure( |
680 | | char const* user_password, |
681 | | char const* owner_password, |
682 | | bool allow_accessibility, |
683 | | bool allow_extract, |
684 | | bool allow_assemble, |
685 | | bool allow_annotate_and_form, |
686 | | bool allow_form_filling, |
687 | | bool allow_modify_other, |
688 | | qpdf_r3_print_e print, |
689 | | bool encrypt_metadata, |
690 | | bool use_aes) |
691 | 0 | { |
692 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(4, 4, 16, encrypt_metadata); |
693 | 0 | m->encrypt_use_aes = use_aes; |
694 | 0 | interpretR3EncryptionParameters( |
695 | 0 | allow_accessibility, |
696 | 0 | allow_extract, |
697 | 0 | allow_assemble, |
698 | 0 | allow_annotate_and_form, |
699 | 0 | allow_form_filling, |
700 | 0 | allow_modify_other, |
701 | 0 | print, |
702 | 0 | qpdf_r3m_all); |
703 | 0 | setEncryptionParameters(user_password, owner_password); |
704 | 0 | } |
705 | | |
706 | | void |
707 | | QPDFWriter::setR5EncryptionParameters( |
708 | | char const* user_password, |
709 | | char const* owner_password, |
710 | | bool allow_accessibility, |
711 | | bool allow_extract, |
712 | | bool allow_assemble, |
713 | | bool allow_annotate_and_form, |
714 | | bool allow_form_filling, |
715 | | bool allow_modify_other, |
716 | | qpdf_r3_print_e print, |
717 | | bool encrypt_metadata) |
718 | 0 | { |
719 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(5, 5, 32, encrypt_metadata); |
720 | 0 | m->encrypt_use_aes = true; |
721 | 0 | interpretR3EncryptionParameters( |
722 | 0 | allow_accessibility, |
723 | 0 | allow_extract, |
724 | 0 | allow_assemble, |
725 | 0 | allow_annotate_and_form, |
726 | 0 | allow_form_filling, |
727 | 0 | allow_modify_other, |
728 | 0 | print, |
729 | 0 | qpdf_r3m_all); |
730 | 0 | setEncryptionParameters(user_password, owner_password); |
731 | 0 | } |
732 | | |
733 | | void |
734 | | QPDFWriter::setR6EncryptionParameters( |
735 | | char const* user_password, |
736 | | char const* owner_password, |
737 | | bool allow_accessibility, |
738 | | bool allow_extract, |
739 | | bool allow_assemble, |
740 | | bool allow_annotate_and_form, |
741 | | bool allow_form_filling, |
742 | | bool allow_modify_other, |
743 | | qpdf_r3_print_e print, |
744 | | bool encrypt_metadata) |
745 | 0 | { |
746 | 0 | m->encryption = std::make_unique<QPDF::EncryptionData>(5, 6, 32, encrypt_metadata); |
747 | 0 | interpretR3EncryptionParameters( |
748 | 0 | allow_accessibility, |
749 | 0 | allow_extract, |
750 | 0 | allow_assemble, |
751 | 0 | allow_annotate_and_form, |
752 | 0 | allow_form_filling, |
753 | 0 | allow_modify_other, |
754 | 0 | print, |
755 | 0 | qpdf_r3m_all); |
756 | 0 | m->encrypt_use_aes = true; |
757 | 0 | setEncryptionParameters(user_password, owner_password); |
758 | 0 | } |
759 | | |
760 | | void |
761 | | QPDFWriter::interpretR3EncryptionParameters( |
762 | | bool allow_accessibility, |
763 | | bool allow_extract, |
764 | | bool allow_assemble, |
765 | | bool allow_annotate_and_form, |
766 | | bool allow_form_filling, |
767 | | bool allow_modify_other, |
768 | | qpdf_r3_print_e print, |
769 | | qpdf_r3_modify_e modify) |
770 | 0 | { |
771 | | // Acrobat 5 security options: |
772 | | |
773 | | // Checkboxes: |
774 | | // Enable Content Access for the Visually Impaired |
775 | | // Allow Content Copying and Extraction |
776 | | |
777 | | // Allowed changes menu: |
778 | | // None |
779 | | // Only Document Assembly |
780 | | // Only Form Field Fill-in or Signing |
781 | | // Comment Authoring, Form Field Fill-in or Signing |
782 | | // General Editing, Comment and Form Field Authoring |
783 | | |
784 | | // Allowed printing menu: |
785 | | // None |
786 | | // Low Resolution |
787 | | // Full printing |
788 | | |
789 | | // Meanings of bits in P when R >= 3 |
790 | | // |
791 | | // 3: low-resolution printing |
792 | | // 4: document modification except as controlled by 6, 9, and 11 |
793 | | // 5: extraction |
794 | | // 6: add/modify annotations (comment), fill in forms |
795 | | // if 4+6 are set, also allows modification of form fields |
796 | | // 9: fill in forms even if 6 is clear |
797 | | // 10: accessibility; ignored by readers, should always be set |
798 | | // 11: document assembly even if 4 is clear |
799 | | // 12: high-resolution printing |
800 | 0 | if (!allow_accessibility && m->encryption->getR() <= 3) { |
801 | | // Bit 10 is deprecated and should always be set. This used to mean accessibility. There |
802 | | // is no way to disable accessibility with R > 3. |
803 | 0 | m->encryption->setP(10, false); |
804 | 0 | } |
805 | 0 | if (!allow_extract) { |
806 | 0 | m->encryption->setP(5, false); |
807 | 0 | } |
808 | |
|
809 | 0 | switch (print) { |
810 | 0 | case qpdf_r3p_none: |
811 | 0 | m->encryption->setP(3, false); // any printing |
812 | 0 | [[fallthrough]]; |
813 | 0 | case qpdf_r3p_low: |
814 | 0 | m->encryption->setP(12, false); // high resolution printing |
815 | 0 | [[fallthrough]]; |
816 | 0 | case qpdf_r3p_full: |
817 | 0 | break; |
818 | | // no default so gcc warns for missing cases |
819 | 0 | } |
820 | | |
821 | | // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full |
822 | | // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're |
823 | | // stuck with it. See also allow checks below to control the bits individually. |
824 | | |
825 | | // NOT EXERCISED IN TEST SUITE |
826 | 0 | switch (modify) { |
827 | 0 | case qpdf_r3m_none: |
828 | 0 | m->encryption->setP(11, false); // document assembly |
829 | 0 | [[fallthrough]]; |
830 | 0 | case qpdf_r3m_assembly: |
831 | 0 | m->encryption->setP(9, false); // filling in form fields |
832 | 0 | [[fallthrough]]; |
833 | 0 | case qpdf_r3m_form: |
834 | 0 | m->encryption->setP(6, false); // modify annotations, fill in form fields |
835 | 0 | [[fallthrough]]; |
836 | 0 | case qpdf_r3m_annotate: |
837 | 0 | m->encryption->setP(4, false); // other modifications |
838 | 0 | [[fallthrough]]; |
839 | 0 | case qpdf_r3m_all: |
840 | 0 | break; |
841 | | // no default so gcc warns for missing cases |
842 | 0 | } |
843 | | // END NOT EXERCISED IN TEST SUITE |
844 | | |
845 | 0 | if (!allow_assemble) { |
846 | 0 | m->encryption->setP(11, false); |
847 | 0 | } |
848 | 0 | if (!allow_annotate_and_form) { |
849 | 0 | m->encryption->setP(6, false); |
850 | 0 | } |
851 | 0 | if (!allow_form_filling) { |
852 | 0 | m->encryption->setP(9, false); |
853 | 0 | } |
854 | 0 | if (!allow_modify_other) { |
855 | 0 | m->encryption->setP(4, false); |
856 | 0 | } |
857 | 0 | } |
858 | | |
859 | | void |
860 | | QPDFWriter::setEncryptionParameters(char const* user_password, char const* owner_password) |
861 | 0 | { |
862 | 0 | generateID(true); |
863 | 0 | m->encryption->setId1(m->id1); |
864 | 0 | m->encryption_key = m->encryption->compute_parameters(user_password, owner_password); |
865 | 0 | setEncryptionMinimumVersion(); |
866 | 0 | } |
867 | | |
868 | | void |
869 | | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
870 | 9.75k | { |
871 | 9.75k | m->preserve_encryption = false; |
872 | 9.75k | QPDFObjectHandle trailer = qpdf.getTrailer(); |
873 | 9.75k | if (trailer.hasKey("/Encrypt")) { |
874 | 72 | generateID(true); |
875 | 72 | m->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
876 | 72 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
877 | 72 | int V = encrypt.getKey("/V").getIntValueAsInt(); |
878 | 72 | int key_len = 5; |
879 | 72 | if (V > 1) { |
880 | 0 | key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8; |
881 | 0 | } |
882 | 72 | const bool encrypt_metadata = |
883 | 72 | encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool() |
884 | 72 | ? encrypt.getKey("/EncryptMetadata").getBoolValue() |
885 | 72 | : true; |
886 | 72 | if (V >= 4) { |
887 | | // When copying encryption parameters, use AES even if the original file did not. |
888 | | // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of |
889 | | // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF |
890 | | // all potentially having different values. |
891 | 0 | m->encrypt_use_aes = true; |
892 | 0 | } |
893 | 72 | QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1); |
894 | 72 | QTC::TC("qpdf", "QPDFWriter copy use_aes", m->encrypt_use_aes ? 0 : 1); |
895 | | |
896 | 72 | m->encryption = std::make_unique<QPDF::EncryptionData>( |
897 | 72 | V, |
898 | 72 | encrypt.getKey("/R").getIntValueAsInt(), |
899 | 72 | key_len, |
900 | 72 | static_cast<int>(encrypt.getKey("/P").getIntValue()), |
901 | 72 | encrypt.getKey("/O").getStringValue(), |
902 | 72 | encrypt.getKey("/U").getStringValue(), |
903 | 72 | V < 5 ? "" : encrypt.getKey("/OE").getStringValue(), |
904 | 72 | V < 5 ? "" : encrypt.getKey("/UE").getStringValue(), |
905 | 72 | V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(), |
906 | 72 | m->id1, // m->id1 == the other file's id1 |
907 | 72 | encrypt_metadata); |
908 | 72 | m->encryption_key = V >= 5 |
909 | 72 | ? qpdf.getEncryptionKey() |
910 | 72 | : m->encryption->compute_encryption_key(qpdf.getPaddedUserPassword()); |
911 | 72 | setEncryptionMinimumVersion(); |
912 | 72 | } |
913 | 9.75k | } |
914 | | |
915 | | void |
916 | | QPDFWriter::disableIncompatibleEncryption(int major, int minor, int extension_level) |
917 | 0 | { |
918 | 0 | if (!m->encryption) { |
919 | 0 | return; |
920 | 0 | } |
921 | 0 | if (compareVersions(major, minor, 1, 3) < 0) { |
922 | 0 | m->encryption = nullptr; |
923 | 0 | return; |
924 | 0 | } |
925 | 0 | int V = m->encryption->getV(); |
926 | 0 | int R = m->encryption->getR(); |
927 | 0 | if (compareVersions(major, minor, 1, 4) < 0) { |
928 | 0 | if (V > 1 || R > 2) { |
929 | 0 | m->encryption = nullptr; |
930 | 0 | } |
931 | 0 | } else if (compareVersions(major, minor, 1, 5) < 0) { |
932 | 0 | if (V > 2 || R > 3) { |
933 | 0 | m->encryption = nullptr; |
934 | 0 | } |
935 | 0 | } else if (compareVersions(major, minor, 1, 6) < 0) { |
936 | 0 | if (m->encrypt_use_aes) { |
937 | 0 | m->encryption = nullptr; |
938 | 0 | } |
939 | 0 | } else if ( |
940 | 0 | (compareVersions(major, minor, 1, 7) < 0) || |
941 | 0 | ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) { |
942 | 0 | if (V >= 5 || R >= 5) { |
943 | 0 | m->encryption = nullptr; |
944 | 0 | } |
945 | 0 | } |
946 | |
|
947 | 0 | if (!m->encryption) { |
948 | 0 | QTC::TC("qpdf", "QPDFWriter forced version disabled encryption"); |
949 | 0 | } |
950 | 0 | } |
951 | | |
952 | | void |
953 | | QPDFWriter::parseVersion(std::string const& version, int& major, int& minor) const |
954 | 17.7k | { |
955 | 17.7k | major = QUtil::string_to_int(version.c_str()); |
956 | 17.7k | minor = 0; |
957 | 17.7k | size_t p = version.find('.'); |
958 | 17.7k | if ((p != std::string::npos) && (version.length() > p)) { |
959 | 17.7k | minor = QUtil::string_to_int(version.substr(p + 1).c_str()); |
960 | 17.7k | } |
961 | 17.7k | std::string tmp = std::to_string(major) + "." + std::to_string(minor); |
962 | 17.7k | if (tmp != version) { |
963 | | // The version number in the input is probably invalid. This happens with some files that |
964 | | // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately |
965 | | // QPDFWriter doesn't have a way to give a warning, so we just ignore this case. |
966 | 493 | } |
967 | 17.7k | } |
968 | | |
969 | | int |
970 | | QPDFWriter::compareVersions(int major1, int minor1, int major2, int minor2) const |
971 | 8.86k | { |
972 | 8.86k | if (major1 < major2) { |
973 | 79 | return -1; |
974 | 8.78k | } else if (major1 > major2) { |
975 | 58 | return 1; |
976 | 8.72k | } else if (minor1 < minor2) { |
977 | 8.16k | return -1; |
978 | 8.16k | } else if (minor1 > minor2) { |
979 | 499 | return 1; |
980 | 499 | } else { |
981 | 60 | return 0; |
982 | 60 | } |
983 | 8.86k | } |
984 | | |
985 | | void |
986 | | QPDFWriter::setEncryptionMinimumVersion() |
987 | 0 | { |
988 | 0 | auto const R = m->encryption->getR(); |
989 | 0 | if (R >= 6) { |
990 | 0 | setMinimumPDFVersion("1.7", 8); |
991 | 0 | } else if (R == 5) { |
992 | 0 | setMinimumPDFVersion("1.7", 3); |
993 | 0 | } else if (R == 4) { |
994 | 0 | setMinimumPDFVersion(m->encrypt_use_aes ? "1.6" : "1.5"); |
995 | 0 | } else if (R == 3) { |
996 | 0 | setMinimumPDFVersion("1.4"); |
997 | 0 | } else { |
998 | 0 | setMinimumPDFVersion("1.3"); |
999 | 0 | } |
1000 | 0 | } |
1001 | | |
1002 | | void |
1003 | | QPDFWriter::setDataKey(int objid) |
1004 | 133k | { |
1005 | 133k | if (m->encryption) { |
1006 | 0 | m->cur_data_key = QPDF::compute_data_key( |
1007 | 0 | m->encryption_key, |
1008 | 0 | objid, |
1009 | 0 | 0, |
1010 | 0 | m->encrypt_use_aes, |
1011 | 0 | m->encryption->getV(), |
1012 | 0 | m->encryption->getR()); |
1013 | 0 | } |
1014 | 133k | } |
1015 | | |
1016 | | unsigned int |
1017 | | QPDFWriter::bytesNeeded(long long n) |
1018 | 76.6k | { |
1019 | 76.6k | unsigned int bytes = 0; |
1020 | 176k | while (n) { |
1021 | 99.6k | ++bytes; |
1022 | 99.6k | n >>= 8; |
1023 | 99.6k | } |
1024 | 76.6k | return bytes; |
1025 | 76.6k | } |
1026 | | |
1027 | | void |
1028 | | QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes) |
1029 | 1.07M | { |
1030 | 1.07M | if (bytes > sizeof(unsigned long long)) { |
1031 | 0 | throw std::logic_error("QPDFWriter::writeBinary called with too many bytes"); |
1032 | 0 | } |
1033 | 1.07M | unsigned char data[sizeof(unsigned long long)]; |
1034 | 2.62M | for (unsigned int i = 0; i < bytes; ++i) { |
1035 | 1.55M | data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff); |
1036 | 1.55M | val >>= 8; |
1037 | 1.55M | } |
1038 | 1.07M | m->pipeline->write(data, bytes); |
1039 | 1.07M | } |
1040 | | |
1041 | | QPDFWriter& |
1042 | | QPDFWriter::write(std::string_view str) |
1043 | 9.34M | { |
1044 | 9.34M | m->pipeline->write(str); |
1045 | 9.34M | return *this; |
1046 | 9.34M | } |
1047 | | |
1048 | | QPDFWriter& |
1049 | | QPDFWriter::write(std::integral auto val) |
1050 | 1.23M | { |
1051 | 1.23M | m->pipeline->write(std::to_string(val)); |
1052 | 1.23M | return *this; |
1053 | 1.23M | } _ZN10QPDFWriter5writeITkNSt3__18integralEiEERS_T_ Line | Count | Source | 1050 | 639k | { | 1051 | 639k | m->pipeline->write(std::to_string(val)); | 1052 | 639k | return *this; | 1053 | 639k | } |
_ZN10QPDFWriter5writeITkNSt3__18integralExEERS_T_ Line | Count | Source | 1050 | 410k | { | 1051 | 410k | m->pipeline->write(std::to_string(val)); | 1052 | 410k | return *this; | 1053 | 410k | } |
_ZN10QPDFWriter5writeITkNSt3__18integralEmEERS_T_ Line | Count | Source | 1050 | 110k | { | 1051 | 110k | m->pipeline->write(std::to_string(val)); | 1052 | 110k | return *this; | 1053 | 110k | } |
_ZN10QPDFWriter5writeITkNSt3__18integralEjEERS_T_ Line | Count | Source | 1050 | 76.6k | { | 1051 | 76.6k | m->pipeline->write(std::to_string(val)); | 1052 | 76.6k | return *this; | 1053 | 76.6k | } |
|
1054 | | |
1055 | | QPDFWriter& |
1056 | | QPDFWriter::write(size_t count, char c) |
1057 | 41.7k | { |
1058 | 41.7k | m->pipeline->write(count, c); |
1059 | 41.7k | return *this; |
1060 | 41.7k | } |
1061 | | |
1062 | | QPDFWriter& |
1063 | | QPDFWriter::write_name(std::string const& str) |
1064 | 692k | { |
1065 | 692k | m->pipeline->write(Name::normalize(str)); |
1066 | 692k | return *this; |
1067 | 692k | } |
1068 | | |
1069 | | QPDFWriter& |
1070 | | QPDFWriter::write_string(std::string const& str, bool force_binary) |
1071 | 26.1k | { |
1072 | 26.1k | m->pipeline->write(QPDF_String(str).unparse(force_binary)); |
1073 | 26.1k | return *this; |
1074 | 26.1k | } |
1075 | | |
// Forward the arguments to the pipeline's write, but only when QDF mode is on. Returns *this so
// that calls can be chained.
template <typename... Args>
QPDFWriter&
QPDFWriter::write_qdf(Args&&... args)
{
    if (!m->qdf_mode) {
        return *this;
    }
    m->pipeline->write(std::forward<Args>(args)...);
    return *this;
}
|
1085 | | |
// Forward the arguments to the pipeline's write, but only when QDF mode is off. Returns *this so
// that calls can be chained.
template <typename... Args>
QPDFWriter&
QPDFWriter::write_no_qdf(Args&&... args)
{
    if (m->qdf_mode) {
        return *this;
    }
    m->pipeline->write(std::forward<Args>(args)...);
    return *this;
}
|
1095 | | |
1096 | | void |
1097 | | QPDFWriter::adjustAESStreamLength(size_t& length) |
1098 | 64.3k | { |
1099 | 64.3k | if (m->encryption && !m->cur_data_key.empty() && m->encrypt_use_aes) { |
1100 | | // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16. It will |
1101 | | // also be prepended by 16 bits of random data. |
1102 | 0 | length += 32 - (length & 0xf); |
1103 | 0 | } |
1104 | 64.3k | } |
1105 | | |
1106 | | QPDFWriter& |
1107 | | QPDFWriter::write_encrypted(std::string_view str) |
1108 | 63.9k | { |
1109 | 63.9k | if (!(m->encryption && !m->cur_data_key.empty())) { |
1110 | 63.9k | write(str); |
1111 | 63.9k | } else if (m->encrypt_use_aes) { |
1112 | 0 | write(pl::pipe<Pl_AES_PDF>(str, true, m->cur_data_key)); |
1113 | 0 | } else { |
1114 | 0 | write(pl::pipe<Pl_RC4>(str, m->cur_data_key)); |
1115 | 0 | } |
1116 | | |
1117 | 63.9k | return *this; |
1118 | 63.9k | } |
1119 | | |
1120 | | void |
1121 | | QPDFWriter::computeDeterministicIDData() |
1122 | 6.55k | { |
1123 | 6.55k | if (!m->id2.empty()) { |
1124 | | // Can't happen in the code |
1125 | 0 | throw std::logic_error( |
1126 | 0 | "Deterministic ID computation enabled after ID generation has already occurred."); |
1127 | 0 | } |
1128 | 6.55k | qpdf_assert_debug(m->deterministic_id_data.empty()); |
1129 | 6.55k | m->deterministic_id_data = m->pipeline_stack.hex_digest(); |
1130 | 6.55k | } |
1131 | | |
1132 | | int |
1133 | | QPDFWriter::openObject(int objid) |
1134 | 172k | { |
1135 | 172k | if (objid == 0) { |
1136 | 0 | objid = m->next_objid++; |
1137 | 0 | } |
1138 | 172k | m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); |
1139 | 172k | write(objid).write(" 0 obj\n"); |
1140 | 172k | return objid; |
1141 | 172k | } |
1142 | | |
1143 | | void |
1144 | | QPDFWriter::closeObject(int objid) |
1145 | 172k | { |
1146 | | // Write a newline before endobj as it makes the file easier to repair. |
1147 | 172k | write("\nendobj\n").write_qdf("\n"); |
1148 | 172k | auto& new_obj = m->new_obj[objid]; |
1149 | 172k | new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); |
1150 | 172k | } |
1151 | | |
1152 | | void |
1153 | | QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen og) |
1154 | 82.3k | { |
1155 | 82.3k | int objid = og.getObj(); |
1156 | 82.3k | if ((og.getGen() != 0) || (!m->object_stream_to_objects.contains(objid))) { |
1157 | | // This is not an object stream. |
1158 | 74.6k | return; |
1159 | 74.6k | } |
1160 | | |
1161 | | // Reserve numbers for the objects that belong to this object stream. |
1162 | 95.4k | for (auto const& iter: m->object_stream_to_objects[objid]) { |
1163 | 95.4k | m->obj[iter].renumber = m->next_objid++; |
1164 | 95.4k | } |
1165 | 7.65k | } |
1166 | | |
1167 | | void |
1168 | | QPDFWriter::enqueueObject(QPDFObjectHandle object) |
1169 | 81.9k | { |
1170 | 81.9k | if (object.isIndirect()) { |
1171 | | // This owner check can only be done for indirect objects. It is possible for a direct |
1172 | | // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from |
1173 | | // one file was insert into another file without copying. Doing that is safe even if the |
1174 | | // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner. |
1175 | 81.9k | if (object.getOwningQPDF() != &(m->pdf)) { |
1176 | 0 | QTC::TC("qpdf", "QPDFWriter foreign object"); |
1177 | 0 | throw std::logic_error( |
1178 | 0 | "QPDFObjectHandle from different QPDF found while writing. Use " |
1179 | 0 | "QPDF::copyForeignObject to add objects from another file."); |
1180 | 0 | } |
1181 | | |
1182 | 81.9k | if (m->qdf_mode && object.isStreamOfType("/XRef")) { |
1183 | | // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so |
1184 | | // will confuse fix-qdf, which expects to see only one XRef stream at the end of the |
1185 | | // file. This case can occur when creating a QDF from a file with object streams when |
1186 | | // preserving unreferenced objects since the old cross reference streams are not |
1187 | | // actually referenced by object number. |
1188 | 0 | QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode"); |
1189 | 0 | return; |
1190 | 0 | } |
1191 | | |
1192 | 81.9k | QPDFObjGen og = object.getObjGen(); |
1193 | 81.9k | auto& obj = m->obj[og]; |
1194 | | |
1195 | 81.9k | if (obj.renumber == 0) { |
1196 | 81.2k | if (obj.object_stream > 0) { |
1197 | | // This is in an object stream. Don't process it here. Instead, enqueue the object |
1198 | | // stream. Object streams always have generation 0. |
1199 | | // Detect loops by storing invalid object ID -1, which will get overwritten later. |
1200 | 0 | obj.renumber = -1; |
1201 | 0 | enqueueObject(m->pdf.getObject(obj.object_stream, 0)); |
1202 | 81.2k | } else { |
1203 | 81.2k | m->object_queue.push_back(object); |
1204 | 81.2k | obj.renumber = m->next_objid++; |
1205 | | |
1206 | 81.2k | if ((og.getGen() == 0) && m->object_stream_to_objects.contains(og.getObj())) { |
1207 | | // For linearized files, uncompressed objects go at end, and we take care of |
1208 | | // assigning numbers to them elsewhere. |
1209 | 7.59k | if (!m->linearized) { |
1210 | 0 | assignCompressedObjectNumbers(og); |
1211 | 0 | } |
1212 | 73.6k | } else if ((!m->direct_stream_lengths) && object.isStream()) { |
1213 | | // reserve next object ID for length |
1214 | 0 | ++m->next_objid; |
1215 | 0 | } |
1216 | 81.2k | } |
1217 | 81.2k | } else if (obj.renumber == -1) { |
1218 | | // This can happen if a specially constructed file indicates that an object stream is |
1219 | | // inside itself. |
1220 | 0 | } |
1221 | 81.9k | return; |
1222 | 81.9k | } else if (!m->linearized) { |
1223 | 0 | if (object.isArray()) { |
1224 | 0 | for (auto& item: object.as_array()) { |
1225 | 0 | enqueueObject(item); |
1226 | 0 | } |
1227 | 0 | } else if (auto d = object.as_dictionary()) { |
1228 | 0 | for (auto const& item: d) { |
1229 | 0 | if (!item.second.null()) { |
1230 | 0 | enqueueObject(item.second); |
1231 | 0 | } |
1232 | 0 | } |
1233 | 0 | } |
1234 | 0 | } else { |
1235 | | // ignore |
1236 | 0 | } |
1237 | 81.9k | } |
1238 | | |
1239 | | void |
1240 | | QPDFWriter::unparseChild(QPDFObjectHandle const& child, size_t level, int flags) |
1241 | 2.85M | { |
1242 | 2.85M | if (!m->linearized) { |
1243 | 0 | enqueueObject(child); |
1244 | 0 | } |
1245 | 2.85M | if (child.isIndirect()) { |
1246 | 410k | write(m->obj[child].renumber).write(" 0 R"); |
1247 | 2.44M | } else { |
1248 | 2.44M | unparseObject(child, level, flags); |
1249 | 2.44M | } |
1250 | 2.85M | } |
1251 | | |
1252 | | void |
1253 | | QPDFWriter::writeTrailer( |
1254 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass) |
1255 | 26.7k | { |
1256 | 26.7k | QPDFObjectHandle trailer = getTrimmedTrailer(); |
1257 | 26.7k | if (xref_stream) { |
1258 | 25.5k | m->cur_data_key.clear(); |
1259 | 25.5k | } else { |
1260 | 1.14k | write("trailer <<"); |
1261 | 1.14k | } |
1262 | 26.7k | write_qdf("\n"); |
1263 | 26.7k | if (which == t_lin_second) { |
1264 | 13.0k | write(" /Size ").write(size); |
1265 | 13.6k | } else { |
1266 | 33.0k | for (auto const& [key, value]: trailer.as_dictionary()) { |
1267 | 33.0k | if (value.null()) { |
1268 | 6.56k | continue; |
1269 | 6.56k | } |
1270 | 26.5k | write_qdf(" ").write_no_qdf(" ").write_name(key).write(" "); |
1271 | 26.5k | if (key == "/Size") { |
1272 | 2.10k | write(size); |
1273 | 2.10k | if (which == t_lin_first) { |
1274 | 2.10k | write(" /Prev "); |
1275 | 2.10k | qpdf_offset_t pos = m->pipeline->getCount(); |
1276 | 2.10k | write(prev).write(QIntC::to_size(pos - m->pipeline->getCount() + 21), ' '); |
1277 | 2.10k | } |
1278 | 24.4k | } else { |
1279 | 24.4k | unparseChild(value, 1, 0); |
1280 | 24.4k | } |
1281 | 26.5k | write_qdf("\n"); |
1282 | 26.5k | } |
1283 | 13.6k | } |
1284 | | |
1285 | | // Write ID |
1286 | 26.7k | write_qdf(" ").write(" /ID ["); |
1287 | 26.7k | if (linearization_pass == 1) { |
1288 | 13.6k | std::string original_id1 = getOriginalID1(); |
1289 | 13.6k | if (original_id1.empty()) { |
1290 | 12.8k | write("<00000000000000000000000000000000>"); |
1291 | 12.8k | } else { |
1292 | | // Write a string of zeroes equal in length to the representation of the original ID. |
1293 | | // While writing the original ID would have the same number of bytes, it would cause a |
1294 | | // change to the deterministic ID generated by older versions of the software that |
1295 | | // hard-coded the length of the ID to 16 bytes. |
1296 | 783 | size_t len = QPDF_String(original_id1).unparse(true).length() - 2; |
1297 | 783 | write("<").write(len, '0').write(">"); |
1298 | 783 | } |
1299 | 13.6k | write("<00000000000000000000000000000000>"); |
1300 | 13.6k | } else { |
1301 | 13.0k | if (linearization_pass == 0 && m->deterministic_id) { |
1302 | 0 | computeDeterministicIDData(); |
1303 | 0 | } |
1304 | 13.0k | generateID(m->encryption.get()); |
1305 | 13.0k | write_string(m->id1, true).write_string(m->id2, true); |
1306 | 13.0k | } |
1307 | 26.7k | write("]"); |
1308 | | |
1309 | 26.7k | if (which != t_lin_second) { |
1310 | | // Write reference to encryption dictionary |
1311 | 13.4k | if (m->encryption) { |
1312 | 0 | write(" /Encrypt ").write(m->encryption_dict_objid).write(" 0 R"); |
1313 | 0 | } |
1314 | 13.4k | } |
1315 | | |
1316 | 26.7k | write_qdf("\n>>").write_no_qdf(" >>"); |
1317 | 26.7k | } |
1318 | | |
// Pipe the data of stream through the currently active pipeline and report whether it was filtered
// on the way. When stream_data is non-null the pipeline is activated on *stream_data; otherwise
// m->pipeline_stack.activate(true) is used (NOTE(review): presumably this discards the data --
// confirm against Pl_stack::activate).
//
// compress_stream ends up true only if the data was filtered and is to be compressed.
// is_root_metadata is set to true exactly when stream.isRootMetadata() is true. The return value is
// the result of the last pipeStreamData call. If getting the stream data fails while no filtering
// is attempted, or on the second attempt, a std::runtime_error naming the stream is thrown.
1319 | | bool |
1320 | | QPDFWriter::willFilterStream( |
1321 | | QPDFObjectHandle stream, |
1322 | | bool& compress_stream, // out only |
1323 | | bool& is_root_metadata, // out only |
1324 | | std::string* stream_data) |
1325 | 69.6k | { |
1326 | 69.6k | compress_stream = false; |
1327 | 69.6k | is_root_metadata = false; |
1328 | |
1329 | 69.6k | QPDFObjGen old_og = stream.getObjGen(); |
1330 | 69.6k | QPDFObjectHandle stream_dict = stream.getDict(); |
1331 | |
1332 | 69.6k | if (stream.isRootMetadata()) { |
1333 | 384 | is_root_metadata = true; |
1334 | 384 | } |
// Start from the writer-wide settings; a stream whose getFilterOnWrite() is false is never
// filtered.
1335 | 69.6k | bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level; |
1336 | 69.6k | bool filter_on_write = stream.getFilterOnWrite(); |
1337 | 69.6k | if (!filter_on_write) { |
1338 | 11.9k | QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false"); |
1339 | 11.9k | filter = false; |
1340 | 11.9k | } |
1341 | 69.6k | if (filter_on_write && m->compress_streams) { |
1342 | | // Don't filter if the stream is already compressed with FlateDecode. This way we don't make |
1343 | | // it worse if the original file used a better Flate algorithm, and we don't spend time and |
1344 | | // CPU cycles uncompressing and recompressing stuff. This can be overridden with |
1345 | | // setRecompressFlate(true). |
1346 | 57.7k | QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); |
1347 | 57.7k | if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() && |
1348 | 57.7k | (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) { |
1349 | 10.6k | QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); |
1350 | 10.6k | filter = false; |
1351 | 10.6k | } |
1352 | 57.7k | } |
// Decide what to do with the data. The three cases are mutually exclusive and tried in this order:
// root metadata that will not be encrypted is fully decoded and left uncompressed; streams listed
// in m->normalized_streams are normalized; anything else that is still to be filtered is
// compressed if m->compress_streams is set.
1353 | 69.6k | bool normalize = false; |
1354 | 69.6k | bool uncompress = false; |
1355 | 69.6k | if (filter_on_write && is_root_metadata && |
1356 | 69.6k | (!m->encryption || !m->encryption->getEncryptMetadata())) { |
1357 | 384 | QTC::TC("qpdf", "QPDFWriter not compressing metadata"); |
1358 | 384 | filter = true; |
1359 | 384 | compress_stream = false; |
1360 | 384 | uncompress = true; |
1361 | 69.3k | } else if (filter_on_write && m->normalize_content && m->normalized_streams.contains(old_og)) { |
1362 | 0 | normalize = true; |
1363 | 0 | filter = true; |
1364 | 69.3k | } else if (filter_on_write && filter && m->compress_streams) { |
1365 | 46.7k | compress_stream = true; |
1366 | 46.7k | QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); |
1367 | 46.7k | } |
1368 | |
1369 | | // Disable compression for empty streams to improve compatibility |
1370 | 69.6k | if (stream_dict.getKey("/Length").isInteger() && |
1371 | 69.6k | stream_dict.getKey("/Length").getIntValue() == 0) { |
1372 | 3.64k | filter = true; |
1373 | 3.64k | compress_stream = false; |
1374 | 3.64k | } |
1375 | |
// At most two passes: if filtering was requested but the data could not be filtered, or piping the
// data threw, filtering is switched off (also on the stream itself, via setFilterOnWrite(false))
// and the data is piped once more.
1376 | 69.6k | bool filtered = false; |
1377 | 79.3k | for (bool first_attempt: {true, false}) { |
1378 | 79.3k | auto pp_stream_data = stream_data ? m->pipeline_stack.activate(*stream_data) |
1379 | 79.3k | : m->pipeline_stack.activate(true); |
1380 | |
1381 | 79.3k | try { |
1382 | 79.3k | filtered = stream.pipeStreamData( |
1383 | 79.3k | m->pipeline, |
1384 | 79.3k | !filter ? 0 |
1385 | 79.3k | : ((normalize ? qpdf_ef_normalize : 0) | |
1386 | 49.4k | (compress_stream ? qpdf_ef_compress : 0)), |
1387 | 79.3k | !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level), |
1388 | 79.3k | false, |
1389 | 79.3k | first_attempt); |
1390 | 79.3k | if (filter && !filtered) { |
1391 | | // Try again |
1392 | 9.59k | filter = false; |
1393 | 9.59k | stream.setFilterOnWrite(false); |
1394 | 69.7k | } else { |
1395 | 69.7k | break; |
1396 | 69.7k | } |
1397 | 79.3k | } catch (std::runtime_error& e) { |
1398 | 76 | if (filter && first_attempt) { |
1399 | 62 | stream.warn("error while getting stream data: "s + e.what()); |
1400 | 62 | stream.warn("qpdf will attempt to write the damaged stream unchanged"); |
1401 | 62 | filter = false; |
1402 | 62 | stream.setFilterOnWrite(false); |
1403 | 62 | continue; |
1404 | 62 | } |
1405 | 14 | throw std::runtime_error( |
1406 | 14 | "error while getting stream data for " + stream.unparse() + ": " + e.what()); |
1407 | 76 | } |
// Reached only when the attempt finished without filtering although filtering was requested:
// empty the caller's buffer before the data is piped again.
1408 | 9.59k | if (stream_data) { |
1409 | 2.21k | stream_data->clear(); |
1410 | 2.21k | } |
1411 | 9.59k | } |
// Data that was not filtered is written as it is, so it must not be declared as compressed.
1412 | 69.7k | if (!filtered) { |
1413 | 29.4k | compress_stream = false; |
1414 | 29.4k | } |
1415 | 69.7k | return filtered; |
1416 | 69.6k | } |
1417 | | |
// Write the serialized form of object to the output. Arrays and dictionaries are written entry by
// entry through unparseChild; a stream is written as its dictionary followed by its data; a string
// is encrypted first when encryption applies; every other type is written as returned by
// unparseResolved().
//
// level is the nesting depth; it only affects the indentation used in QDF mode. flags is a
// combination of the f_* bits tested below (f_stream, f_filtered, f_hex_string, f_no_encryption,
// f_in_ostream); f_stream is not passed on to children. stream_length and compress are used only
// when f_stream is set: stream_length is the value written for /Length when
// m->direct_stream_lengths is set, and compress together with f_filtered decides whether
// /Filter /FlateDecode is written.
1418 | | void |
1419 | | QPDFWriter::unparseObject( |
1420 | | QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress) |
1421 | 2.77M | { |
1422 | 2.77M | QPDFObjGen old_og = object.getObjGen(); |
1423 | 2.77M | int child_flags = flags & ~f_stream; |
1424 | | // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they |
1425 | | // include the preceding newline. |
1426 | 2.77M | std::string indent_large = " "; |
1427 | 2.77M | if (m->qdf_mode) { |
1428 | 0 | indent_large.append(2 * (level + 1), ' '); |
1429 | 0 | indent_large[0] = '\n'; |
1430 | 0 | } |
// indent is a view of the start of indent_large; in QDF mode it is two characters shorter.
1431 | 2.77M | std::string_view indent{indent_large.data(), m->qdf_mode ? indent_large.size() - 2 : 1}; |
1432 | |
1433 | 2.77M | if (auto const tc = object.getTypeCode(); tc == ::ot_array) { |
1434 | | // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the |
1435 | | // [ in the /H key of the linearization parameter dictionary. We'll do this unconditionally |
1436 | | // for all arrays because it looks nicer and doesn't make the files that much bigger. |
1437 | 109k | write("["); |
1438 | 2.16M | for (auto const& item: object.as_array()) { |
1439 | 2.16M | write(indent_large); |
1440 | 2.16M | unparseChild(item, level + 1, child_flags); |
1441 | 2.16M | } |
1442 | 109k | write(indent).write("]"); |
1443 | 2.66M | } else if (tc == ::ot_dictionary) { |
1444 | | // Handle special cases for specific dictionaries. |
1445 | |
1446 | 224k | if (old_og == m->root_og) { |
1447 | | // Extensions dictionaries. |
1448 | |
1449 | | // We have one of several cases: |
1450 | | // |
1451 | | // * We need ADBE |
1452 | | // - We already have Extensions |
1453 | | // - If it has the right ADBE, preserve it |
1454 | | // - Otherwise, replace ADBE |
1455 | | // - We don't have Extensions: create one from scratch |
1456 | | // * We don't want ADBE |
1457 | | // - We already have Extensions |
1458 | | // - If it only has ADBE, remove it |
1459 | | // - If it has other things, keep those and remove ADBE |
1460 | | // - We have no extensions: no action required |
1461 | | // |
1462 | | // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE |
1463 | | // dictionary, so we can modify in place. |
1464 | |
1465 | 13.4k | auto extensions = object.getKey("/Extensions"); |
1466 | 13.4k | const bool has_extensions = extensions.isDictionary(); |
1467 | 13.4k | const bool need_extensions_adbe = m->final_extension_level > 0; |
1468 | |
1469 | 13.4k | if (has_extensions || need_extensions_adbe) { |
1470 | | // Make a shallow copy of this object so we can modify it safely without affecting |
1471 | | // the original. This code has logic to skip certain keys in agreement with |
1472 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them |
1473 | | // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy |
1474 | | // here because all we are doing is removing or replacing top-level keys. |
1475 | 490 | object = object.unsafeShallowCopy(); |
// An /Extensions value that is not a dictionary is treated as absent.
1476 | 490 | if (!has_extensions) { |
1477 | 0 | extensions = QPDFObjectHandle(); |
1478 | 0 | } |
1479 | |
1480 | 490 | const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE"); |
1481 | 490 | const bool have_extensions_other = |
1482 | 490 | extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u); |
1483 | |
1484 | 490 | if (need_extensions_adbe) { |
1485 | 17 | if (!(have_extensions_other || have_extensions_adbe)) { |
1486 | | // We need Extensions and don't have it. Create it here. |
1487 | 0 | QTC::TC("qpdf", "QPDFWriter create Extensions", m->qdf_mode ? 0 : 1); |
1488 | 0 | extensions = object.replaceKeyAndGetNew( |
1489 | 0 | "/Extensions", QPDFObjectHandle::newDictionary()); |
1490 | 0 | } |
1491 | 473 | } else if (!have_extensions_other) { |
1492 | | // We have Extensions dictionary and don't want one. |
1493 | 155 | if (have_extensions_adbe) { |
1494 | 145 | QTC::TC("qpdf", "QPDFWriter remove existing Extensions"); |
1495 | 145 | object.removeKey("/Extensions"); |
1496 | 145 | extensions = QPDFObjectHandle(); // uninitialized |
1497 | 145 | } |
1498 | 155 | } |
1499 | |
1500 | 490 | if (extensions) { |
1501 | 345 | QTC::TC("qpdf", "QPDFWriter preserve Extensions"); |
1502 | 345 | QPDFObjectHandle adbe = extensions.getKey("/ADBE"); |
// An existing /ADBE entry is kept only if it names exactly the final PDF version and extension
// level; otherwise it is replaced or removed below.
1503 | 345 | if (adbe.isDictionary() && |
1504 | 345 | adbe.getKey("/BaseVersion").isNameAndEquals("/" + m->final_pdf_version) && |
1505 | 345 | adbe.getKey("/ExtensionLevel").isInteger() && |
1506 | 345 | (adbe.getKey("/ExtensionLevel").getIntValue() == |
1507 | 9 | m->final_extension_level)) { |
1508 | 9 | QTC::TC("qpdf", "QPDFWriter preserve ADBE"); |
1509 | 336 | } else { |
1510 | 336 | if (need_extensions_adbe) { |
1511 | 8 | extensions.replaceKey( |
1512 | 8 | "/ADBE", |
1513 | 8 | QPDFObjectHandle::parse( |
1514 | 8 | "<< /BaseVersion /" + m->final_pdf_version + |
1515 | 8 | " /ExtensionLevel " + std::to_string(m->final_extension_level) + |
1516 | 8 | " >>")); |
1517 | 328 | } else { |
1518 | 328 | QTC::TC("qpdf", "QPDFWriter remove ADBE"); |
1519 | 328 | extensions.removeKey("/ADBE"); |
1520 | 328 | } |
1521 | 336 | } |
1522 | 345 | } |
1523 | 490 | } |
1524 | 13.4k | } |
1525 | |
1526 | | // Stream dictionaries. |
1527 | |
1528 | 224k | if (flags & f_stream) { |
1529 | | // Suppress /Length since we will write it manually |
1530 | |
1531 | | // Make a shallow copy of this object so we can modify it safely without affecting the |
1532 | | // original. This code has logic to skip certain keys in agreement with |
1533 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't |
1534 | | // leave unreferenced objects in the output. We can use unsafeShallowCopy here because |
1535 | | // all we are doing is removing or replacing top-level keys. |
1536 | 43.8k | object = object.unsafeShallowCopy(); |
1537 | |
1538 | 43.8k | object.removeKey("/Length"); |
1539 | |
1540 | | // If /DecodeParms is an empty list, remove it. |
1541 | 43.8k | if (object.getKey("/DecodeParms").empty()) { |
1542 | 41.7k | object.removeKey("/DecodeParms"); |
1543 | 41.7k | } |
1544 | |
1545 | 43.8k | if (flags & f_filtered) { |
1546 | | // We will supply our own filter and decode parameters. |
1547 | 25.1k | object.removeKey("/Filter"); |
1548 | 25.1k | object.removeKey("/DecodeParms"); |
1549 | 25.1k | } else { |
1550 | | // Make sure, no matter what else we have, that we don't have /Crypt in the output |
1551 | | // filters. |
1552 | 18.6k | QPDFObjectHandle filter = object.getKey("/Filter"); |
1553 | 18.6k | QPDFObjectHandle decode_parms = object.getKey("/DecodeParms"); |
1554 | 18.6k | if (filter.isOrHasName("/Crypt")) { |
1555 | 450 | if (filter.isName()) { |
1556 | 33 | object.removeKey("/Filter"); |
1557 | 33 | object.removeKey("/DecodeParms"); |
1558 | 417 | } else { |
// Only the first /Crypt entry is erased, together with the /DecodeParms item at the same index.
1559 | 417 | int idx = 0; |
1560 | 2.34k | for (auto const& item: filter.as_array()) { |
1561 | 2.34k | if (item.isNameAndEquals("/Crypt")) { |
1562 | | // If filter is an array, then the code in QPDF_Stream has already |
1563 | | // verified that DecodeParms and Filters are arrays of the same |
1564 | | // length, but if they weren't for some reason, eraseItem does type |
1565 | | // and bounds checking. Fuzzing tells us that this can actually |
1566 | | // happen. |
1567 | 417 | filter.eraseItem(idx); |
1568 | 417 | decode_parms.eraseItem(idx); |
1569 | 417 | break; |
1570 | 417 | } |
1571 | 1.93k | ++idx; |
1572 | 1.93k | } |
1573 | 417 | } |
1574 | 450 | } |
1575 | 18.6k | } |
1576 | 43.8k | } |
1577 | |
1578 | 224k | write("<<"); |
1579 | |
// Entries whose value is null are not written.
1580 | 791k | for (auto const& [key, value]: object.as_dictionary()) { |
1581 | 791k | if (!value.null()) { |
1582 | 666k | write(indent_large).write_name(key).write(" "); |
// /Contents of a /Sig dictionary that has a /ByteRange is written as a hex string and is not
// encrypted.
1583 | 666k | if (key == "/Contents" && object.isDictionaryOfType("/Sig") && |
1584 | 666k | object.hasKey("/ByteRange")) { |
1585 | 34 | QTC::TC("qpdf", "QPDFWriter no encryption sig contents"); |
1586 | 34 | unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption); |
1587 | 666k | } else { |
1588 | 666k | unparseChild(value, level + 1, child_flags); |
1589 | 666k | } |
1590 | 666k | } |
1591 | 791k | } |
1592 | |
// For a stream dictionary, /Length and, where applicable, our own /Filter follow the entries
// written above. /Length is either the direct value or a reference to a separate length object.
1593 | 224k | if (flags & f_stream) { |
1594 | 43.4k | write(indent_large).write("/Length "); |
1595 | |
1596 | 43.4k | if (m->direct_stream_lengths) { |
1597 | 43.4k | write(stream_length); |
1598 | 43.4k | } else { |
1599 | 0 | write(m->cur_stream_length_id).write(" 0 R"); |
1600 | 0 | } |
1601 | 43.4k | if (compress && (flags & f_filtered)) { |
1602 | 24.6k | write(indent_large).write("/Filter /FlateDecode"); |
1603 | 24.6k | } |
1604 | 43.4k | } |
1605 | |
1606 | 224k | write(indent).write(">>"); |
1607 | 2.43M | } else if (tc == ::ot_stream) { |
1608 | | // Write stream data to a buffer. |
// When lengths are not written directly, the length object uses the id that directly follows the
// stream's new object number.
1609 | 43.8k | if (!m->direct_stream_lengths) { |
1610 | 0 | m->cur_stream_length_id = m->obj[old_og].renumber + 1; |
1611 | 0 | } |
1612 | |
1613 | 43.8k | flags |= f_stream; |
1614 | 43.8k | bool compress_stream = false; |
1615 | 43.8k | bool is_metadata = false; |
1616 | 43.8k | std::string stream_data; |
1617 | 43.8k | if (willFilterStream(object, compress_stream, is_metadata, &stream_data)) { |
1618 | 25.1k | flags |= f_filtered; |
1619 | 25.1k | } |
1620 | 43.8k | QPDFObjectHandle stream_dict = object.getDict(); |
1621 | |
1622 | 43.8k | m->cur_stream_length = stream_data.size(); |
1623 | 43.8k | if (is_metadata && m->encryption && !m->encryption->getEncryptMetadata()) { |
1624 | | // Don't encrypt stream data for the metadata stream |
1625 | 0 | m->cur_data_key.clear(); |
1626 | 0 | } |
// The length is adjusted for AES before the dictionary is written, so that /Length matches the
// size of the encrypted data.
1627 | 43.8k | adjustAESStreamLength(m->cur_stream_length); |
1628 | 43.8k | unparseObject(stream_dict, 0, flags, m->cur_stream_length, compress_stream); |
// The last byte of the unencrypted data decides whether QDF mode adds a newline before endstream.
1629 | 43.8k | char last_char = stream_data.empty() ? '\0' : stream_data.back(); |
1630 | 43.8k | write("\nstream\n").write_encrypted(stream_data); |
1631 | 43.8k | m->added_newline = m->newline_before_endstream || (m->qdf_mode && last_char != '\n'); |
1632 | 43.8k | write(m->added_newline ? "\nendstream" : "endstream"); |
1633 | 2.39M | } else if (tc == ::ot_string) { |
1634 | 84.6k | std::string val; |
// A string is encrypted only if encryption is configured, a data key is set, and neither
// f_in_ostream nor f_no_encryption is given.
1635 | 84.6k | if (m->encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) && |
1636 | 84.6k | !m->cur_data_key.empty()) { |
1637 | 0 | val = object.getStringValue(); |
1638 | 0 | if (m->encrypt_use_aes) { |
1639 | 0 | Pl_Buffer bufpl("encrypted string"); |
1640 | 0 | Pl_AES_PDF pl("aes encrypt string", &bufpl, true, m->cur_data_key); |
1641 | 0 | pl.writeString(val); |
1642 | 0 | pl.finish(); |
1643 | 0 | val = QPDF_String(bufpl.getString()).unparse(true); |
1644 | 0 | } else { |
// RC4 is applied in place to a mutable copy of the string's bytes.
1645 | 0 | auto tmp_ph = QUtil::make_unique_cstr(val); |
1646 | 0 | char* tmp = tmp_ph.get(); |
1647 | 0 | size_t vlen = val.length(); |
1648 | 0 | RC4 rc4( |
1649 | 0 | QUtil::unsigned_char_pointer(m->cur_data_key), |
1650 | 0 | QIntC::to_int(m->cur_data_key.length())); |
1651 | 0 | auto data = QUtil::unsigned_char_pointer(tmp); |
1652 | 0 | rc4.process(data, vlen, data); |
1653 | 0 | val = QPDF_String(std::string(tmp, vlen)).unparse(); |
1654 | 0 | } |
1655 | 84.6k | } else if (flags & f_hex_string) { |
1656 | 34 | val = QPDF_String(object.getStringValue()).unparse(true); |
1657 | 84.5k | } else { |
1658 | 84.5k | val = object.unparseResolved(); |
1659 | 84.5k | } |
1660 | 84.6k | write(val); |
1661 | 2.30M | } else { |
1662 | 2.30M | write(object.unparseResolved()); |
1663 | 2.30M | } |
1664 | 2.77M | } |
1665 | | |
1666 | | void |
1667 | | QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj) |
1668 | 27.9k | { |
1669 | 27.9k | qpdf_assert_debug(first_obj > 0); |
1670 | 27.9k | bool is_first = true; |
1671 | 27.9k | auto id = std::to_string(first_obj) + ' '; |
1672 | 348k | for (auto& offset: offsets) { |
1673 | 348k | if (is_first) { |
1674 | 27.9k | is_first = false; |
1675 | 320k | } else { |
1676 | 320k | write_qdf("\n").write_no_qdf(" "); |
1677 | 320k | } |
1678 | 348k | write(id); |
1679 | 348k | util::increment(id, 1); |
1680 | 348k | write(offset); |
1681 | 348k | } |
1682 | 27.9k | write("\n"); |
1683 | 27.9k | } |
1684 | | |
1685 | | void |
1686 | | QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
1687 | 13.9k | { |
1688 | | // Note: object might be null if this is a place-holder for an object stream that we are |
1689 | | // generating from scratch. |
1690 | | |
1691 | 13.9k | QPDFObjGen old_og = object.getObjGen(); |
1692 | 13.9k | qpdf_assert_debug(old_og.getGen() == 0); |
1693 | 13.9k | int old_id = old_og.getObj(); |
1694 | 13.9k | int new_stream_id = m->obj[old_og].renumber; |
1695 | | |
1696 | 13.9k | std::vector<qpdf_offset_t> offsets; |
1697 | 13.9k | qpdf_offset_t first = 0; |
1698 | | |
1699 | | // Generate stream itself. We have to do this in two passes so we can calculate offsets in the |
1700 | | // first pass. |
1701 | 13.9k | std::string stream_buffer_pass1; |
1702 | 13.9k | std::string stream_buffer_pass2; |
1703 | 13.9k | int first_obj = -1; |
1704 | 13.9k | const bool compressed = m->compress_streams && !m->qdf_mode; |
1705 | 13.9k | { |
1706 | | // Pass 1 |
1707 | 13.9k | auto pp_ostream_pass1 = m->pipeline_stack.activate(stream_buffer_pass1); |
1708 | | |
1709 | 13.9k | int count = -1; |
1710 | 174k | for (auto const& obj: m->object_stream_to_objects[old_id]) { |
1711 | 174k | ++count; |
1712 | 174k | int new_obj = m->obj[obj].renumber; |
1713 | 174k | if (first_obj == -1) { |
1714 | 13.9k | first_obj = new_obj; |
1715 | 13.9k | } |
1716 | 174k | if (m->qdf_mode) { |
1717 | 0 | write("%% Object stream: object ").write(new_obj).write(", index ").write(count); |
1718 | 0 | if (!m->suppress_original_object_ids) { |
1719 | 0 | write("; original object ID: ").write(obj.getObj()); |
1720 | | // For compatibility, only write the generation if non-zero. While object |
1721 | | // streams only allow objects with generation 0, if we are generating object |
1722 | | // streams, the old object could have a non-zero generation. |
1723 | 0 | if (obj.getGen() != 0) { |
1724 | 0 | QTC::TC("qpdf", "QPDFWriter original obj non-zero gen"); |
1725 | 0 | write(" ").write(obj.getGen()); |
1726 | 0 | } |
1727 | 0 | } |
1728 | 0 | write("\n"); |
1729 | 0 | } |
1730 | | |
1731 | 174k | offsets.push_back(m->pipeline->getCount()); |
1732 | | // To avoid double-counting objects being written in object streams for progress |
1733 | | // reporting, decrement in pass 1. |
1734 | 174k | indicateProgress(true, false); |
1735 | | |
1736 | 174k | QPDFObjectHandle obj_to_write = m->pdf.getObject(obj); |
1737 | 174k | if (obj_to_write.isStream()) { |
1738 | | // This condition occurred in a fuzz input. Ideally we should block it at parse |
1739 | | // time, but it's not clear to me how to construct a case for this. |
1740 | 0 | obj_to_write.warn("stream found inside object stream; treating as null"); |
1741 | 0 | obj_to_write = QPDFObjectHandle::newNull(); |
1742 | 0 | } |
1743 | 174k | writeObject(obj_to_write, count); |
1744 | | |
1745 | 174k | m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count); |
1746 | 174k | } |
1747 | 13.9k | } |
1748 | 13.9k | { |
1749 | | // Adjust offsets to skip over comment before first object |
1750 | 13.9k | first = offsets.at(0); |
1751 | 174k | for (auto& iter: offsets) { |
1752 | 174k | iter -= first; |
1753 | 174k | } |
1754 | | |
1755 | | // Take one pass at writing pairs of numbers so we can get their size information |
1756 | 13.9k | { |
1757 | 13.9k | auto pp_discard = m->pipeline_stack.activate(true); |
1758 | 13.9k | writeObjectStreamOffsets(offsets, first_obj); |
1759 | 13.9k | first += m->pipeline->getCount(); |
1760 | 13.9k | } |
1761 | | |
1762 | | // Set up a stream to write the stream data into a buffer. |
1763 | 13.9k | auto pp_ostream = m->pipeline_stack.activate(stream_buffer_pass2); |
1764 | | |
1765 | 13.9k | writeObjectStreamOffsets(offsets, first_obj); |
1766 | 13.9k | write(stream_buffer_pass1); |
1767 | 13.9k | stream_buffer_pass1.clear(); |
1768 | 13.9k | stream_buffer_pass1.shrink_to_fit(); |
1769 | 13.9k | if (compressed) { |
1770 | 13.9k | stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate); |
1771 | 13.9k | } |
1772 | 13.9k | } |
1773 | | |
1774 | | // Write the object |
1775 | 13.9k | openObject(new_stream_id); |
1776 | 13.9k | setDataKey(new_stream_id); |
1777 | 13.9k | write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n "); |
1778 | 13.9k | size_t length = stream_buffer_pass2.size(); |
1779 | 13.9k | adjustAESStreamLength(length); |
1780 | 13.9k | write(" /Length ").write(length).write_qdf("\n "); |
1781 | 13.9k | if (compressed) { |
1782 | 13.9k | write(" /Filter /FlateDecode"); |
1783 | 13.9k | } |
1784 | 13.9k | write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first); |
1785 | 13.9k | if (!object.isNull()) { |
1786 | | // If the original object has an /Extends key, preserve it. |
1787 | 0 | QPDFObjectHandle dict = object.getDict(); |
1788 | 0 | QPDFObjectHandle extends = dict.getKey("/Extends"); |
1789 | 0 | if (extends.isIndirect()) { |
1790 | 0 | QTC::TC("qpdf", "QPDFWriter copy Extends"); |
1791 | 0 | write_qdf("\n ").write(" /Extends "); |
1792 | 0 | unparseChild(extends, 1, f_in_ostream); |
1793 | 0 | } |
1794 | 0 | } |
1795 | 13.9k | write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2); |
1796 | 13.9k | if (m->encryption) { |
1797 | 0 | QTC::TC("qpdf", "QPDFWriter encrypt object stream"); |
1798 | 0 | } |
1799 | 13.9k | write(m->newline_before_endstream ? "\nendstream" : "endstream"); |
1800 | 13.9k | m->cur_data_key.clear(); |
1801 | 13.9k | closeObject(new_stream_id); |
1802 | 13.9k | } |
1803 | | |
1804 | | void |
1805 | | QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) |
1806 | 301k | { |
1807 | 301k | QPDFObjGen old_og = object.getObjGen(); |
1808 | | |
1809 | 301k | if (object_stream_index == -1 && old_og.getGen() == 0 && |
1810 | 301k | m->object_stream_to_objects.contains(old_og.getObj())) { |
1811 | 13.9k | writeObjectStream(object); |
1812 | 13.9k | return; |
1813 | 13.9k | } |
1814 | | |
1815 | 287k | indicateProgress(false, false); |
1816 | 287k | auto new_id = m->obj[old_og].renumber; |
1817 | 287k | if (m->qdf_mode) { |
1818 | 0 | if (m->page_object_to_seq.contains(old_og)) { |
1819 | 0 | write("%% Page ").write(m->page_object_to_seq[old_og]).write("\n"); |
1820 | 0 | } |
1821 | 0 | if (m->contents_to_page_seq.contains(old_og)) { |
1822 | 0 | write("%% Contents for page ").write(m->contents_to_page_seq[old_og]).write("\n"); |
1823 | 0 | } |
1824 | 0 | } |
1825 | 287k | if (object_stream_index == -1) { |
1826 | 112k | if (m->qdf_mode && (!m->suppress_original_object_ids)) { |
1827 | 0 | write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n"); |
1828 | 0 | } |
1829 | 112k | openObject(new_id); |
1830 | 112k | setDataKey(new_id); |
1831 | 112k | unparseObject(object, 0, 0); |
1832 | 112k | m->cur_data_key.clear(); |
1833 | 112k | closeObject(new_id); |
1834 | 174k | } else { |
1835 | 174k | unparseObject(object, 0, f_in_ostream); |
1836 | 174k | write("\n"); |
1837 | 174k | } |
1838 | | |
1839 | 287k | if (!m->direct_stream_lengths && object.isStream()) { |
1840 | 0 | if (m->qdf_mode) { |
1841 | 0 | if (m->added_newline) { |
1842 | 0 | write("%QDF: ignore_newline\n"); |
1843 | 0 | } |
1844 | 0 | } |
1845 | 0 | openObject(new_id + 1); |
1846 | 0 | write(m->cur_stream_length); |
1847 | 0 | closeObject(new_id + 1); |
1848 | 0 | } |
1849 | 287k | } |
1850 | | |
1851 | | std::string |
1852 | | QPDFWriter::getOriginalID1() |
1853 | 20.1k | { |
1854 | 20.1k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1855 | 20.1k | if (trailer.hasKey("/ID")) { |
1856 | 1.16k | return trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
1857 | 19.0k | } else { |
1858 | 19.0k | return ""; |
1859 | 19.0k | } |
1860 | 20.1k | } |
1861 | | |
1862 | | void |
1863 | | QPDFWriter::generateID(bool encrypted) |
1864 | 13.1k | { |
1865 | | // Generate the ID lazily so that we can handle the user's preference to use static or |
1866 | | // deterministic ID generation. |
1867 | | |
1868 | 13.1k | if (!m->id2.empty()) { |
1869 | 6.52k | return; |
1870 | 6.52k | } |
1871 | | |
1872 | 6.62k | QPDFObjectHandle trailer = m->pdf.getTrailer(); |
1873 | | |
1874 | 6.62k | std::string result; |
1875 | | |
1876 | 6.62k | if (m->static_id) { |
1877 | | // For test suite use only... |
1878 | 0 | static unsigned char tmp[] = { |
1879 | 0 | 0x31, |
1880 | 0 | 0x41, |
1881 | 0 | 0x59, |
1882 | 0 | 0x26, |
1883 | 0 | 0x53, |
1884 | 0 | 0x58, |
1885 | 0 | 0x97, |
1886 | 0 | 0x93, |
1887 | 0 | 0x23, |
1888 | 0 | 0x84, |
1889 | 0 | 0x62, |
1890 | 0 | 0x64, |
1891 | 0 | 0x33, |
1892 | 0 | 0x83, |
1893 | 0 | 0x27, |
1894 | 0 | 0x95, |
1895 | 0 | 0x00}; |
1896 | 0 | result = reinterpret_cast<char*>(tmp); |
1897 | 6.62k | } else { |
1898 | | // The PDF specification has guidelines for creating IDs, but it states clearly that the |
1899 | | // only thing that's really important is that it is very likely to be unique. We can't |
1900 | | // really follow the guidelines in the spec exactly because we haven't written the file yet. |
1901 | | // This scheme should be fine though. The deterministic ID case uses a digest of a |
1902 | | // sufficient portion of the file's contents such no two non-matching files would match in |
1903 | | // the subsets used for this computation. Note that we explicitly omit the filename from |
1904 | | // the digest calculation for deterministic ID so that the same file converted with qpdf, in |
1905 | | // that case, would have the same ID regardless of the output file's name. |
1906 | | |
1907 | 6.62k | std::string seed; |
1908 | 6.62k | if (m->deterministic_id) { |
1909 | 6.62k | if (encrypted) { |
1910 | 72 | throw std::runtime_error( |
1911 | 72 | "QPDFWriter: unable to generated a deterministic ID because the file to be " |
1912 | 72 | "written is encrypted (even though the file may not require a password)"); |
1913 | 72 | } |
1914 | 6.55k | if (m->deterministic_id_data.empty()) { |
1915 | 0 | throw std::logic_error( |
1916 | 0 | "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID"); |
1917 | 0 | } |
1918 | 6.55k | seed += m->deterministic_id_data; |
1919 | 6.55k | } else { |
1920 | 0 | seed += std::to_string(QUtil::get_current_time()); |
1921 | 0 | seed += m->filename; |
1922 | 0 | seed += " "; |
1923 | 0 | } |
1924 | 6.55k | seed += " QPDF "; |
1925 | 6.55k | if (trailer.hasKey("/Info")) { |
1926 | 5.74k | for (auto const& item: trailer.getKey("/Info").as_dictionary()) { |
1927 | 5.74k | if (item.second.isString()) { |
1928 | 1.45k | seed += " "; |
1929 | 1.45k | seed += item.second.getStringValue(); |
1930 | 1.45k | } |
1931 | 5.74k | } |
1932 | 274 | } |
1933 | | |
1934 | 6.55k | MD5 m; |
1935 | 6.55k | m.encodeString(seed.c_str()); |
1936 | 6.55k | MD5::Digest digest; |
1937 | 6.55k | m.digest(digest); |
1938 | 6.55k | result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest)); |
1939 | 6.55k | } |
1940 | | |
1941 | | // If /ID already exists, follow the spec: use the original first word and generate a new second |
1942 | | // word. Otherwise, we'll use the generated ID for both. |
1943 | | |
1944 | 6.55k | m->id2 = result; |
1945 | | // Note: keep /ID from old file even if --static-id was given. |
1946 | 6.55k | m->id1 = getOriginalID1(); |
1947 | 6.55k | if (m->id1.empty()) { |
1948 | 6.22k | m->id1 = m->id2; |
1949 | 6.22k | } |
1950 | 6.55k | } |
1951 | | |
1952 | | void |
1953 | | QPDFWriter::initializeSpecialStreams() |
1954 | 0 | { |
1955 | | // Mark all page content streams in case we are filtering or normalizing. |
1956 | 0 | std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages(); |
1957 | 0 | int num = 0; |
1958 | 0 | for (auto& page: pages) { |
1959 | 0 | m->page_object_to_seq[page.getObjGen()] = ++num; |
1960 | 0 | QPDFObjectHandle contents = page.getKey("/Contents"); |
1961 | 0 | std::vector<QPDFObjGen> contents_objects; |
1962 | 0 | if (contents.isArray()) { |
1963 | 0 | int n = static_cast<int>(contents.size()); |
1964 | 0 | for (int i = 0; i < n; ++i) { |
1965 | 0 | contents_objects.push_back(contents.getArrayItem(i).getObjGen()); |
1966 | 0 | } |
1967 | 0 | } else if (contents.isStream()) { |
1968 | 0 | contents_objects.push_back(contents.getObjGen()); |
1969 | 0 | } |
1970 | |
|
1971 | 0 | for (auto const& c: contents_objects) { |
1972 | 0 | m->contents_to_page_seq[c] = num; |
1973 | 0 | m->normalized_streams.insert(c); |
1974 | 0 | } |
1975 | 0 | } |
1976 | 0 | } |
1977 | | |
1978 | | void |
1979 | | QPDFWriter::preserveObjectStreams() |
1980 | 0 | { |
1981 | 0 | auto const& xref = QPDF::Writer::getXRefTable(m->pdf); |
1982 | | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
1983 | | // streams out of old objects that have generation numbers greater than zero. However in an |
1984 | | // existing PDF, all object stream objects and all objects in them must have generation 0 |
1985 | | // because the PDF spec does not provide any way to do otherwise. This code filters out objects |
1986 | | // that are not allowed to be in object streams. In addition to removing objects that were |
1987 | | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
1988 | | // objects from being included. |
1989 | 0 | auto end = xref.cend(); |
1990 | 0 | m->obj.streams_empty = true; |
1991 | 0 | if (m->preserve_unreferenced_objects) { |
1992 | 0 | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
1993 | 0 | if (iter->second.getType() == 2) { |
1994 | | // Pdf contains object streams. |
1995 | 0 | QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); |
1996 | 0 | m->obj.streams_empty = false; |
1997 | 0 | m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
1998 | 0 | } |
1999 | 0 | } |
2000 | 0 | } else { |
2001 | | // Start by scanning for first compressed object in case we don't have any object streams to |
2002 | | // process. |
2003 | 0 | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
2004 | 0 | if (iter->second.getType() == 2) { |
2005 | | // Pdf contains object streams. |
2006 | 0 | QTC::TC("qpdf", "QPDFWriter preserve object streams"); |
2007 | 0 | m->obj.streams_empty = false; |
2008 | 0 | auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); |
2009 | | // The object pointed to by iter may be a previous generation, in which case it is |
2010 | | // removed by getCompressibleObjSet. We need to restart the loop (while the object |
2011 | | // table may contain multiple generations of an object). |
2012 | 0 | for (iter = xref.cbegin(); iter != end; ++iter) { |
2013 | 0 | if (iter->second.getType() == 2) { |
2014 | 0 | auto id = static_cast<size_t>(iter->first.getObj()); |
2015 | 0 | if (id < eligible.size() && eligible[id]) { |
2016 | 0 | m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
2017 | 0 | } else { |
2018 | 0 | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
2019 | 0 | } |
2020 | 0 | } |
2021 | 0 | } |
2022 | 0 | return; |
2023 | 0 | } |
2024 | 0 | } |
2025 | 0 | } |
2026 | 0 | } |
2027 | | |
2028 | | void |
2029 | | QPDFWriter::generateObjectStreams() |
2030 | 9.68k | { |
2031 | | // Basic strategy: make a list of objects that can go into an object stream. Then figure out |
2032 | | // how many object streams are needed so that we can distribute objects approximately evenly |
2033 | | // without having any object stream exceed 100 members. We don't have to worry about linearized |
2034 | | // files here -- if the file is linearized, we take care of excluding things that aren't allowed |
2035 | | // here later. |
2036 | | |
2037 | | // This code doesn't do anything with /Extends. |
2038 | | |
2039 | 9.68k | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); |
2040 | 9.68k | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
2041 | | |
2042 | 9.68k | initializeTables(2U * n_object_streams); |
2043 | 9.68k | if (n_object_streams == 0) { |
2044 | 37 | m->obj.streams_empty = true; |
2045 | 37 | return; |
2046 | 37 | } |
2047 | 9.65k | size_t n_per = eligible.size() / n_object_streams; |
2048 | 9.65k | if (n_per * n_object_streams < eligible.size()) { |
2049 | 164 | ++n_per; |
2050 | 164 | } |
2051 | 9.65k | unsigned int n = 0; |
2052 | 9.65k | int cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2053 | 123k | for (auto const& item: eligible) { |
2054 | 123k | if (n == n_per) { |
2055 | 656 | QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); |
2056 | 656 | n = 0; |
2057 | | // Construct a new null object as the "original" object stream. The rest of the code |
2058 | | // knows that this means we're creating the object stream from scratch. |
2059 | 656 | cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
2060 | 656 | } |
2061 | 123k | auto& obj = m->obj[item]; |
2062 | 123k | obj.object_stream = cur_ostream; |
2063 | 123k | obj.gen = item.getGen(); |
2064 | 123k | ++n; |
2065 | 123k | } |
2066 | 9.65k | } |
2067 | | |
2068 | | QPDFObjectHandle |
2069 | | QPDFWriter::getTrimmedTrailer() |
2070 | 26.7k | { |
2071 | | // Remove keys from the trailer that necessarily have to be replaced when writing the file. |
2072 | | |
2073 | 26.7k | QPDFObjectHandle trailer = m->pdf.getTrailer().unsafeShallowCopy(); |
2074 | | |
2075 | | // Remove encryption keys |
2076 | 26.7k | trailer.removeKey("/ID"); |
2077 | 26.7k | trailer.removeKey("/Encrypt"); |
2078 | | |
2079 | | // Remove modification information |
2080 | 26.7k | trailer.removeKey("/Prev"); |
2081 | | |
2082 | | // Remove all trailer keys that potentially come from a cross-reference stream |
2083 | 26.7k | trailer.removeKey("/Index"); |
2084 | 26.7k | trailer.removeKey("/W"); |
2085 | 26.7k | trailer.removeKey("/Length"); |
2086 | 26.7k | trailer.removeKey("/Filter"); |
2087 | 26.7k | trailer.removeKey("/DecodeParms"); |
2088 | 26.7k | trailer.removeKey("/Type"); |
2089 | 26.7k | trailer.removeKey("/XRefStm"); |
2090 | | |
2091 | 26.7k | return trailer; |
2092 | 26.7k | } |
2093 | | |
2094 | | // Make document extension level information direct as required by the spec. |
2095 | | void |
2096 | | QPDFWriter::prepareFileForWrite() |
2097 | 9.54k | { |
2098 | 9.54k | m->pdf.fixDanglingReferences(); |
2099 | 9.54k | auto root = m->pdf.getRoot(); |
2100 | 9.54k | auto oh = root.getKey("/Extensions"); |
2101 | 9.54k | if (oh.isDictionary()) { |
2102 | 357 | const bool extensions_indirect = oh.isIndirect(); |
2103 | 357 | if (extensions_indirect) { |
2104 | 330 | QTC::TC("qpdf", "QPDFWriter make Extensions direct"); |
2105 | 330 | oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy()); |
2106 | 330 | } |
2107 | 357 | if (oh.hasKey("/ADBE")) { |
2108 | 173 | auto adbe = oh.getKey("/ADBE"); |
2109 | 173 | if (adbe.isIndirect()) { |
2110 | 152 | QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1); |
2111 | 152 | adbe.makeDirect(); |
2112 | 152 | oh.replaceKey("/ADBE", adbe); |
2113 | 152 | } |
2114 | 173 | } |
2115 | 357 | } |
2116 | 9.54k | } |
2117 | | |
2118 | | void |
2119 | | QPDFWriter::initializeTables(size_t extra) |
2120 | 9.66k | { |
2121 | 9.66k | auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; |
2122 | 9.66k | m->obj.resize(size); |
2123 | 9.66k | m->new_obj.resize(size); |
2124 | 9.66k | } |
2125 | | |
2126 | | void |
2127 | | QPDFWriter::doWriteSetup() |
2128 | 9.75k | { |
2129 | 9.75k | if (m->did_write_setup) { |
2130 | 0 | return; |
2131 | 0 | } |
2132 | 9.75k | m->did_write_setup = true; |
2133 | | |
2134 | | // Do preliminary setup |
2135 | | |
2136 | 9.75k | if (m->linearized) { |
2137 | 9.75k | m->qdf_mode = false; |
2138 | 9.75k | } |
2139 | | |
2140 | 9.75k | if (m->pclm) { |
2141 | 0 | m->stream_decode_level = qpdf_dl_none; |
2142 | 0 | m->compress_streams = false; |
2143 | 0 | m->encryption = nullptr; |
2144 | 0 | } |
2145 | | |
2146 | 9.75k | if (m->qdf_mode) { |
2147 | 0 | if (!m->normalize_content_set) { |
2148 | 0 | m->normalize_content = true; |
2149 | 0 | } |
2150 | 0 | if (!m->compress_streams_set) { |
2151 | 0 | m->compress_streams = false; |
2152 | 0 | } |
2153 | 0 | if (!m->stream_decode_level_set) { |
2154 | 0 | m->stream_decode_level = qpdf_dl_generalized; |
2155 | 0 | } |
2156 | 0 | } |
2157 | | |
2158 | 9.75k | if (m->encryption) { |
2159 | | // Encryption has been explicitly set |
2160 | 0 | m->preserve_encryption = false; |
2161 | 9.75k | } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) { |
2162 | | // Encryption makes looking at contents pretty useless. If the user explicitly encrypted |
2163 | | // though, we still obey that. |
2164 | 0 | m->preserve_encryption = false; |
2165 | 0 | } |
2166 | | |
2167 | 9.75k | if (m->preserve_encryption) { |
2168 | 9.75k | copyEncryptionParameters(m->pdf); |
2169 | 9.75k | } |
2170 | | |
2171 | 9.75k | if (!m->forced_pdf_version.empty()) { |
2172 | 0 | int major = 0; |
2173 | 0 | int minor = 0; |
2174 | 0 | parseVersion(m->forced_pdf_version, major, minor); |
2175 | 0 | disableIncompatibleEncryption(major, minor, m->forced_extension_level); |
2176 | 0 | if (compareVersions(major, minor, 1, 5) < 0) { |
2177 | 0 | QTC::TC("qpdf", "QPDFWriter forcing object stream disable"); |
2178 | 0 | m->object_stream_mode = qpdf_o_disable; |
2179 | 0 | } |
2180 | 0 | } |
2181 | | |
2182 | 9.75k | if (m->qdf_mode || m->normalize_content) { |
2183 | 0 | initializeSpecialStreams(); |
2184 | 0 | } |
2185 | | |
2186 | 9.75k | if (m->qdf_mode) { |
2187 | | // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing |
2188 | | // recomputed stream length data. Certain streams such as object streams, xref streams, and |
2189 | | // hint streams always get direct stream lengths. |
2190 | 0 | m->direct_stream_lengths = false; |
2191 | 0 | } |
2192 | | |
2193 | 9.75k | switch (m->object_stream_mode) { |
2194 | 0 | case qpdf_o_disable: |
2195 | 0 | initializeTables(); |
2196 | 0 | m->obj.streams_empty = true; |
2197 | 0 | break; |
2198 | | |
2199 | 0 | case qpdf_o_preserve: |
2200 | 0 | initializeTables(); |
2201 | 0 | preserveObjectStreams(); |
2202 | 0 | break; |
2203 | | |
2204 | 9.68k | case qpdf_o_generate: |
2205 | 9.68k | generateObjectStreams(); |
2206 | 9.68k | break; |
2207 | | |
2208 | | // no default so gcc will warn for missing case tag |
2209 | 9.75k | } |
2210 | | |
2211 | 9.66k | if (!m->obj.streams_empty) { |
2212 | 9.62k | if (m->linearized) { |
2213 | | // Page dictionaries are not allowed to be compressed objects. |
2214 | 16.8k | for (auto& page: m->pdf.getAllPages()) { |
2215 | 16.8k | if (m->obj[page].object_stream > 0) { |
2216 | 15.1k | QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); |
2217 | 15.1k | m->obj[page].object_stream = 0; |
2218 | 15.1k | } |
2219 | 16.8k | } |
2220 | 9.62k | } |
2221 | | |
2222 | 9.62k | if (m->linearized || m->encryption) { |
2223 | | // The document catalog is not allowed to be compressed in linearized files either. It |
2224 | | // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to |
2225 | | // handle encrypted files with compressed document catalogs, so we disable them in that |
2226 | | // case as well. |
2227 | 9.62k | if (m->obj[m->root_og].object_stream > 0) { |
2228 | 8.01k | QTC::TC("qpdf", "QPDFWriter uncompressing root"); |
2229 | 8.01k | m->obj[m->root_og].object_stream = 0; |
2230 | 8.01k | } |
2231 | 9.62k | } |
2232 | | |
2233 | | // Generate reverse mapping from object stream to objects |
2234 | 3.09M | m->obj.forEach([this](auto id, auto const& item) -> void { |
2235 | 3.09M | if (item.object_stream > 0) { |
2236 | 100k | auto& vec = m->object_stream_to_objects[item.object_stream]; |
2237 | 100k | vec.emplace_back(id, item.gen); |
2238 | 100k | if (m->max_ostream_index < vec.size()) { |
2239 | 46.1k | ++m->max_ostream_index; |
2240 | 46.1k | } |
2241 | 100k | } |
2242 | 3.09M | }); |
2243 | 9.62k | --m->max_ostream_index; |
2244 | | |
2245 | 9.62k | if (m->object_stream_to_objects.empty()) { |
2246 | 707 | m->obj.streams_empty = true; |
2247 | 8.92k | } else { |
2248 | 8.92k | setMinimumPDFVersion("1.5"); |
2249 | 8.92k | } |
2250 | 9.62k | } |
2251 | | |
2252 | 9.66k | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
2253 | 9.66k | m->final_pdf_version = m->min_pdf_version; |
2254 | 9.66k | m->final_extension_level = m->min_extension_level; |
2255 | 9.66k | if (!m->forced_pdf_version.empty()) { |
2256 | 0 | QTC::TC("qpdf", "QPDFWriter using forced PDF version"); |
2257 | 0 | m->final_pdf_version = m->forced_pdf_version; |
2258 | 0 | m->final_extension_level = m->forced_extension_level; |
2259 | 0 | } |
2260 | 9.66k | } |
2261 | | |
2262 | | void |
2263 | | QPDFWriter::write() |
2264 | 9.75k | { |
2265 | 9.75k | doWriteSetup(); |
2266 | | |
2267 | | // Set up progress reporting. For linearized files, we write two passes. events_expected is an |
2268 | | // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. |
2269 | 9.75k | m->events_expected = QIntC::to_int(m->pdf.getObjectCount() * (m->linearized ? 2 : 1)); |
2270 | | |
2271 | 9.75k | prepareFileForWrite(); |
2272 | | |
2273 | 9.75k | if (m->linearized) { |
2274 | 9.51k | writeLinearized(); |
2275 | 9.51k | } else { |
2276 | 248 | writeStandard(); |
2277 | 248 | } |
2278 | | |
2279 | 9.75k | m->pipeline->finish(); |
2280 | 9.75k | if (m->close_file) { |
2281 | 0 | fclose(m->file); |
2282 | 0 | } |
2283 | 9.75k | m->file = nullptr; |
2284 | 9.75k | if (m->buffer_pipeline) { |
2285 | 0 | m->output_buffer = m->buffer_pipeline->getBuffer(); |
2286 | 0 | m->buffer_pipeline = nullptr; |
2287 | 0 | } |
2288 | 9.75k | indicateProgress(false, true); |
2289 | 9.75k | } |
2290 | | |
2291 | | QPDFObjGen |
2292 | | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
2293 | 0 | { |
2294 | 0 | return {m->obj[og].renumber, 0}; |
2295 | 0 | } |
2296 | | |
2297 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2298 | | QPDFWriter::getWrittenXRefTable() |
2299 | 0 | { |
2300 | 0 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
2301 | |
|
2302 | 0 | auto it = result.begin(); |
2303 | 0 | m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { |
2304 | 0 | if (item.xref.getType() != 0) { |
2305 | 0 | it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); |
2306 | 0 | } |
2307 | 0 | }); |
2308 | 0 | return result; |
2309 | 0 | } |
2310 | | |
2311 | | void |
2312 | | QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part) |
2313 | 37.3k | { |
2314 | 81.9k | for (auto const& oh: part) { |
2315 | 81.9k | enqueueObject(oh); |
2316 | 81.9k | } |
2317 | 37.3k | } |
2318 | | |
2319 | | void |
2320 | | QPDFWriter::writeEncryptionDictionary() |
2321 | 0 | { |
2322 | 0 | m->encryption_dict_objid = openObject(m->encryption_dict_objid); |
2323 | 0 | auto& enc = *m->encryption; |
2324 | 0 | auto const V = enc.getV(); |
2325 | |
|
2326 | 0 | write("<<"); |
2327 | 0 | if (V >= 4) { |
2328 | 0 | write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM "); |
2329 | 0 | write(m->encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2"); |
2330 | | // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of |
2331 | | // MacOS won't open encrypted files without it. |
2332 | 0 | write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>"); |
2333 | 0 | if (!m->encryption->getEncryptMetadata()) { |
2334 | 0 | write(" /EncryptMetadata false"); |
2335 | 0 | } |
2336 | 0 | } |
2337 | 0 | write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8); |
2338 | 0 | write(" /O ").write_string(enc.getO(), true); |
2339 | 0 | if (V >= 4) { |
2340 | 0 | write(" /OE ").write_string(enc.getOE(), true); |
2341 | 0 | } |
2342 | 0 | write(" /P ").write(enc.getP()); |
2343 | 0 | if (V >= 5) { |
2344 | 0 | write(" /Perms ").write_string(enc.getPerms(), true); |
2345 | 0 | } |
2346 | 0 | write(" /R ").write(enc.getR()); |
2347 | |
|
2348 | 0 | if (V >= 4) { |
2349 | 0 | write(" /StmF /StdCF /StrF /StdCF"); |
2350 | 0 | } |
2351 | 0 | write(" /U ").write_string(enc.getU(), true); |
2352 | 0 | if (V >= 4) { |
2353 | 0 | write(" /UE ").write_string(enc.getUE(), true); |
2354 | 0 | } |
2355 | 0 | write(" /V ").write(enc.getV()).write(" >>"); |
2356 | 0 | closeObject(m->encryption_dict_objid); |
2357 | 0 | } |
2358 | | |
2359 | | std::string |
2360 | | QPDFWriter::getFinalVersion() |
2361 | 0 | { |
2362 | 0 | doWriteSetup(); |
2363 | 0 | return m->final_pdf_version; |
2364 | 0 | } |
2365 | | |
2366 | | void |
2367 | | QPDFWriter::writeHeader() |
2368 | 13.6k | { |
2369 | 13.6k | write("%PDF-").write(m->final_pdf_version); |
2370 | 13.6k | if (m->pclm) { |
2371 | | // PCLm version |
2372 | 0 | write("\n%PCLm 1.0\n"); |
2373 | 13.6k | } else { |
2374 | | // This string of binary characters would not be valid UTF-8, so it really should be treated |
2375 | | // as binary. |
2376 | 13.6k | write("\n%\xbf\xf7\xa2\xfe\n"); |
2377 | 13.6k | } |
2378 | 13.6k | write_qdf("%QDF-1.0\n\n"); |
2379 | | |
2380 | | // Note: do not write extra header text here. Linearized PDFs must include the entire |
2381 | | // linearization parameter dictionary within the first 1024 characters of the PDF file, so for |
2382 | | // linearized files, we have to write extra header text after the linearization parameter |
2383 | | // dictionary. |
2384 | 13.6k | } |
2385 | | |
2386 | | void |
2387 | | QPDFWriter::writeHintStream(int hint_id) |
2388 | 6.55k | { |
2389 | 6.55k | std::string hint_buffer; |
2390 | 6.55k | int S = 0; |
2391 | 6.55k | int O = 0; |
2392 | 6.55k | bool compressed = m->compress_streams && !m->qdf_mode; |
2393 | 6.55k | QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); |
2394 | | |
2395 | 6.55k | openObject(hint_id); |
2396 | 6.55k | setDataKey(hint_id); |
2397 | | |
2398 | 6.55k | size_t hlen = hint_buffer.size(); |
2399 | | |
2400 | 6.55k | write("<< "); |
2401 | 6.55k | if (compressed) { |
2402 | 6.55k | write("/Filter /FlateDecode "); |
2403 | 6.55k | } |
2404 | 6.55k | write("/S ").write(S); |
2405 | 6.55k | if (O) { |
2406 | 168 | write(" /O ").write(O); |
2407 | 168 | } |
2408 | 6.55k | adjustAESStreamLength(hlen); |
2409 | 6.55k | write(" /Length ").write(hlen); |
2410 | 6.55k | write(" >>\nstream\n").write_encrypted(hint_buffer); |
2411 | | |
2412 | 6.55k | if (m->encryption) { |
2413 | 0 | QTC::TC("qpdf", "QPDFWriter encrypted hint stream"); |
2414 | 0 | } |
2415 | | |
2416 | 6.55k | write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream"); |
2417 | 6.55k | closeObject(hint_id); |
2418 | 6.55k | } |
2419 | | |
2420 | | qpdf_offset_t |
2421 | | QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size) |
2422 | 0 | { |
2423 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2424 | | // file...too much risk of leaving something off. |
2425 | 0 | return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0); |
2426 | 0 | } |
2427 | | |
2428 | | qpdf_offset_t |
2429 | | QPDFWriter::writeXRefTable( |
2430 | | trailer_e which, |
2431 | | int first, |
2432 | | int last, |
2433 | | int size, |
2434 | | qpdf_offset_t prev, |
2435 | | bool suppress_offsets, |
2436 | | int hint_id, |
2437 | | qpdf_offset_t hint_offset, |
2438 | | qpdf_offset_t hint_length, |
2439 | | int linearization_pass) |
2440 | 1.14k | { |
2441 | 1.14k | write("xref\n").write(first).write(" ").write(last - first + 1); |
2442 | 1.14k | qpdf_offset_t space_before_zero = m->pipeline->getCount(); |
2443 | 1.14k | write("\n"); |
2444 | 1.14k | if (first == 0) { |
2445 | 563 | write("0000000000 65535 f \n"); |
2446 | 563 | ++first; |
2447 | 563 | } |
2448 | 12.6k | for (int i = first; i <= last; ++i) { |
2449 | 11.5k | qpdf_offset_t offset = 0; |
2450 | 11.5k | if (!suppress_offsets) { |
2451 | 5.58k | offset = m->new_obj[i].xref.getOffset(); |
2452 | 5.58k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2453 | 3.76k | offset += hint_length; |
2454 | 3.76k | } |
2455 | 5.58k | } |
2456 | 11.5k | write(QUtil::int_to_string(offset, 10)).write(" 00000 n \n"); |
2457 | 11.5k | } |
2458 | 1.14k | writeTrailer(which, size, false, prev, linearization_pass); |
2459 | 1.14k | write("\n"); |
2460 | 1.14k | return space_before_zero; |
2461 | 1.14k | } |
2462 | | |
2463 | | qpdf_offset_t |
2464 | | QPDFWriter::writeXRefStream( |
2465 | | int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size) |
2466 | 0 | { |
2467 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2468 | | // file...too much risk of leaving something off. |
2469 | 0 | return writeXRefStream( |
2470 | 0 | objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0); |
2471 | 0 | } |
2472 | | |
2473 | | qpdf_offset_t |
2474 | | QPDFWriter::writeXRefStream( |
2475 | | int xref_id, |
2476 | | int max_id, |
2477 | | qpdf_offset_t max_offset, |
2478 | | trailer_e which, |
2479 | | int first, |
2480 | | int last, |
2481 | | int size, |
2482 | | qpdf_offset_t prev, |
2483 | | int hint_id, |
2484 | | qpdf_offset_t hint_offset, |
2485 | | qpdf_offset_t hint_length, |
2486 | | bool skip_compression, |
2487 | | int linearization_pass) |
2488 | 25.5k | { |
2489 | 25.5k | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
2490 | 25.5k | qpdf_offset_t space_before_zero = xref_offset - 1; |
2491 | | |
2492 | | // field 1 contains offsets and object stream identifiers |
2493 | 25.5k | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
2494 | | |
2495 | | // field 2 contains object stream indices |
2496 | 25.5k | unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); |
2497 | | |
2498 | 25.5k | unsigned int esize = 1 + f1_size + f2_size; |
2499 | | |
2500 | | // Must store in xref table in advance of writing the actual data rather than waiting for |
2501 | | // openObject to do it. |
2502 | 25.5k | m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2503 | | |
2504 | 25.5k | std::string xref_data; |
2505 | 25.5k | const bool compressed = m->compress_streams && !m->qdf_mode; |
2506 | 25.5k | { |
2507 | 25.5k | auto pp_xref = m->pipeline_stack.activate(xref_data); |
2508 | | |
2509 | 382k | for (int i = first; i <= last; ++i) { |
2510 | 357k | QPDFXRefEntry& e = m->new_obj[i].xref; |
2511 | 357k | switch (e.getType()) { |
2512 | 98.7k | case 0: |
2513 | 98.7k | writeBinary(0, 1); |
2514 | 98.7k | writeBinary(0, f1_size); |
2515 | 98.7k | writeBinary(0, f2_size); |
2516 | 98.7k | break; |
2517 | | |
2518 | 126k | case 1: |
2519 | 126k | { |
2520 | 126k | qpdf_offset_t offset = e.getOffset(); |
2521 | 126k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2522 | 37.8k | offset += hint_length; |
2523 | 37.8k | } |
2524 | 126k | writeBinary(1, 1); |
2525 | 126k | writeBinary(QIntC::to_ulonglong(offset), f1_size); |
2526 | 126k | writeBinary(0, f2_size); |
2527 | 126k | } |
2528 | 126k | break; |
2529 | | |
2530 | 132k | case 2: |
2531 | 132k | writeBinary(2, 1); |
2532 | 132k | writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size); |
2533 | 132k | writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size); |
2534 | 132k | break; |
2535 | | |
2536 | 0 | default: |
2537 | 0 | throw std::logic_error("invalid type writing xref stream"); |
2538 | 0 | break; |
2539 | 357k | } |
2540 | 357k | } |
2541 | 25.5k | } |
2542 | | |
2543 | 25.5k | if (compressed) { |
2544 | 25.5k | xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize); |
2545 | 25.5k | if (!skip_compression) { |
2546 | | // Write the stream dictionary for compression but don't actually compress. This |
2547 | | // helps us with computation of padding for pass 1 of linearization. |
2548 | 12.5k | xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate); |
2549 | 12.5k | } |
2550 | 25.5k | } |
2551 | | |
2552 | 25.5k | openObject(xref_id); |
2553 | 25.5k | write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n "); |
2554 | 25.5k | write(" /Length ").write(xref_data.size()); |
2555 | 25.5k | if (compressed) { |
2556 | 25.5k | write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n "); |
2557 | 25.5k | write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>"); |
2558 | 25.5k | } |
2559 | 25.5k | write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]"); |
2560 | 25.5k | if (!(first == 0 && last == (size - 1))) { |
2561 | 13.0k | write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]"); |
2562 | 13.0k | } |
2563 | 25.5k | writeTrailer(which, size, true, prev, linearization_pass); |
2564 | 25.5k | write("\nstream\n").write(xref_data).write("\nendstream"); |
2565 | 25.5k | closeObject(xref_id); |
2566 | 25.5k | return space_before_zero; |
2567 | 25.5k | } |
2568 | | |
2569 | | size_t |
2570 | | QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
2571 | 12.9k | { |
2572 | | // This routine is called right after a linearization first pass xref stream has been written |
2573 | | // without compression. Calculate the amount of padding that would be required in the worst |
2574 | | // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is |
2575 | | // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add |
2576 | | // 10 extra bytes for number length increases. |
2577 | | |
2578 | 12.9k | return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); |
2579 | 12.9k | } |
2580 | | |
2581 | | void |
2582 | | QPDFWriter::writeLinearized() |
2583 | 9.51k | { |
2584 | | // Optimize file and enqueue objects in order |
2585 | | |
2586 | 9.51k | std::map<int, int> stream_cache; |
2587 | | |
2588 | 55.5k | auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) { |
2589 | 55.5k | auto& result = stream_cache[stream.getObjectID()]; |
2590 | 55.5k | if (result == 0) { |
2591 | 25.8k | bool compress_stream; |
2592 | 25.8k | bool is_metadata; |
2593 | 25.8k | if (willFilterStream(stream, compress_stream, is_metadata, nullptr)) { |
2594 | 14.9k | result = 2; |
2595 | 14.9k | } else { |
2596 | 10.8k | result = 1; |
2597 | 10.8k | } |
2598 | 25.8k | } |
2599 | 55.5k | return result; |
2600 | 55.5k | }; |
2601 | | |
2602 | 9.51k | QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); |
2603 | | |
2604 | 9.51k | std::vector<QPDFObjectHandle> part4; |
2605 | 9.51k | std::vector<QPDFObjectHandle> part6; |
2606 | 9.51k | std::vector<QPDFObjectHandle> part7; |
2607 | 9.51k | std::vector<QPDFObjectHandle> part8; |
2608 | 9.51k | std::vector<QPDFObjectHandle> part9; |
2609 | 9.51k | QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); |
2610 | | |
2611 | | // Object number sequence: |
2612 | | // |
2613 | | // second half |
2614 | | // second half uncompressed objects |
2615 | | // second half xref stream, if any |
2616 | | // second half compressed objects |
2617 | | // first half |
2618 | | // linearization dictionary |
2619 | | // first half xref stream, if any |
2620 | | // part 4 uncompresesd objects |
2621 | | // encryption dictionary, if any |
2622 | | // hint stream |
2623 | | // part 6 uncompressed objects |
2624 | | // first half compressed objects |
2625 | | // |
2626 | | |
2627 | | // Second half objects |
2628 | 9.51k | int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size()); |
2629 | 9.51k | int second_half_first_obj = 1; |
2630 | 9.51k | int after_second_half = 1 + second_half_uncompressed; |
2631 | 9.51k | m->next_objid = after_second_half; |
2632 | 9.51k | int second_half_xref = 0; |
2633 | 9.51k | bool need_xref_stream = !m->obj.streams_empty; |
2634 | 9.51k | if (need_xref_stream) { |
2635 | 7.01k | second_half_xref = m->next_objid++; |
2636 | 7.01k | } |
2637 | | // Assign numbers to all compressed objects in the second half. |
2638 | 9.51k | std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9}; |
2639 | 32.1k | for (int i = 0; i < 3; ++i) { |
2640 | 35.0k | for (auto const& oh: *vecs2[i]) { |
2641 | 35.0k | assignCompressedObjectNumbers(oh.getObjGen()); |
2642 | 35.0k | } |
2643 | 22.6k | } |
2644 | 9.51k | int second_half_end = m->next_objid - 1; |
2645 | 9.51k | int second_trailer_size = m->next_objid; |
2646 | | |
2647 | | // First half objects |
2648 | 9.51k | int first_half_start = m->next_objid; |
2649 | 9.51k | int lindict_id = m->next_objid++; |
2650 | 9.51k | int first_half_xref = 0; |
2651 | 9.51k | if (need_xref_stream) { |
2652 | 7.01k | first_half_xref = m->next_objid++; |
2653 | 7.01k | } |
2654 | 9.51k | int part4_first_obj = m->next_objid; |
2655 | 9.51k | m->next_objid += QIntC::to_int(part4.size()); |
2656 | 9.51k | int after_part4 = m->next_objid; |
2657 | 9.51k | if (m->encryption) { |
2658 | 0 | m->encryption_dict_objid = m->next_objid++; |
2659 | 0 | } |
2660 | 9.51k | int hint_id = m->next_objid++; |
2661 | 9.51k | int part6_first_obj = m->next_objid; |
2662 | 9.51k | m->next_objid += QIntC::to_int(part6.size()); |
2663 | 9.51k | int after_part6 = m->next_objid; |
2664 | | // Assign numbers to all compressed objects in the first half |
2665 | 9.51k | std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6}; |
2666 | 24.6k | for (int i = 0; i < 2; ++i) { |
2667 | 47.3k | for (auto const& oh: *vecs1[i]) { |
2668 | 47.3k | assignCompressedObjectNumbers(oh.getObjGen()); |
2669 | 47.3k | } |
2670 | 15.0k | } |
2671 | 9.51k | int first_half_end = m->next_objid - 1; |
2672 | 9.51k | int first_trailer_size = m->next_objid; |
2673 | | |
2674 | 9.51k | int part4_end_marker = part4.back().getObjectID(); |
2675 | 9.51k | int part6_end_marker = part6.back().getObjectID(); |
2676 | 9.51k | qpdf_offset_t space_before_zero = 0; |
2677 | 9.51k | qpdf_offset_t file_size = 0; |
2678 | 9.51k | qpdf_offset_t part6_end_offset = 0; |
2679 | 9.51k | qpdf_offset_t first_half_max_obj_offset = 0; |
2680 | 9.51k | qpdf_offset_t second_xref_offset = 0; |
2681 | 9.51k | qpdf_offset_t first_xref_end = 0; |
2682 | 9.51k | qpdf_offset_t second_xref_end = 0; |
2683 | | |
2684 | 9.51k | m->next_objid = part4_first_obj; |
2685 | 9.51k | enqueuePart(part4); |
2686 | 9.51k | if (m->next_objid != after_part4) { |
2687 | | // This can happen with very botched files as in the fuzzer test. There are likely some |
2688 | | // faulty assumptions in calculateLinearizationData |
2689 | 9 | throw std::runtime_error("error encountered after writing part 4 of linearized data"); |
2690 | 9 | } |
2691 | 9.50k | m->next_objid = part6_first_obj; |
2692 | 9.50k | enqueuePart(part6); |
2693 | 9.50k | if (m->next_objid != after_part6) { |
2694 | 130 | throw std::runtime_error("error encountered after writing part 6 of linearized data"); |
2695 | 130 | } |
2696 | 9.37k | m->next_objid = second_half_first_obj; |
2697 | 9.37k | enqueuePart(part7); |
2698 | 9.37k | enqueuePart(part8); |
2699 | 9.37k | enqueuePart(part9); |
2700 | 9.37k | if (m->next_objid != after_second_half) { |
2701 | 339 | throw std::runtime_error("error encountered after writing part 9 of linearized data"); |
2702 | 339 | } |
2703 | | |
2704 | 9.03k | qpdf_offset_t hint_length = 0; |
2705 | 9.03k | std::string hint_buffer; |
2706 | | |
2707 | | // Write file in two passes. Part numbers refer to PDF spec 1.4. |
2708 | | |
2709 | 9.03k | FILE* lin_pass1_file = nullptr; |
2710 | 9.03k | auto pp_pass1 = m->pipeline_stack.popper(); |
2711 | 9.03k | auto pp_md5 = m->pipeline_stack.popper(); |
2712 | 13.6k | for (int pass: {1, 2}) { |
2713 | 13.6k | if (pass == 1) { |
2714 | 7.07k | if (!m->lin_pass1_filename.empty()) { |
2715 | 0 | lin_pass1_file = QUtil::safe_fopen(m->lin_pass1_filename.c_str(), "wb"); |
2716 | 0 | m->pipeline_stack.activate( |
2717 | 0 | pp_pass1, |
2718 | 0 | std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file)); |
2719 | 7.07k | } else { |
2720 | 7.07k | m->pipeline_stack.activate(pp_pass1, true); |
2721 | 7.07k | } |
2722 | 7.07k | if (m->deterministic_id) { |
2723 | 7.07k | m->pipeline_stack.activate_md5(pp_md5); |
2724 | 7.07k | } |
2725 | 7.07k | } |
2726 | | |
2727 | | // Part 1: header |
2728 | | |
2729 | 13.6k | writeHeader(); |
2730 | | |
2731 | | // Part 2: linearization parameter dictionary. Save enough space to write real dictionary. |
2732 | | // 200 characters is enough space if all numerical values in the parameter dictionary that |
2733 | | // contain offsets are 20 digits long plus a few extra characters for safety. The entire |
2734 | | // linearization parameter dictionary must appear within the first 1024 characters of the |
2735 | | // file. |
2736 | | |
2737 | 13.6k | qpdf_offset_t pos = m->pipeline->getCount(); |
2738 | 13.6k | openObject(lindict_id); |
2739 | 13.6k | write("<<"); |
2740 | 13.6k | if (pass == 2) { |
2741 | 6.55k | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); |
2742 | 6.55k | int first_page_object = m->obj[pages.at(0)].renumber; |
2743 | | |
2744 | 6.55k | write(" /Linearized 1 /L ").write(file_size + hint_length); |
2745 | | // Implementation note 121 states that a space is mandatory after this open bracket. |
2746 | 6.55k | write(" /H [ ").write(m->new_obj[hint_id].xref.getOffset()).write(" "); |
2747 | 6.55k | write(hint_length); |
2748 | 6.55k | write(" ] /O ").write(first_page_object); |
2749 | 6.55k | write(" /E ").write(part6_end_offset + hint_length); |
2750 | 6.55k | write(" /N ").write(pages.size()); |
2751 | 6.55k | write(" /T ").write(space_before_zero + hint_length); |
2752 | 6.55k | } |
2753 | 13.6k | write(" >>"); |
2754 | 13.6k | closeObject(lindict_id); |
2755 | 13.6k | static int const pad = 200; |
2756 | 13.6k | write(QIntC::to_size(pos - m->pipeline->getCount() + pad), ' ').write("\n"); |
2757 | | |
2758 | | // If the user supplied any additional header text, write it here after the linearization |
2759 | | // parameter dictionary. |
2760 | 13.6k | write(m->extra_header_text); |
2761 | | |
2762 | | // Part 3: first page cross reference table and trailer. |
2763 | | |
2764 | 13.6k | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); |
2765 | 13.6k | qpdf_offset_t hint_offset = 0; |
2766 | 13.6k | if (pass == 2) { |
2767 | 6.55k | hint_offset = m->new_obj[hint_id].xref.getOffset(); |
2768 | 6.55k | } |
2769 | 13.6k | if (need_xref_stream) { |
2770 | | // Must pad here too. |
2771 | 13.0k | if (pass == 1) { |
2772 | | // Set first_half_max_obj_offset to a value large enough to force four bytes to be |
2773 | | // reserved for each file offset. This would provide adequate space for the xref |
2774 | | // stream as long as the last object in page 1 starts with in the first 4 GB of the |
2775 | | // file, which is extremely likely. In the second pass, we will know the actual |
2776 | | // value for this, but it's okay if it's smaller. |
2777 | 6.77k | first_half_max_obj_offset = 1 << 25; |
2778 | 6.77k | } |
2779 | 13.0k | pos = m->pipeline->getCount(); |
2780 | 13.0k | writeXRefStream( |
2781 | 13.0k | first_half_xref, |
2782 | 13.0k | first_half_end, |
2783 | 13.0k | first_half_max_obj_offset, |
2784 | 13.0k | t_lin_first, |
2785 | 13.0k | first_half_start, |
2786 | 13.0k | first_half_end, |
2787 | 13.0k | first_trailer_size, |
2788 | 13.0k | hint_length + second_xref_offset, |
2789 | 13.0k | hint_id, |
2790 | 13.0k | hint_offset, |
2791 | 13.0k | hint_length, |
2792 | 13.0k | (pass == 1), |
2793 | 13.0k | pass); |
2794 | 13.0k | qpdf_offset_t endpos = m->pipeline->getCount(); |
2795 | 13.0k | if (pass == 1) { |
2796 | | // Pad so we have enough room for the real xref stream. |
2797 | 6.64k | write(calculateXrefStreamPadding(endpos - pos), ' '); |
2798 | 6.64k | first_xref_end = m->pipeline->getCount(); |
2799 | 6.64k | } else { |
2800 | | // Pad so that the next object starts at the same place as in pass 1. |
2801 | 6.40k | write(QIntC::to_size(first_xref_end - endpos), ' '); |
2802 | | |
2803 | 6.40k | if (m->pipeline->getCount() != first_xref_end) { |
2804 | 0 | throw std::logic_error( |
2805 | 0 | "insufficient padding for first pass xref stream; first_xref_end=" + |
2806 | 0 | std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos)); |
2807 | 0 | } |
2808 | 6.40k | } |
2809 | 13.0k | write("\n"); |
2810 | 13.0k | } else { |
2811 | 577 | writeXRefTable( |
2812 | 577 | t_lin_first, |
2813 | 577 | first_half_start, |
2814 | 577 | first_half_end, |
2815 | 577 | first_trailer_size, |
2816 | 577 | hint_length + second_xref_offset, |
2817 | 577 | (pass == 1), |
2818 | 577 | hint_id, |
2819 | 577 | hint_offset, |
2820 | 577 | hint_length, |
2821 | 577 | pass); |
2822 | 577 | write("startxref\n0\n%%EOF\n"); |
2823 | 577 | } |
2824 | | |
2825 | | // Parts 4 through 9 |
2826 | | |
2827 | 126k | for (auto const& cur_object: m->object_queue) { |
2828 | 126k | if (cur_object.getObjectID() == part6_end_marker) { |
2829 | 13.4k | first_half_max_obj_offset = m->pipeline->getCount(); |
2830 | 13.4k | } |
2831 | 126k | writeObject(cur_object); |
2832 | 126k | if (cur_object.getObjectID() == part4_end_marker) { |
2833 | 13.4k | if (m->encryption) { |
2834 | 0 | writeEncryptionDictionary(); |
2835 | 0 | } |
2836 | 13.4k | if (pass == 1) { |
2837 | 6.93k | m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
2838 | 6.93k | } else { |
2839 | | // Part 5: hint stream |
2840 | 6.54k | write(hint_buffer); |
2841 | 6.54k | } |
2842 | 13.4k | } |
2843 | 126k | if (cur_object.getObjectID() == part6_end_marker) { |
2844 | 13.3k | part6_end_offset = m->pipeline->getCount(); |
2845 | 13.3k | } |
2846 | 126k | } |
2847 | | |
2848 | | // Part 10: overflow hint stream -- not used |
2849 | | |
2850 | | // Part 11: main cross reference table and trailer |
2851 | | |
2852 | 13.6k | second_xref_offset = m->pipeline->getCount(); |
2853 | 13.6k | if (need_xref_stream) { |
2854 | 12.5k | pos = m->pipeline->getCount(); |
2855 | 12.5k | space_before_zero = writeXRefStream( |
2856 | 12.5k | second_half_xref, |
2857 | 12.5k | second_half_end, |
2858 | 12.5k | second_xref_offset, |
2859 | 12.5k | t_lin_second, |
2860 | 12.5k | 0, |
2861 | 12.5k | second_half_end, |
2862 | 12.5k | second_trailer_size, |
2863 | 12.5k | 0, |
2864 | 12.5k | 0, |
2865 | 12.5k | 0, |
2866 | 12.5k | 0, |
2867 | 12.5k | (pass == 1), |
2868 | 12.5k | pass); |
2869 | 12.5k | qpdf_offset_t endpos = m->pipeline->getCount(); |
2870 | | |
2871 | 12.5k | if (pass == 1) { |
2872 | | // Pad so we have enough room for the real xref stream. See comments for previous |
2873 | | // xref stream on how we calculate the padding. |
2874 | 6.26k | write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n"); |
2875 | 6.26k | second_xref_end = m->pipeline->getCount(); |
2876 | 6.26k | } else { |
2877 | | // Make the file size the same. |
2878 | 6.24k | auto padding = |
2879 | 6.24k | QIntC::to_size(second_xref_end + hint_length - 1 - m->pipeline->getCount()); |
2880 | 6.24k | write(padding, ' ').write("\n"); |
2881 | | |
2882 | | // If this assertion fails, maybe we didn't have enough padding above. |
2883 | 6.24k | if (m->pipeline->getCount() != second_xref_end + hint_length) { |
2884 | 0 | throw std::logic_error( |
2885 | 0 | "count mismatch after xref stream; possible insufficient padding?"); |
2886 | 0 | } |
2887 | 6.24k | } |
2888 | 12.5k | } else { |
2889 | 1.10k | space_before_zero = writeXRefTable( |
2890 | 1.10k | t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass); |
2891 | 1.10k | } |
2892 | 13.6k | write("startxref\n").write(first_xref_offset).write("\n%%EOF\n"); |
2893 | | |
2894 | 13.6k | if (pass == 1) { |
2895 | 6.55k | if (m->deterministic_id) { |
2896 | 6.55k | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
2897 | 6.55k | computeDeterministicIDData(); |
2898 | 6.55k | pp_md5.pop(); |
2899 | 6.55k | } |
2900 | | |
2901 | | // Close first pass pipeline |
2902 | 6.55k | file_size = m->pipeline->getCount(); |
2903 | 6.55k | pp_pass1.pop(); |
2904 | | |
2905 | | // Save hint offset since it will be set to zero by calling openObject. |
2906 | 6.55k | qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); |
2907 | | |
2908 | | // Write hint stream to a buffer |
2909 | 6.55k | { |
2910 | 6.55k | auto pp_hint = m->pipeline_stack.activate(hint_buffer); |
2911 | 6.55k | writeHintStream(hint_id); |
2912 | 6.55k | } |
2913 | 6.55k | hint_length = QIntC::to_offset(hint_buffer.size()); |
2914 | | |
2915 | | // Restore hint offset |
2916 | 6.55k | m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); |
2917 | 6.55k | if (lin_pass1_file) { |
2918 | | // Write some debugging information |
2919 | 0 | fprintf( |
2920 | 0 | lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str()); |
2921 | 0 | fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str()); |
2922 | 0 | fprintf( |
2923 | 0 | lin_pass1_file, |
2924 | 0 | "%% second_xref_offset=%s\n", |
2925 | 0 | std::to_string(second_xref_offset).c_str()); |
2926 | 0 | fprintf( |
2927 | 0 | lin_pass1_file, |
2928 | 0 | "%% second_xref_end=%s\n", |
2929 | 0 | std::to_string(second_xref_end).c_str()); |
2930 | 0 | fclose(lin_pass1_file); |
2931 | 0 | lin_pass1_file = nullptr; |
2932 | 0 | } |
2933 | 6.55k | } |
2934 | 13.6k | } |
2935 | 9.03k | } |
2936 | | |
2937 | | void |
2938 | | QPDFWriter::enqueueObjectsStandard() |
2939 | 0 | { |
2940 | 0 | if (m->preserve_unreferenced_objects) { |
2941 | 0 | QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); |
2942 | 0 | for (auto const& oh: m->pdf.getAllObjects()) { |
2943 | 0 | enqueueObject(oh); |
2944 | 0 | } |
2945 | 0 | } |
2946 | | |
2947 | | // Put root first on queue. |
2948 | 0 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2949 | 0 | enqueueObject(trailer.getKey("/Root")); |
2950 | | |
2951 | | // Next place any other objects referenced from the trailer dictionary into the queue, handling |
2952 | | // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op. |
2953 | 0 | for (auto& item: trailer.as_dictionary()) { |
2954 | 0 | if (!item.second.null()) { |
2955 | 0 | enqueueObject(item.second); |
2956 | 0 | } |
2957 | 0 | } |
2958 | 0 | } |
2959 | | |
2960 | | void |
2961 | | QPDFWriter::enqueueObjectsPCLm() |
2962 | 0 | { |
2963 | | // Image transform stream content for page strip images. Each of this new stream has to come |
2964 | | // after every page image strip written in the pclm file. |
2965 | 0 | std::string image_transform_content = "q /image Do Q\n"; |
2966 | | |
2967 | | // enqueue all pages first |
2968 | 0 | std::vector<QPDFObjectHandle> all = m->pdf.getAllPages(); |
2969 | 0 | for (auto& page: all) { |
2970 | | // enqueue page |
2971 | 0 | enqueueObject(page); |
2972 | | |
2973 | | // enqueue page contents stream |
2974 | 0 | enqueueObject(page.getKey("/Contents")); |
2975 | | |
2976 | | // enqueue all the strips for each page |
2977 | 0 | QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject"); |
2978 | 0 | for (auto& image: strips.as_dictionary()) { |
2979 | 0 | if (!image.second.null()) { |
2980 | 0 | enqueueObject(image.second); |
2981 | 0 | enqueueObject(QPDFObjectHandle::newStream(&m->pdf, image_transform_content)); |
2982 | 0 | } |
2983 | 0 | } |
2984 | 0 | } |
2985 | | |
2986 | | // Put root in queue. |
2987 | 0 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
2988 | 0 | enqueueObject(trailer.getKey("/Root")); |
2989 | 0 | } |
2990 | | |
2991 | | void |
2992 | | QPDFWriter::indicateProgress(bool decrement, bool finished) |
2993 | 467k | { |
2994 | 467k | if (decrement) { |
2995 | 174k | --m->events_seen; |
2996 | 174k | return; |
2997 | 174k | } |
2998 | | |
2999 | 293k | ++m->events_seen; |
3000 | | |
3001 | 293k | if (!m->progress_reporter.get()) { |
3002 | 293k | return; |
3003 | 293k | } |
3004 | | |
3005 | 0 | if (finished || (m->events_seen >= m->next_progress_report)) { |
3006 | 0 | int percentage = |
3007 | 0 | (finished ? 100 |
3008 | 0 | : m->next_progress_report == 0 |
3009 | 0 | ? 0 |
3010 | 0 | : std::min(99, 1 + ((100 * m->events_seen) / m->events_expected))); |
3011 | 0 | m->progress_reporter->reportProgress(percentage); |
3012 | 0 | } |
3013 | 0 | int increment = std::max(1, (m->events_expected / 100)); |
3014 | 0 | while (m->events_seen >= m->next_progress_report) { |
3015 | 0 | m->next_progress_report += increment; |
3016 | 0 | } |
3017 | 0 | } |
3018 | | |
3019 | | void |
3020 | | QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr) |
3021 | 0 | { |
3022 | 0 | m->progress_reporter = pr; |
3023 | 0 | } |
3024 | | |
3025 | | void |
3026 | | QPDFWriter::writeStandard() |
3027 | 0 | { |
3028 | 0 | auto pp_md5 = m->pipeline_stack.popper(); |
3029 | 0 | if (m->deterministic_id) { |
3030 | 0 | m->pipeline_stack.activate_md5(pp_md5); |
3031 | 0 | } |
3032 | | |
3033 | | // Start writing |
3034 | |
|
3035 | 0 | writeHeader(); |
3036 | 0 | write(m->extra_header_text); |
3037 | |
|
3038 | 0 | if (m->pclm) { |
3039 | 0 | enqueueObjectsPCLm(); |
3040 | 0 | } else { |
3041 | 0 | enqueueObjectsStandard(); |
3042 | 0 | } |
3043 | | |
3044 | | // Now start walking queue, outputting each object. |
3045 | 0 | while (m->object_queue_front < m->object_queue.size()) { |
3046 | 0 | QPDFObjectHandle cur_object = m->object_queue.at(m->object_queue_front); |
3047 | 0 | ++m->object_queue_front; |
3048 | 0 | writeObject(cur_object); |
3049 | 0 | } |
3050 | | |
3051 | | // Write out the encryption dictionary, if any |
3052 | 0 | if (m->encryption) { |
3053 | 0 | writeEncryptionDictionary(); |
3054 | 0 | } |
3055 | | |
3056 | | // Now write out xref. next_objid is now the number of objects. |
3057 | 0 | qpdf_offset_t xref_offset = m->pipeline->getCount(); |
3058 | 0 | if (m->object_stream_to_objects.empty()) { |
3059 | | // Write regular cross-reference table |
3060 | 0 | writeXRefTable(t_normal, 0, m->next_objid - 1, m->next_objid); |
3061 | 0 | } else { |
3062 | | // Write cross-reference stream. |
3063 | 0 | int xref_id = m->next_objid++; |
3064 | 0 | writeXRefStream( |
3065 | 0 | xref_id, xref_id, xref_offset, t_normal, 0, m->next_objid - 1, m->next_objid); |
3066 | 0 | } |
3067 | 0 | write("startxref\n").write(xref_offset).write("\n%%EOF\n"); |
3068 | |
|
3069 | 0 | if (m->deterministic_id) { |
3070 | 0 | QTC::TC( |
3071 | 0 | "qpdf", |
3072 | 0 | "QPDFWriter standard deterministic ID", |
3073 | 0 | m->object_stream_to_objects.empty() ? 0 : 1); |
3074 | 0 | } |
3075 | 0 | } |