/src/qpdf/libqpdf/QPDFWriter.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include early for large file support |
2 | | |
3 | | #include <qpdf/QPDFWriter_private.hh> |
4 | | |
5 | | #include <qpdf/MD5.hh> |
6 | | #include <qpdf/Pl_AES_PDF.hh> |
7 | | #include <qpdf/Pl_Flate.hh> |
8 | | #include <qpdf/Pl_MD5.hh> |
9 | | #include <qpdf/Pl_PNGFilter.hh> |
10 | | #include <qpdf/Pl_RC4.hh> |
11 | | #include <qpdf/Pl_StdioFile.hh> |
12 | | #include <qpdf/QIntC.hh> |
13 | | #include <qpdf/QPDFObjectHandle_private.hh> |
14 | | #include <qpdf/QPDFObject_private.hh> |
15 | | #include <qpdf/QPDF_private.hh> |
16 | | #include <qpdf/QTC.hh> |
17 | | #include <qpdf/QUtil.hh> |
18 | | #include <qpdf/RC4.hh> |
19 | | #include <qpdf/Util.hh> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <concepts> |
23 | | #include <cstdlib> |
24 | | #include <stdexcept> |
25 | | #include <tuple> |
26 | | |
27 | | using namespace std::literals; |
28 | | using namespace qpdf; |
29 | | |
30 | | using Encryption = impl::Doc::Encryption; |
31 | | using Config = Writer::Config; |
32 | | |
33 | | QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) |
34 | 0 | { |
35 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
36 | 0 | } |
37 | | |
38 | | QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) : |
39 | 0 | handler(handler) |
40 | 0 | { |
41 | 0 | } |
42 | | |
43 | | QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT |
44 | | // (modernize-use-equals-default) |
45 | 0 | { |
46 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
47 | 0 | } |
48 | | |
49 | | void |
50 | | QPDFWriter::FunctionProgressReporter::reportProgress(int progress) |
51 | 0 | { |
52 | 0 | handler(progress); |
53 | 0 | } |
54 | | |
55 | | namespace |
56 | | { |
57 | | class Pl_stack |
58 | | { |
59 | | // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a |
60 | | // reference to a Popper instance can be passed into activate. When the Popper goes out of |
61 | | // scope, the pipeline stack is popped. This causes finish to be called on the current |
62 | | // pipeline and the pipeline stack to be popped until the top of stack is a previous active |
63 | | // top of stack and restores the pipeline to that point. It deletes any pipelines that it |
64 | | // pops. |
65 | | class Popper |
66 | | { |
67 | | friend class Pl_stack; |
68 | | |
69 | | public: |
70 | | Popper() = default; |
71 | | Popper(Popper const&) = delete; |
72 | | Popper(Popper&& other) noexcept |
73 | 0 | { |
74 | 0 | // For MSVC, default pops the stack |
75 | 0 | if (this != &other) { |
76 | 0 | stack = other.stack; |
77 | 0 | stack_id = other.stack_id; |
78 | 0 | other.stack = nullptr; |
79 | 0 | other.stack_id = 0; |
80 | 0 | }; |
81 | 0 | } |
82 | | Popper& operator=(Popper const&) = delete; |
83 | | Popper& |
84 | | operator=(Popper&& other) noexcept |
85 | 0 | { |
86 | 0 | // For MSVC, default pops the stack |
87 | 0 | if (this != &other) { |
88 | 0 | stack = other.stack; |
89 | 0 | stack_id = other.stack_id; |
90 | 0 | other.stack = nullptr; |
91 | 0 | other.stack_id = 0; |
92 | 0 | }; |
93 | 0 | return *this; |
94 | 0 | } |
95 | | |
96 | | ~Popper(); |
97 | | |
98 | | // Manually pop pipeline from the pipeline stack. |
99 | | void pop(); |
100 | | |
101 | | private: |
102 | | Popper(Pl_stack& stack) : |
103 | 45.4k | stack(&stack) |
104 | 45.4k | { |
105 | 45.4k | } |
106 | | |
107 | | Pl_stack* stack{nullptr}; |
108 | | unsigned long stack_id{0}; |
109 | | }; |
110 | | |
111 | | public: |
112 | | Pl_stack(pl::Count*& top) : |
113 | 8.99k | top(top) |
114 | 8.99k | { |
115 | 8.99k | } |
116 | | |
117 | | Popper |
118 | | popper() |
119 | 8.87k | { |
120 | 8.87k | return {*this}; |
121 | 8.87k | } |
122 | | |
123 | | void |
124 | | initialize(Pipeline* p) |
125 | 8.99k | { |
126 | 8.99k | auto c = std::make_unique<pl::Count>(++last_id, p); |
127 | 8.99k | top = c.get(); |
128 | 8.99k | stack.emplace_back(std::move(c)); |
129 | 8.99k | } |
130 | | |
131 | | Popper |
132 | | activate(std::string& str) |
133 | 35.0k | { |
134 | 35.0k | Popper pp{*this}; |
135 | 35.0k | activate(pp, str); |
136 | 35.0k | return pp; |
137 | 35.0k | } |
138 | | |
139 | | void |
140 | | activate(Popper& pp, std::string& str) |
141 | 35.0k | { |
142 | 35.0k | activate(pp, false, &str, nullptr); |
143 | 35.0k | } |
144 | | |
145 | | void |
146 | | activate(Popper& pp, std::unique_ptr<Pipeline> next) |
147 | 0 | { |
148 | 0 | count_buffer.clear(); |
149 | 0 | activate(pp, false, &count_buffer, std::move(next)); |
150 | 0 | } |
151 | | |
152 | | Popper |
153 | | activate( |
154 | | bool discard = false, |
155 | | std::string* str = nullptr, |
156 | | std::unique_ptr<Pipeline> next = nullptr) |
157 | 1.57k | { |
158 | 1.57k | Popper pp{*this}; |
159 | 1.57k | activate(pp, discard, str, std::move(next)); |
160 | 1.57k | return pp; |
161 | 1.57k | } |
162 | | |
163 | | void |
164 | | activate( |
165 | | Popper& pp, |
166 | | bool discard = false, |
167 | | std::string* str = nullptr, |
168 | | std::unique_ptr<Pipeline> next = nullptr) |
169 | 36.5k | { |
170 | 36.5k | std::unique_ptr<pl::Count> c; |
171 | 36.5k | if (next) { |
172 | 0 | c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next)); |
173 | 36.5k | } else if (discard) { |
174 | 1.57k | c = std::make_unique<pl::Count>(++last_id, nullptr); |
175 | 35.0k | } else if (!str) { |
176 | 0 | c = std::make_unique<pl::Count>(++last_id, top); |
177 | 35.0k | } else { |
178 | 35.0k | c = std::make_unique<pl::Count>(++last_id, *str); |
179 | 35.0k | } |
180 | 36.5k | pp.stack_id = last_id; |
181 | 36.5k | top = c.get(); |
182 | 36.5k | stack.emplace_back(std::move(c)); |
183 | 36.5k | } |
184 | | void |
185 | | activate_md5(Popper& pp) |
186 | 8.87k | { |
187 | 8.87k | qpdf_assert_debug(!md5_pipeline); |
188 | 8.87k | qpdf_assert_debug(md5_id == 0); |
189 | 8.87k | qpdf_assert_debug(top->getCount() == 0); |
190 | 8.87k | md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top); |
191 | 8.87k | md5_pipeline->persistAcrossFinish(true); |
192 | | // Special case code in pop clears m->md5_pipeline upon deletion. |
193 | 8.87k | auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get()); |
194 | 8.87k | pp.stack_id = last_id; |
195 | 8.87k | md5_id = last_id; |
196 | 8.87k | top = c.get(); |
197 | 8.87k | stack.emplace_back(std::move(c)); |
198 | 8.87k | } |
199 | | |
200 | | // Return the hex digest and disable the MD5 pipeline. |
201 | | std::string |
202 | | hex_digest() |
203 | 8.71k | { |
204 | 8.71k | qpdf_assert_debug(md5_pipeline); |
205 | 8.71k | auto digest = md5_pipeline->getHexDigest(); |
206 | 8.71k | md5_pipeline->enable(false); |
207 | 8.71k | return digest; |
208 | 8.71k | } |
209 | | |
210 | | void |
211 | | clear_buffer() |
212 | 0 | { |
213 | 0 | count_buffer.clear(); |
214 | 0 | } |
215 | | |
216 | | private: |
217 | | void |
218 | | pop(unsigned long stack_id) |
219 | 45.4k | { |
220 | 45.4k | if (!stack_id) { |
221 | 0 | return; |
222 | 0 | } |
223 | 45.4k | qpdf_assert_debug(stack.size() >= 2); |
224 | 45.4k | top->finish(); |
225 | 45.4k | qpdf_assert_debug(stack.back().get() == top); |
226 | | // It used to be possible for this assertion to fail if writeLinearized exits by |
227 | | // exception when deterministic ID. There are no longer any cases in which two |
228 | | // dynamically allocated pipeline Popper objects ever exist at the same time, so the |
229 | | // assertion will fail if they get popped out of order from automatic destruction. |
230 | 45.4k | qpdf_assert_debug(top->id() == stack_id); |
231 | 45.4k | if (stack_id == md5_id) { |
232 | 8.87k | md5_pipeline = nullptr; |
233 | 8.87k | md5_id = 0; |
234 | 8.87k | } |
235 | 45.4k | stack.pop_back(); |
236 | 45.4k | top = stack.back().get(); |
237 | 45.4k | } |
238 | | |
239 | | std::vector<std::unique_ptr<pl::Count>> stack; |
240 | | pl::Count*& top; |
241 | | std::unique_ptr<Pl_MD5> md5_pipeline{nullptr}; |
242 | | unsigned long last_id{0}; |
243 | | unsigned long md5_id{0}; |
244 | | std::string count_buffer; |
245 | | }; |
246 | | } // namespace |
247 | | |
248 | | Pl_stack::Popper::~Popper() |
249 | 45.4k | { |
250 | 45.4k | if (stack) { |
251 | 45.4k | stack->pop(stack_id); |
252 | 45.4k | } |
253 | 45.4k | } |
254 | | |
255 | | void |
256 | | Pl_stack::Popper::pop() |
257 | 0 | { |
258 | 0 | if (stack) { |
259 | 0 | stack->pop(stack_id); |
260 | 0 | } |
261 | 0 | stack_id = 0; |
262 | 0 | stack = nullptr; |
263 | 0 | } |
264 | | |
265 | | namespace qpdf::impl |
266 | | { |
267 | | // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
268 | | class Writer: protected Doc::Common |
269 | | { |
270 | | public: |
271 | | // flags used by unparseObject |
272 | | static int const f_stream = 1 << 0; |
273 | | static int const f_filtered = 1 << 1; |
274 | | static int const f_in_ostream = 1 << 2; |
275 | | static int const f_hex_string = 1 << 3; |
276 | | static int const f_no_encryption = 1 << 4; |
277 | | |
278 | | enum trailer_e { t_normal, t_lin_first, t_lin_second }; |
279 | | |
280 | | Writer() = delete; |
281 | | Writer(Writer const&) = delete; |
282 | | Writer(Writer&&) = delete; |
283 | | Writer& operator=(Writer const&) = delete; |
284 | | Writer& operator=(Writer&&) = delete; |
285 | | ~Writer() |
286 | 8.99k | { |
287 | 8.99k | if (file && close_file) { |
288 | 0 | fclose(file); |
289 | 0 | } |
290 | 8.99k | delete output_buffer; |
291 | 8.99k | } |
292 | | Writer(QPDF& qpdf, QPDFWriter& w) : |
293 | 9.20k | Common(qpdf.doc()), |
294 | 9.20k | lin(qpdf.doc().linearization()), |
295 | 9.20k | cfg(true), |
296 | 9.20k | root_og(qpdf.getRoot().indirect() ? qpdf.getRoot().id_gen() : QPDFObjGen(-1, 0)), |
297 | 9.20k | pipeline_stack(pipeline) |
298 | 9.20k | { |
299 | 9.20k | } |
300 | | |
301 | | void write(); |
302 | | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); |
303 | | void setMinimumPDFVersion(std::string const& version, int extension_level = 0); |
304 | | void copyEncryptionParameters(QPDF&); |
305 | | void doWriteSetup(); |
306 | | void prepareFileForWrite(); |
307 | | |
308 | | void disableIncompatibleEncryption(int major, int minor, int extension_level); |
309 | | void interpretR3EncryptionParameters( |
310 | | bool allow_accessibility, |
311 | | bool allow_extract, |
312 | | bool allow_assemble, |
313 | | bool allow_annotate_and_form, |
314 | | bool allow_form_filling, |
315 | | bool allow_modify_other, |
316 | | qpdf_r3_print_e print, |
317 | | qpdf_r3_modify_e modify); |
318 | | void setEncryptionParameters(char const* user_password, char const* owner_password); |
319 | | void setEncryptionMinimumVersion(); |
320 | | void parseVersion(std::string const& version, int& major, int& minor) const; |
321 | | int compareVersions(int major1, int minor1, int major2, int minor2) const; |
322 | | void generateID(bool encrypted); |
323 | | std::string getOriginalID1(); |
324 | | void initializeTables(size_t extra = 0); |
325 | | void preserveObjectStreams(); |
326 | | void generateObjectStreams(); |
327 | | void initializeSpecialStreams(); |
328 | | void enqueue(QPDFObjectHandle const& object); |
329 | | void enqueueObjectsStandard(); |
330 | | void enqueueObjectsPCLm(); |
331 | | void enqueuePart(std::vector<QPDFObjectHandle>& part); |
332 | | void assignCompressedObjectNumbers(QPDFObjGen og); |
333 | | Dictionary trimmed_trailer(); |
334 | | |
335 | | // Returns tuple<filter, compress_stream, is_root_metadata> |
336 | | std::tuple<const bool, const bool, const bool> |
337 | | will_filter_stream(QPDFObjectHandle stream, std::string* stream_data); |
338 | | |
339 | | // Test whether stream would be filtered if it were written. |
340 | | bool will_filter_stream(QPDFObjectHandle stream); |
341 | | unsigned int bytesNeeded(long long n); |
342 | | void writeBinary(unsigned long long val, unsigned int bytes); |
343 | | Writer& write(std::string_view str); |
344 | | Writer& write(size_t count, char c); |
345 | | Writer& write(std::integral auto val); |
346 | | Writer& write_name(std::string const& str); |
347 | | Writer& write_string(std::string const& str, bool force_binary = false); |
348 | | Writer& write_encrypted(std::string_view str); |
349 | | |
350 | | template <typename... Args> |
351 | | Writer& write_qdf(Args&&... args); |
352 | | template <typename... Args> |
353 | | Writer& write_no_qdf(Args&&... args); |
354 | | void writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj); |
355 | | void writeObjectStream(QPDFObjectHandle object); |
356 | | void writeObject(QPDFObjectHandle object, int object_stream_index = -1); |
357 | | void writeTrailer( |
358 | | trailer_e which, |
359 | | int size, |
360 | | bool xref_stream, |
361 | | qpdf_offset_t prev, |
362 | | int linearization_pass); |
363 | | void unparseObject( |
364 | | QPDFObjectHandle object, |
365 | | size_t level, |
366 | | int flags, |
367 | | // for stream dictionaries |
368 | | size_t stream_length = 0, |
369 | | bool compress = false); |
370 | | void unparseChild(QPDFObjectHandle const& child, size_t level, int flags); |
371 | | int openObject(int objid = 0); |
372 | | void closeObject(int objid); |
373 | | void writeStandard(); |
374 | | void writeLinearized(); |
375 | | void writeEncryptionDictionary(); |
376 | | void writeHeader(); |
377 | | void writeHintStream(int hint_id); |
378 | | qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size); |
379 | | qpdf_offset_t writeXRefTable( |
380 | | trailer_e which, |
381 | | int first, |
382 | | int last, |
383 | | int size, |
384 | | // for linearization |
385 | | qpdf_offset_t prev, |
386 | | bool suppress_offsets, |
387 | | int hint_id, |
388 | | qpdf_offset_t hint_offset, |
389 | | qpdf_offset_t hint_length, |
390 | | int linearization_pass); |
391 | | qpdf_offset_t writeXRefStream( |
392 | | int objid, |
393 | | int max_id, |
394 | | qpdf_offset_t max_offset, |
395 | | trailer_e which, |
396 | | int first, |
397 | | int last, |
398 | | int size); |
399 | | qpdf_offset_t writeXRefStream( |
400 | | int objid, |
401 | | int max_id, |
402 | | qpdf_offset_t max_offset, |
403 | | trailer_e which, |
404 | | int first, |
405 | | int last, |
406 | | int size, |
407 | | // for linearization |
408 | | qpdf_offset_t prev, |
409 | | int hint_id, |
410 | | qpdf_offset_t hint_offset, |
411 | | qpdf_offset_t hint_length, |
412 | | bool skip_compression, |
413 | | int linearization_pass); |
414 | | |
415 | | void setDataKey(int objid); |
416 | | void indicateProgress(bool decrement, bool finished); |
417 | | size_t calculateXrefStreamPadding(qpdf_offset_t xref_bytes); |
418 | | |
419 | | void adjustAESStreamLength(size_t& length); |
420 | | void computeDeterministicIDData(); |
421 | | |
422 | | protected: |
423 | | Doc::Linearization& lin; |
424 | | |
425 | | qpdf::Writer::Config cfg; |
426 | | |
427 | | QPDFObjGen root_og{-1, 0}; |
428 | | char const* filename{"unspecified"}; |
429 | | FILE* file{nullptr}; |
430 | | bool close_file{false}; |
431 | | std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr}; |
432 | | Buffer* output_buffer{nullptr}; |
433 | | |
434 | | std::unique_ptr<QPDF::Doc::Encryption> encryption; |
435 | | std::string encryption_key; |
436 | | |
437 | | std::string id1; // for /ID key of |
438 | | std::string id2; // trailer dictionary |
439 | | std::string final_pdf_version; |
440 | | int final_extension_level{0}; |
441 | | std::string min_pdf_version; |
442 | | int min_extension_level{0}; |
443 | | int encryption_dict_objid{0}; |
444 | | std::string cur_data_key; |
445 | | std::unique_ptr<Pipeline> file_pl; |
446 | | qpdf::pl::Count* pipeline{nullptr}; |
447 | | std::vector<QPDFObjectHandle> object_queue; |
448 | | size_t object_queue_front{0}; |
449 | | QPDFWriter::ObjTable obj; |
450 | | QPDFWriter::NewObjTable new_obj; |
451 | | int next_objid{1}; |
452 | | int cur_stream_length_id{0}; |
453 | | size_t cur_stream_length{0}; |
454 | | bool added_newline{false}; |
455 | | size_t max_ostream_index{0}; |
456 | | std::set<QPDFObjGen> normalized_streams; |
457 | | std::map<QPDFObjGen, int> page_object_to_seq; |
458 | | std::map<QPDFObjGen, int> contents_to_page_seq; |
459 | | std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; |
460 | | Pl_stack pipeline_stack; |
461 | | std::string deterministic_id_data; |
462 | | bool did_write_setup{false}; |
463 | | |
464 | | // For progress reporting |
465 | | std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter; |
466 | | int events_expected{0}; |
467 | | int events_seen{0}; |
468 | | int next_progress_report{0}; |
469 | | }; // class qpdf::impl::Writer |
470 | | |
471 | | } // namespace qpdf::impl |
472 | | |
473 | | class QPDFWriter::Members: impl::Writer |
474 | | { |
475 | | friend class QPDFWriter; |
476 | | friend class qpdf::Writer; |
477 | | |
478 | | public: |
479 | | Members(QPDFWriter& w, QPDF& qpdf) : |
480 | 9.20k | impl::Writer(qpdf, w) |
481 | 9.20k | { |
482 | 9.20k | } |
483 | | }; |
484 | | |
485 | | qpdf::Writer::Writer(QPDF& qpdf, Config cfg) : |
486 | 0 | QPDFWriter(qpdf) |
487 | 0 | { |
488 | 0 | m->cfg = cfg; |
489 | 0 | } |
490 | | QPDFWriter::QPDFWriter(QPDF& pdf) : |
491 | 9.20k | m(std::make_shared<Members>(*this, pdf)) |
492 | 9.20k | { |
493 | 9.20k | } |
494 | | |
495 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) : |
496 | 0 | m(std::make_shared<Members>(*this, pdf)) |
497 | 0 | { |
498 | 0 | setOutputFilename(filename); |
499 | 0 | } |
500 | | |
501 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) : |
502 | 0 | m(std::make_shared<Members>(*this, pdf)) |
503 | 0 | { |
504 | 0 | setOutputFile(description, file, close_file); |
505 | 0 | } |
506 | | |
507 | | void |
508 | | QPDFWriter::setOutputFilename(char const* filename) |
509 | 0 | { |
510 | 0 | char const* description = filename; |
511 | 0 | FILE* f = nullptr; |
512 | 0 | bool close_file = false; |
513 | 0 | if (filename == nullptr) { |
514 | 0 | description = "standard output"; |
515 | 0 | f = stdout; |
516 | 0 | QUtil::binary_stdout(); |
517 | 0 | } else { |
518 | 0 | f = QUtil::safe_fopen(filename, "wb+"); |
519 | 0 | close_file = true; |
520 | 0 | } |
521 | 0 | setOutputFile(description, f, close_file); |
522 | 0 | } |
523 | | |
524 | | void |
525 | | QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file) |
526 | 0 | { |
527 | 0 | m->filename = description; |
528 | 0 | m->file = file; |
529 | 0 | m->close_file = close_file; |
530 | 0 | m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file); |
531 | 0 | m->pipeline_stack.initialize(m->file_pl.get()); |
532 | 0 | } |
533 | | |
534 | | void |
535 | | QPDFWriter::setOutputMemory() |
536 | 0 | { |
537 | 0 | m->filename = "memory buffer"; |
538 | 0 | m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output"); |
539 | 0 | m->pipeline_stack.initialize(m->buffer_pipeline.get()); |
540 | 0 | } |
541 | | |
542 | | Buffer* |
543 | | QPDFWriter::getBuffer() |
544 | 0 | { |
545 | 0 | Buffer* result = m->output_buffer; |
546 | 0 | m->output_buffer = nullptr; |
547 | 0 | return result; |
548 | 0 | } |
549 | | |
550 | | std::shared_ptr<Buffer> |
551 | | QPDFWriter::getBufferSharedPointer() |
552 | 0 | { |
553 | 0 | return std::shared_ptr<Buffer>(getBuffer()); |
554 | 0 | } |
555 | | |
556 | | void |
557 | | QPDFWriter::setOutputPipeline(Pipeline* p) |
558 | 8.99k | { |
559 | 8.99k | m->filename = "custom pipeline"; |
560 | 8.99k | m->pipeline_stack.initialize(p); |
561 | 8.99k | } |
562 | | |
563 | | void |
564 | | QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) |
565 | 0 | { |
566 | 0 | m->cfg.object_streams(mode); |
567 | 0 | } |
568 | | |
569 | | void |
570 | | QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) |
571 | 0 | { |
572 | 0 | m->cfg.stream_data(mode); |
573 | 0 | } |
574 | | |
575 | | Config& |
576 | | Config::stream_data(qpdf_stream_data_e mode) |
577 | 0 | { |
578 | 0 | switch (mode) { |
579 | 0 | case qpdf_s_uncompress: |
580 | 0 | decode_level(std::max(qpdf_dl_generalized, decode_level_)); |
581 | 0 | compress_streams(false); |
582 | 0 | return *this; |
583 | | |
584 | 0 | case qpdf_s_preserve: |
585 | 0 | decode_level(qpdf_dl_none); |
586 | 0 | compress_streams(false); |
587 | 0 | return *this; |
588 | | |
589 | 0 | case qpdf_s_compress: |
590 | 0 | decode_level(std::max(qpdf_dl_generalized, decode_level_)); |
591 | 0 | compress_streams(true); |
592 | 0 | } |
593 | 0 | return *this; |
594 | 0 | } |
595 | | |
596 | | void |
597 | | QPDFWriter::setCompressStreams(bool val) |
598 | 0 | { |
599 | 0 | m->cfg.compress_streams(val); |
600 | 0 | } |
601 | | |
602 | | Config& |
603 | | Config::compress_streams(bool val) |
604 | 8.99k | { |
605 | 8.99k | if (pclm_) { |
606 | 0 | usage("compress_streams cannot be set when pclm is set"); |
607 | 0 | return *this; |
608 | 0 | } |
609 | 8.99k | compress_streams_set_ = true; |
610 | 8.99k | compress_streams_ = val; |
611 | 8.99k | return *this; |
612 | 8.99k | } |
613 | | |
614 | | void |
615 | | QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) |
616 | 8.99k | { |
617 | 8.99k | m->cfg.decode_level(val); |
618 | 8.99k | } |
619 | | |
620 | | Config& |
621 | | Config::decode_level(qpdf_stream_decode_level_e val) |
622 | 8.99k | { |
623 | 8.99k | if (pclm_) { |
624 | 0 | usage("stream_decode_level cannot be set when pclm is set"); |
625 | 0 | return *this; |
626 | 0 | } |
627 | 8.99k | decode_level_set_ = true; |
628 | 8.99k | decode_level_ = val; |
629 | 8.99k | return *this; |
630 | 8.99k | } |
631 | | |
632 | | void |
633 | | QPDFWriter::setRecompressFlate(bool val) |
634 | 0 | { |
635 | 0 | m->cfg.recompress_flate(val); |
636 | 0 | } |
637 | | |
638 | | void |
639 | | QPDFWriter::setContentNormalization(bool val) |
640 | 0 | { |
641 | 0 | m->cfg.normalize_content(val); |
642 | 0 | } |
643 | | |
644 | | void |
645 | | QPDFWriter::setQDFMode(bool val) |
646 | 8.99k | { |
647 | 8.99k | m->cfg.qdf(val); |
648 | 8.99k | } |
649 | | |
650 | | Config& |
651 | | Config::qdf(bool val) |
652 | 8.99k | { |
653 | 8.99k | if (pclm_ || linearize_) { |
654 | 0 | usage("qdf cannot be set when linearize or pclm are set"); |
655 | 0 | } |
656 | 8.99k | if (preserve_encryption_) { |
657 | 8.99k | usage("preserve_encryption cannot be set when qdf is set"); |
658 | 8.99k | } |
659 | 8.99k | qdf_ = val; |
660 | 8.99k | if (val) { |
661 | 8.99k | if (!normalize_content_set_) { |
662 | 8.99k | normalize_content(true); |
663 | 8.99k | } |
664 | 8.99k | if (!compress_streams_set_) { |
665 | 8.99k | compress_streams(false); |
666 | 8.99k | } |
667 | 8.99k | if (!decode_level_set_) { |
668 | 0 | decode_level(qpdf_dl_generalized); |
669 | 0 | } |
670 | 8.99k | preserve_encryption_ = false; |
671 | | // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing |
672 | | // recomputed stream length data. Certain streams such as object streams, xref streams, and |
673 | | // hint streams always get direct stream lengths. |
674 | 8.99k | direct_stream_lengths_ = false; |
675 | 8.99k | } |
676 | 8.99k | return *this; |
677 | 8.99k | } |
678 | | |
679 | | void |
680 | | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
681 | 0 | { |
682 | 0 | m->cfg.preserve_unreferenced(val); |
683 | 0 | } |
684 | | |
685 | | void |
686 | | QPDFWriter::setNewlineBeforeEndstream(bool val) |
687 | 0 | { |
688 | 0 | m->cfg.newline_before_endstream(val); |
689 | 0 | } |
690 | | |
691 | | void |
692 | | QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level) |
693 | 0 | { |
694 | 0 | m->setMinimumPDFVersion(version, extension_level); |
695 | 0 | } |
696 | | |
697 | | void |
698 | | impl::Writer::setMinimumPDFVersion(std::string const& version, int extension_level) |
699 | 9.27k | { |
700 | 9.27k | bool set_version = false; |
701 | 9.27k | bool set_extension_level = false; |
702 | 9.27k | if (min_pdf_version.empty()) { |
703 | 8.93k | set_version = true; |
704 | 8.93k | set_extension_level = true; |
705 | 8.93k | } else { |
706 | 348 | int old_major = 0; |
707 | 348 | int old_minor = 0; |
708 | 348 | int min_major = 0; |
709 | 348 | int min_minor = 0; |
710 | 348 | parseVersion(version, old_major, old_minor); |
711 | 348 | parseVersion(min_pdf_version, min_major, min_minor); |
712 | 348 | int compare = compareVersions(old_major, old_minor, min_major, min_minor); |
713 | 348 | if (compare > 0) { |
714 | 122 | QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1); |
715 | 122 | set_version = true; |
716 | 122 | set_extension_level = true; |
717 | 226 | } else if (compare == 0) { |
718 | 3 | if (extension_level > min_extension_level) { |
719 | 1 | set_extension_level = true; |
720 | 1 | } |
721 | 3 | } |
722 | 348 | } |
723 | | |
724 | 9.27k | if (set_version) { |
725 | 9.05k | min_pdf_version = version; |
726 | 9.05k | } |
727 | 9.27k | if (set_extension_level) { |
728 | 9.05k | min_extension_level = extension_level; |
729 | 9.05k | } |
730 | 9.27k | } |
731 | | |
732 | | void |
733 | | QPDFWriter::setMinimumPDFVersion(PDFVersion const& v) |
734 | 0 | { |
735 | 0 | std::string version; |
736 | 0 | int extension_level; |
737 | 0 | v.getVersion(version, extension_level); |
738 | 0 | setMinimumPDFVersion(version, extension_level); |
739 | 0 | } |
740 | | |
741 | | void |
742 | | QPDFWriter::forcePDFVersion(std::string const& version, int extension_level) |
743 | 0 | { |
744 | 0 | m->cfg.forced_pdf_version(version, extension_level); |
745 | 0 | } |
746 | | |
747 | | void |
748 | | QPDFWriter::setExtraHeaderText(std::string const& text) |
749 | 0 | { |
750 | 0 | m->cfg.extra_header_text(text); |
751 | 0 | } |
752 | | |
753 | | Config& |
754 | | Config::extra_header_text(std::string const& val) |
755 | 0 | { |
756 | 0 | extra_header_text_ = val; |
757 | 0 | if (!extra_header_text_.empty() && extra_header_text_.back() != '\n') { |
758 | 0 | extra_header_text_ += "\n"; |
759 | 0 | } else { |
760 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text no newline"); |
761 | 0 | } |
762 | 0 | return *this; |
763 | 0 | } |
764 | | |
765 | | void |
766 | | QPDFWriter::setStaticID(bool val) |
767 | 0 | { |
768 | 0 | m->cfg.static_id(val); |
769 | 0 | } |
770 | | |
771 | | void |
772 | | QPDFWriter::setDeterministicID(bool val) |
773 | 8.99k | { |
774 | 8.99k | m->cfg.deterministic_id(val); |
775 | 8.99k | } |
776 | | |
777 | | void |
778 | | QPDFWriter::setStaticAesIV(bool val) |
779 | 0 | { |
780 | 0 | if (val) { |
781 | 0 | Pl_AES_PDF::useStaticIV(); |
782 | 0 | } |
783 | 0 | } |
784 | | |
785 | | void |
786 | | QPDFWriter::setSuppressOriginalObjectIDs(bool val) |
787 | 0 | { |
788 | 0 | m->cfg.no_original_object_ids(val); |
789 | 0 | } |
790 | | |
791 | | void |
792 | | QPDFWriter::setPreserveEncryption(bool val) |
793 | 0 | { |
794 | 0 | m->cfg.preserve_encryption(val); |
795 | 0 | } |
796 | | |
797 | | void |
798 | | QPDFWriter::setLinearization(bool val) |
799 | 0 | { |
800 | 0 | m->cfg.linearize(val); |
801 | 0 | } |
802 | | |
803 | | Config& |
804 | | Config::linearize(bool val) |
805 | 0 | { |
806 | 0 | if (pclm_ || qdf_) { |
807 | 0 | usage("linearize cannot be set when qdf or pclm are set"); |
808 | 0 | return *this; |
809 | 0 | } |
810 | 0 | linearize_ = val; |
811 | 0 | return *this; |
812 | 0 | } |
813 | | |
814 | | void |
815 | | QPDFWriter::setLinearizationPass1Filename(std::string const& filename) |
816 | 0 | { |
817 | 0 | m->cfg.linearize_pass1(filename); |
818 | 0 | } |
819 | | |
820 | | void |
821 | | QPDFWriter::setPCLm(bool val) |
822 | 0 | { |
823 | 0 | m->cfg.pclm(val); |
824 | 0 | } |
825 | | |
826 | | Config& |
827 | | Config::pclm(bool val) |
828 | 0 | { |
829 | 0 | if (decode_level_set_ || compress_streams_set_ || linearize_) { |
830 | 0 | usage( |
831 | 0 | "pclm cannot be set when stream_decode_level, compress_streams, linearize or qdf are " |
832 | 0 | "set"); |
833 | 0 | return *this; |
834 | 0 | } |
835 | 0 | pclm_ = val; |
836 | 0 | if (val) { |
837 | 0 | decode_level_ = qpdf_dl_none; |
838 | 0 | compress_streams_ = false; |
839 | 0 | linearize_ = false; |
840 | 0 | } |
841 | |
|
842 | 0 | return *this; |
843 | 0 | } |
844 | | |
845 | | void |
846 | | QPDFWriter::setR2EncryptionParametersInsecure( |
847 | | char const* user_password, |
848 | | char const* owner_password, |
849 | | bool allow_print, |
850 | | bool allow_modify, |
851 | | bool allow_extract, |
852 | | bool allow_annotate) |
853 | 0 | { |
854 | 0 | m->encryption = std::make_unique<Encryption>(1, 2, 5, true); |
855 | 0 | if (!allow_print) { |
856 | 0 | m->encryption->setP(3, false); |
857 | 0 | } |
858 | 0 | if (!allow_modify) { |
859 | 0 | m->encryption->setP(4, false); |
860 | 0 | } |
861 | 0 | if (!allow_extract) { |
862 | 0 | m->encryption->setP(5, false); |
863 | 0 | } |
864 | 0 | if (!allow_annotate) { |
865 | 0 | m->encryption->setP(6, false); |
866 | 0 | } |
867 | 0 | m->setEncryptionParameters(user_password, owner_password); |
868 | 0 | } |
869 | | |
870 | | void |
871 | | QPDFWriter::setR3EncryptionParametersInsecure( |
872 | | char const* user_password, |
873 | | char const* owner_password, |
874 | | bool allow_accessibility, |
875 | | bool allow_extract, |
876 | | bool allow_assemble, |
877 | | bool allow_annotate_and_form, |
878 | | bool allow_form_filling, |
879 | | bool allow_modify_other, |
880 | | qpdf_r3_print_e print) |
881 | 0 | { |
882 | 0 | m->encryption = std::make_unique<Encryption>(2, 3, 16, true); |
883 | 0 | m->interpretR3EncryptionParameters( |
884 | 0 | allow_accessibility, |
885 | 0 | allow_extract, |
886 | 0 | allow_assemble, |
887 | 0 | allow_annotate_and_form, |
888 | 0 | allow_form_filling, |
889 | 0 | allow_modify_other, |
890 | 0 | print, |
891 | 0 | qpdf_r3m_all); |
892 | 0 | m->setEncryptionParameters(user_password, owner_password); |
893 | 0 | } |
894 | | |
895 | | void |
896 | | QPDFWriter::setR4EncryptionParametersInsecure( |
897 | | char const* user_password, |
898 | | char const* owner_password, |
899 | | bool allow_accessibility, |
900 | | bool allow_extract, |
901 | | bool allow_assemble, |
902 | | bool allow_annotate_and_form, |
903 | | bool allow_form_filling, |
904 | | bool allow_modify_other, |
905 | | qpdf_r3_print_e print, |
906 | | bool encrypt_metadata, |
907 | | bool use_aes) |
908 | 0 | { |
909 | 0 | m->encryption = std::make_unique<Encryption>(4, 4, 16, encrypt_metadata); |
910 | 0 | m->cfg.encrypt_use_aes(use_aes); |
911 | 0 | m->interpretR3EncryptionParameters( |
912 | 0 | allow_accessibility, |
913 | 0 | allow_extract, |
914 | 0 | allow_assemble, |
915 | 0 | allow_annotate_and_form, |
916 | 0 | allow_form_filling, |
917 | 0 | allow_modify_other, |
918 | 0 | print, |
919 | 0 | qpdf_r3m_all); |
920 | 0 | m->setEncryptionParameters(user_password, owner_password); |
921 | 0 | } |
922 | | |
923 | | void |
924 | | QPDFWriter::setR5EncryptionParameters( |
925 | | char const* user_password, |
926 | | char const* owner_password, |
927 | | bool allow_accessibility, |
928 | | bool allow_extract, |
929 | | bool allow_assemble, |
930 | | bool allow_annotate_and_form, |
931 | | bool allow_form_filling, |
932 | | bool allow_modify_other, |
933 | | qpdf_r3_print_e print, |
934 | | bool encrypt_metadata) |
935 | 0 | { |
936 | 0 | m->encryption = std::make_unique<Encryption>(5, 5, 32, encrypt_metadata); |
937 | 0 | m->cfg.encrypt_use_aes(true); |
938 | 0 | m->interpretR3EncryptionParameters( |
939 | 0 | allow_accessibility, |
940 | 0 | allow_extract, |
941 | 0 | allow_assemble, |
942 | 0 | allow_annotate_and_form, |
943 | 0 | allow_form_filling, |
944 | 0 | allow_modify_other, |
945 | 0 | print, |
946 | 0 | qpdf_r3m_all); |
947 | 0 | m->setEncryptionParameters(user_password, owner_password); |
948 | 0 | } |
949 | | |
950 | | void |
951 | | QPDFWriter::setR6EncryptionParameters( |
952 | | char const* user_password, |
953 | | char const* owner_password, |
954 | | bool allow_accessibility, |
955 | | bool allow_extract, |
956 | | bool allow_assemble, |
957 | | bool allow_annotate_and_form, |
958 | | bool allow_form_filling, |
959 | | bool allow_modify_other, |
960 | | qpdf_r3_print_e print, |
961 | | bool encrypt_metadata) |
962 | 0 | { |
963 | 0 | m->encryption = std::make_unique<Encryption>(5, 6, 32, encrypt_metadata); |
964 | 0 | m->interpretR3EncryptionParameters( |
965 | 0 | allow_accessibility, |
966 | 0 | allow_extract, |
967 | 0 | allow_assemble, |
968 | 0 | allow_annotate_and_form, |
969 | 0 | allow_form_filling, |
970 | 0 | allow_modify_other, |
971 | 0 | print, |
972 | 0 | qpdf_r3m_all); |
973 | 0 | m->cfg.encrypt_use_aes(true); |
974 | 0 | m->setEncryptionParameters(user_password, owner_password); |
975 | 0 | } |
976 | | |
977 | | void |
978 | | impl::Writer::interpretR3EncryptionParameters( |
979 | | bool allow_accessibility, |
980 | | bool allow_extract, |
981 | | bool allow_assemble, |
982 | | bool allow_annotate_and_form, |
983 | | bool allow_form_filling, |
984 | | bool allow_modify_other, |
985 | | qpdf_r3_print_e print, |
986 | | qpdf_r3_modify_e modify) |
987 | 0 | { |
988 | | // Acrobat 5 security options: |
989 | | |
990 | | // Checkboxes: |
991 | | // Enable Content Access for the Visually Impaired |
992 | | // Allow Content Copying and Extraction |
993 | | |
994 | | // Allowed changes menu: |
995 | | // None |
996 | | // Only Document Assembly |
997 | | // Only Form Field Fill-in or Signing |
998 | | // Comment Authoring, Form Field Fill-in or Signing |
999 | | // General Editing, Comment and Form Field Authoring |
1000 | | |
1001 | | // Allowed printing menu: |
1002 | | // None |
1003 | | // Low Resolution |
1004 | | // Full printing |
1005 | | |
1006 | | // Meanings of bits in P when R >= 3 |
1007 | | // |
1008 | | // 3: low-resolution printing |
1009 | | // 4: document modification except as controlled by 6, 9, and 11 |
1010 | | // 5: extraction |
1011 | | // 6: add/modify annotations (comment), fill in forms |
1012 | | // if 4+6 are set, also allows modification of form fields |
1013 | | // 9: fill in forms even if 6 is clear |
1014 | | // 10: accessibility; ignored by readers, should always be set |
1015 | | // 11: document assembly even if 4 is clear |
1016 | | // 12: high-resolution printing |
1017 | 0 | if (!allow_accessibility && encryption->getR() <= 3) { |
1018 | | // Bit 10 is deprecated and should always be set. This used to mean accessibility. There |
1019 | | // is no way to disable accessibility with R > 3. |
1020 | 0 | encryption->setP(10, false); |
1021 | 0 | } |
1022 | 0 | if (!allow_extract) { |
1023 | 0 | encryption->setP(5, false); |
1024 | 0 | } |
1025 | |
|
1026 | 0 | switch (print) { |
1027 | 0 | case qpdf_r3p_none: |
1028 | 0 | encryption->setP(3, false); // any printing |
1029 | 0 | [[fallthrough]]; |
1030 | 0 | case qpdf_r3p_low: |
1031 | 0 | encryption->setP(12, false); // high resolution printing |
1032 | 0 | [[fallthrough]]; |
1033 | 0 | case qpdf_r3p_full: |
1034 | 0 | break; |
1035 | | // no default so gcc warns for missing cases |
1036 | 0 | } |
1037 | | |
1038 | | // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full |
1039 | | // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're |
1040 | | // stuck with it. See also allow checks below to control the bits individually. |
1041 | | |
1042 | | // NOT EXERCISED IN TEST SUITE |
1043 | 0 | switch (modify) { |
1044 | 0 | case qpdf_r3m_none: |
1045 | 0 | encryption->setP(11, false); // document assembly |
1046 | 0 | [[fallthrough]]; |
1047 | 0 | case qpdf_r3m_assembly: |
1048 | 0 | encryption->setP(9, false); // filling in form fields |
1049 | 0 | [[fallthrough]]; |
1050 | 0 | case qpdf_r3m_form: |
1051 | 0 | encryption->setP(6, false); // modify annotations, fill in form fields |
1052 | 0 | [[fallthrough]]; |
1053 | 0 | case qpdf_r3m_annotate: |
1054 | 0 | encryption->setP(4, false); // other modifications |
1055 | 0 | [[fallthrough]]; |
1056 | 0 | case qpdf_r3m_all: |
1057 | 0 | break; |
1058 | | // no default so gcc warns for missing cases |
1059 | 0 | } |
1060 | | // END NOT EXERCISED IN TEST SUITE |
1061 | | |
1062 | 0 | if (!allow_assemble) { |
1063 | 0 | encryption->setP(11, false); |
1064 | 0 | } |
1065 | 0 | if (!allow_annotate_and_form) { |
1066 | 0 | encryption->setP(6, false); |
1067 | 0 | } |
1068 | 0 | if (!allow_form_filling) { |
1069 | 0 | encryption->setP(9, false); |
1070 | 0 | } |
1071 | 0 | if (!allow_modify_other) { |
1072 | 0 | encryption->setP(4, false); |
1073 | 0 | } |
1074 | 0 | } |
1075 | | |
1076 | | void |
1077 | | impl::Writer::setEncryptionParameters(char const* user_password, char const* owner_password) |
1078 | 0 | { |
1079 | 0 | generateID(true); |
1080 | 0 | encryption->setId1(id1); |
1081 | 0 | encryption_key = encryption->compute_parameters(user_password, owner_password); |
1082 | 0 | setEncryptionMinimumVersion(); |
1083 | 0 | } |
1084 | | |
1085 | | void |
1086 | | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
1087 | 0 | { |
1088 | 0 | m->copyEncryptionParameters(qpdf); |
1089 | 0 | } |
1090 | | |
1091 | | void |
1092 | | impl::Writer::copyEncryptionParameters(QPDF& qpdf) |
1093 | 0 | { |
1094 | 0 | cfg.preserve_encryption(false); |
1095 | 0 | QPDFObjectHandle trailer = qpdf.getTrailer(); |
1096 | 0 | if (trailer.hasKey("/Encrypt")) { |
1097 | 0 | generateID(true); |
1098 | 0 | id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
1099 | 0 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
1100 | 0 | int V = encrypt.getKey("/V").getIntValueAsInt(); |
1101 | 0 | int key_len = 5; |
1102 | 0 | if (V > 1) { |
1103 | 0 | key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8; |
1104 | 0 | } |
1105 | 0 | const bool encrypt_metadata = |
1106 | 0 | encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool() |
1107 | 0 | ? encrypt.getKey("/EncryptMetadata").getBoolValue() |
1108 | 0 | : true; |
1109 | 0 | if (V >= 4) { |
1110 | | // When copying encryption parameters, use AES even if the original file did not. |
1111 | | // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of |
1112 | | // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF |
1113 | | // all potentially having different values. |
1114 | 0 | cfg.encrypt_use_aes(true); |
1115 | 0 | } |
1116 | 0 | QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1); |
1117 | 0 | QTC::TC("qpdf", "QPDFWriter copy use_aes", cfg.encrypt_use_aes() ? 0 : 1); |
1118 | |
|
1119 | 0 | encryption = std::make_unique<Encryption>( |
1120 | 0 | V, |
1121 | 0 | encrypt.getKey("/R").getIntValueAsInt(), |
1122 | 0 | key_len, |
1123 | 0 | static_cast<int>(encrypt.getKey("/P").getIntValue()), |
1124 | 0 | encrypt.getKey("/O").getStringValue(), |
1125 | 0 | encrypt.getKey("/U").getStringValue(), |
1126 | 0 | V < 5 ? "" : encrypt.getKey("/OE").getStringValue(), |
1127 | 0 | V < 5 ? "" : encrypt.getKey("/UE").getStringValue(), |
1128 | 0 | V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(), |
1129 | 0 | id1, // id1 == the other file's id1 |
1130 | 0 | encrypt_metadata); |
1131 | 0 | encryption_key = V >= 5 ? qpdf.getEncryptionKey() |
1132 | 0 | : encryption->compute_encryption_key(qpdf.getPaddedUserPassword()); |
1133 | 0 | setEncryptionMinimumVersion(); |
1134 | 0 | } |
1135 | 0 | } |
1136 | | |
1137 | | void |
1138 | | impl::Writer::disableIncompatibleEncryption(int major, int minor, int extension_level) |
1139 | 0 | { |
1140 | 0 | if (!encryption) { |
1141 | 0 | return; |
1142 | 0 | } |
1143 | 0 | if (compareVersions(major, minor, 1, 3) < 0) { |
1144 | 0 | encryption = nullptr; |
1145 | 0 | return; |
1146 | 0 | } |
1147 | 0 | int V = encryption->getV(); |
1148 | 0 | int R = encryption->getR(); |
1149 | 0 | if (compareVersions(major, minor, 1, 4) < 0) { |
1150 | 0 | if (V > 1 || R > 2) { |
1151 | 0 | encryption = nullptr; |
1152 | 0 | } |
1153 | 0 | } else if (compareVersions(major, minor, 1, 5) < 0) { |
1154 | 0 | if (V > 2 || R > 3) { |
1155 | 0 | encryption = nullptr; |
1156 | 0 | } |
1157 | 0 | } else if (compareVersions(major, minor, 1, 6) < 0) { |
1158 | 0 | if (cfg.encrypt_use_aes()) { |
1159 | 0 | encryption = nullptr; |
1160 | 0 | } |
1161 | 0 | } else if ( |
1162 | 0 | (compareVersions(major, minor, 1, 7) < 0) || |
1163 | 0 | ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) { |
1164 | 0 | if (V >= 5 || R >= 5) { |
1165 | 0 | encryption = nullptr; |
1166 | 0 | } |
1167 | 0 | } |
1168 | |
|
1169 | 0 | if (!encryption) { |
1170 | 0 | QTC::TC("qpdf", "QPDFWriter forced version disabled encryption"); |
1171 | 0 | } |
1172 | 0 | } |
1173 | | |
1174 | | void |
1175 | | impl::Writer::parseVersion(std::string const& version, int& major, int& minor) const |
1176 | 692 | { |
1177 | 692 | major = QUtil::string_to_int(version.c_str()); |
1178 | 692 | minor = 0; |
1179 | 692 | size_t p = version.find('.'); |
1180 | 692 | if ((p != std::string::npos) && (version.length() > p)) { |
1181 | 691 | minor = QUtil::string_to_int(version.substr(p + 1).c_str()); |
1182 | 691 | } |
1183 | 692 | std::string tmp = std::to_string(major) + "." + std::to_string(minor); |
1184 | 692 | if (tmp != version) { |
1185 | | // The version number in the input is probably invalid. This happens with some files that |
1186 | | // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately |
1187 | | // QPDFWriter doesn't have a way to give a warning, so we just ignore this case. |
1188 | 19 | } |
1189 | 692 | } |
1190 | | |
1191 | | int |
1192 | | impl::Writer::compareVersions(int major1, int minor1, int major2, int minor2) const |
1193 | 344 | { |
1194 | 344 | if (major1 < major2) { |
1195 | 9 | return -1; |
1196 | 9 | } |
1197 | 335 | if (major1 > major2) { |
1198 | 24 | return 1; |
1199 | 24 | } |
1200 | 311 | if (minor1 < minor2) { |
1201 | 210 | return -1; |
1202 | 210 | } |
1203 | 101 | return minor1 > minor2 ? 1 : 0; |
1204 | 311 | } |
1205 | | |
1206 | | void |
1207 | | impl::Writer::setEncryptionMinimumVersion() |
1208 | 0 | { |
1209 | 0 | auto const R = encryption->getR(); |
1210 | 0 | if (R >= 6) { |
1211 | 0 | setMinimumPDFVersion("1.7", 8); |
1212 | 0 | } else if (R == 5) { |
1213 | 0 | setMinimumPDFVersion("1.7", 3); |
1214 | 0 | } else if (R == 4) { |
1215 | 0 | setMinimumPDFVersion(cfg.encrypt_use_aes() ? "1.6" : "1.5"); |
1216 | 0 | } else if (R == 3) { |
1217 | 0 | setMinimumPDFVersion("1.4"); |
1218 | 0 | } else { |
1219 | 0 | setMinimumPDFVersion("1.3"); |
1220 | 0 | } |
1221 | 0 | } |
1222 | | |
1223 | | void |
1224 | | impl::Writer::setDataKey(int objid) |
1225 | 71.8k | { |
1226 | 71.8k | if (encryption) { |
1227 | 0 | cur_data_key = QPDF::compute_data_key( |
1228 | 0 | encryption_key, |
1229 | 0 | objid, |
1230 | 0 | 0, |
1231 | 0 | cfg.encrypt_use_aes(), |
1232 | 0 | encryption->getV(), |
1233 | 0 | encryption->getR()); |
1234 | 0 | } |
1235 | 71.8k | } |
1236 | | |
1237 | | unsigned int |
1238 | | impl::Writer::bytesNeeded(long long n) |
1239 | 861 | { |
1240 | 861 | unsigned int bytes = 0; |
1241 | 1.87k | while (n) { |
1242 | 1.01k | ++bytes; |
1243 | 1.01k | n >>= 8; |
1244 | 1.01k | } |
1245 | 861 | return bytes; |
1246 | 861 | } |
1247 | | |
1248 | | void |
1249 | | impl::Writer::writeBinary(unsigned long long val, unsigned int bytes) |
1250 | 69.9k | { |
1251 | 69.9k | if (bytes > sizeof(unsigned long long)) { |
1252 | 0 | throw std::logic_error("QPDFWriter::writeBinary called with too many bytes"); |
1253 | 0 | } |
1254 | 69.9k | unsigned char data[sizeof(unsigned long long)]; |
1255 | 165k | for (unsigned int i = 0; i < bytes; ++i) { |
1256 | 95.4k | data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff); |
1257 | 95.4k | val >>= 8; |
1258 | 95.4k | } |
1259 | 69.9k | pipeline->write(data, bytes); |
1260 | 69.9k | } |
1261 | | |
1262 | | impl::Writer& |
1263 | | impl::Writer::write(std::string_view str) |
1264 | 2.91M | { |
1265 | 2.91M | pipeline->write(str); |
1266 | 2.91M | return *this; |
1267 | 2.91M | } |
1268 | | |
1269 | | impl::Writer& |
1270 | | impl::Writer::write(std::integral auto val) |
1271 | 365k | { |
1272 | 365k | pipeline->write(std::to_string(val)); |
1273 | 365k | return *this; |
1274 | 365k | } _ZN4qpdf4impl6Writer5writeITkNSt3__18integralEiEERS1_T_ Line | Count | Source | 1271 | 303k | { | 1272 | 303k | pipeline->write(std::to_string(val)); | 1273 | 303k | return *this; | 1274 | 303k | } |
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralExEERS1_T_ Line | Count | Source | 1271 | 37.5k | { | 1272 | 37.5k | pipeline->write(std::to_string(val)); | 1273 | 37.5k | return *this; | 1274 | 37.5k | } |
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEmEERS1_T_ Line | Count | Source | 1271 | 24.6k | { | 1272 | 24.6k | pipeline->write(std::to_string(val)); | 1273 | 24.6k | return *this; | 1274 | 24.6k | } |
_ZN4qpdf4impl6Writer5writeITkNSt3__18integralEjEERS1_T_ Line | Count | Source | 1271 | 574 | { | 1272 | 574 | pipeline->write(std::to_string(val)); | 1273 | 574 | return *this; | 1274 | 574 | } |
|
1275 | | |
1276 | | impl::Writer& |
1277 | | impl::Writer::write(size_t count, char c) |
1278 | 0 | { |
1279 | 0 | pipeline->write(count, c); |
1280 | 0 | return *this; |
1281 | 0 | } |
1282 | | |
1283 | | impl::Writer& |
1284 | | impl::Writer::write_name(std::string const& str) |
1285 | 282k | { |
1286 | 282k | pipeline->write(Name::normalize(str)); |
1287 | 282k | return *this; |
1288 | 282k | } |
1289 | | |
1290 | | impl::Writer& |
1291 | | impl::Writer::write_string(std::string const& str, bool force_binary) |
1292 | 16.9k | { |
1293 | 16.9k | pipeline->write(QPDF_String(str).unparse(force_binary)); |
1294 | 16.9k | return *this; |
1295 | 16.9k | } |
1296 | | |
1297 | | template <typename... Args> |
1298 | | impl::Writer& |
1299 | | impl::Writer::write_qdf(Args&&... args) |
1300 | 188k | { |
1301 | 188k | if (cfg.qdf()) { |
1302 | 188k | pipeline->write(std::forward<Args>(args)...); |
1303 | 188k | } |
1304 | 188k | return *this; |
1305 | 188k | } qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [2]>(char const (&) [2]) Line | Count | Source | 1300 | 149k | { | 1301 | 149k | if (cfg.qdf()) { | 1302 | 149k | pipeline->write(std::forward<Args>(args)...); | 1303 | 149k | } | 1304 | 149k | return *this; | 1305 | 149k | } |
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [3]>(char const (&) [3]) Line | Count | Source | 1300 | 20.7k | { | 1301 | 20.7k | if (cfg.qdf()) { | 1302 | 20.7k | pipeline->write(std::forward<Args>(args)...); | 1303 | 20.7k | } | 1304 | 20.7k | return *this; | 1305 | 20.7k | } |
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [4]>(char const (&) [4]) Line | Count | Source | 1300 | 8.46k | { | 1301 | 8.46k | if (cfg.qdf()) { | 1302 | 8.46k | pipeline->write(std::forward<Args>(args)...); | 1303 | 8.46k | } | 1304 | 8.46k | return *this; | 1305 | 8.46k | } |
qpdf::impl::Writer& qpdf::impl::Writer::write_qdf<char const (&) [11]>(char const (&) [11]) Line | Count | Source | 1300 | 8.87k | { | 1301 | 8.87k | if (cfg.qdf()) { | 1302 | 8.87k | pipeline->write(std::forward<Args>(args)...); | 1303 | 8.87k | } | 1304 | 8.87k | return *this; | 1305 | 8.87k | } |
|
1306 | | |
1307 | | template <typename... Args> |
1308 | | impl::Writer& |
1309 | | impl::Writer::write_no_qdf(Args&&... args) |
1310 | 47.8k | { |
1311 | 47.8k | if (!cfg.qdf()) { |
1312 | 0 | pipeline->write(std::forward<Args>(args)...); |
1313 | 0 | } |
1314 | 47.8k | return *this; |
1315 | 47.8k | } qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [2]>(char const (&) [2]) Line | Count | Source | 1310 | 39.3k | { | 1311 | 39.3k | if (!cfg.qdf()) { | 1312 | 0 | pipeline->write(std::forward<Args>(args)...); | 1313 | 0 | } | 1314 | 39.3k | return *this; | 1315 | 39.3k | } |
qpdf::impl::Writer& qpdf::impl::Writer::write_no_qdf<char const (&) [4]>(char const (&) [4]) Line | Count | Source | 1310 | 8.46k | { | 1311 | 8.46k | if (!cfg.qdf()) { | 1312 | 0 | pipeline->write(std::forward<Args>(args)...); | 1313 | 0 | } | 1314 | 8.46k | return *this; | 1315 | 8.46k | } |
|
1316 | | |
1317 | | void |
1318 | | impl::Writer::adjustAESStreamLength(size_t& length) |
1319 | 22.7k | { |
1320 | 22.7k | if (encryption && !cur_data_key.empty() && cfg.encrypt_use_aes()) { |
1321 | | // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16. It will |
1322 | | // also be prepended by 16 bits of random data. |
1323 | 0 | length += 32 - (length & 0xf); |
1324 | 0 | } |
1325 | 22.7k | } |
1326 | | |
1327 | | impl::Writer& |
1328 | | impl::Writer::write_encrypted(std::string_view str) |
1329 | 22.6k | { |
1330 | 22.6k | if (!(encryption && !cur_data_key.empty())) { |
1331 | 22.6k | write(str); |
1332 | 22.6k | } else if (cfg.encrypt_use_aes()) { |
1333 | 0 | write(pl::pipe<Pl_AES_PDF>(str, true, cur_data_key)); |
1334 | 0 | } else { |
1335 | 0 | write(pl::pipe<Pl_RC4>(str, cur_data_key)); |
1336 | 0 | } |
1337 | | |
1338 | 22.6k | return *this; |
1339 | 22.6k | } |
1340 | | |
1341 | | void |
1342 | | impl::Writer::computeDeterministicIDData() |
1343 | 8.71k | { |
1344 | 8.71k | if (!id2.empty()) { |
1345 | | // Can't happen in the code |
1346 | 0 | throw std::logic_error( |
1347 | 0 | "Deterministic ID computation enabled after ID generation has already occurred."); |
1348 | 0 | } |
1349 | 8.71k | qpdf_assert_debug(deterministic_id_data.empty()); |
1350 | 8.71k | deterministic_id_data = pipeline_stack.hex_digest(); |
1351 | 8.71k | } |
1352 | | |
1353 | | int |
1354 | | impl::Writer::openObject(int objid) |
1355 | 93.3k | { |
1356 | 93.3k | if (objid == 0) { |
1357 | 0 | objid = next_objid++; |
1358 | 0 | } |
1359 | 93.3k | new_obj[objid].xref = QPDFXRefEntry(pipeline->getCount()); |
1360 | 93.3k | write(objid).write(" 0 obj\n"); |
1361 | 93.3k | return objid; |
1362 | 93.3k | } |
1363 | | |
1364 | | void |
1365 | | impl::Writer::closeObject(int objid) |
1366 | 93.1k | { |
1367 | | // Write a newline before endobj as it makes the file easier to repair. |
1368 | 93.1k | write("\nendobj\n").write_qdf("\n"); |
1369 | 93.1k | auto& no = new_obj[objid]; |
1370 | 93.1k | no.length = pipeline->getCount() - no.xref.getOffset(); |
1371 | 93.1k | } |
1372 | | |
1373 | | void |
1374 | | impl::Writer::assignCompressedObjectNumbers(QPDFObjGen og) |
1375 | 1.59k | { |
1376 | 1.59k | int objid = og.getObj(); |
1377 | 1.59k | if (og.getGen() != 0 || !object_stream_to_objects.contains(objid)) { |
1378 | | // This is not an object stream. |
1379 | 0 | return; |
1380 | 0 | } |
1381 | | |
1382 | | // Reserve numbers for the objects that belong to this object stream. |
1383 | 13.7k | for (auto const& iter: object_stream_to_objects[objid]) { |
1384 | 13.7k | obj[iter].renumber = next_objid++; |
1385 | 13.7k | } |
1386 | 1.59k | } |
1387 | | |
1388 | | void |
1389 | | impl::Writer::enqueue(QPDFObjectHandle const& object) |
1390 | 2.65M | { |
1391 | 2.65M | if (object.indirect()) { |
1392 | 207k | util::assertion( |
1393 | | // This owner check can only be done for indirect objects. It is possible for a direct |
1394 | | // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle |
1395 | | // from one file was insert into another file without copying. Doing that is safe even |
1396 | | // if the original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from |
1397 | | // its owner. |
1398 | 207k | object.qpdf() == &qpdf, |
1399 | 207k | "QPDFObjectHandle from different QPDF found while writing. " |
1400 | 207k | "Use QPDF::copyForeignObject to add objects from another file." // |
1401 | 207k | ); |
1402 | | |
1403 | 207k | if (cfg.qdf() && object.isStreamOfType("/XRef")) { |
1404 | | // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so |
1405 | | // will confuse fix-qdf, which expects to see only one XRef stream at the end of the |
1406 | | // file. This case can occur when creating a QDF from a file with object streams when |
1407 | | // preserving unreferenced objects since the old cross reference streams are not |
1408 | | // actually referenced by object number. |
1409 | 681 | return; |
1410 | 681 | } |
1411 | | |
1412 | 206k | QPDFObjGen og = object.getObjGen(); |
1413 | 206k | auto& o = obj[og]; |
1414 | | |
1415 | 206k | if (o.renumber == 0) { |
1416 | 74.2k | if (o.object_stream > 0) { |
1417 | | // This is in an object stream. Don't process it here. Instead, enqueue the object |
1418 | | // stream. Object streams always have generation 0. |
1419 | | // Detect loops by storing invalid object ID -1, which will get overwritten later. |
1420 | 2.08k | o.renumber = -1; |
1421 | 2.08k | enqueue(qpdf.getObject(o.object_stream, 0)); |
1422 | 72.1k | } else { |
1423 | 72.1k | object_queue.emplace_back(object); |
1424 | 72.1k | o.renumber = next_objid++; |
1425 | | |
1426 | 72.1k | if (og.getGen() == 0 && object_stream_to_objects.contains(og.getObj())) { |
1427 | | // For linearized files, uncompressed objects go at end, and we take care of |
1428 | | // assigning numbers to them elsewhere. |
1429 | 1.59k | if (!cfg.linearize()) { |
1430 | 1.59k | assignCompressedObjectNumbers(og); |
1431 | 1.59k | } |
1432 | 70.5k | } else if (!cfg.direct_stream_lengths() && object.isStream()) { |
1433 | | // reserve next object ID for length |
1434 | 21.3k | ++next_objid; |
1435 | 21.3k | } |
1436 | 72.1k | } |
1437 | 74.2k | } |
1438 | 206k | return; |
1439 | 207k | } |
1440 | | |
1441 | 2.44M | if (cfg.linearize()) { |
1442 | 0 | return; |
1443 | 0 | } |
1444 | | |
1445 | 2.44M | if (Array array = object) { |
1446 | 1.73M | for (auto& item: array) { |
1447 | 1.73M | enqueue(item); |
1448 | 1.73M | } |
1449 | 1.10M | return; |
1450 | 1.10M | } |
1451 | | |
1452 | 1.33M | for (auto const& item: Dictionary(object)) { |
1453 | 261k | if (!item.second.null()) { |
1454 | 245k | enqueue(item.second); |
1455 | 245k | } |
1456 | 261k | } |
1457 | 1.33M | } |
1458 | | |
1459 | | void |
1460 | | impl::Writer::unparseChild(QPDFObjectHandle const& child, size_t level, int flags) |
1461 | 646k | { |
1462 | 646k | if (!cfg.linearize()) { |
1463 | 646k | enqueue(child); |
1464 | 646k | } |
1465 | 646k | if (child.indirect()) { |
1466 | 113k | write(obj[child].renumber).write(" 0 R"); |
1467 | 533k | } else { |
1468 | 533k | unparseObject(child, level, flags); |
1469 | 533k | } |
1470 | 646k | } |
1471 | | |
1472 | | void |
1473 | | impl::Writer::writeTrailer( |
1474 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass) |
1475 | 8.71k | { |
1476 | 8.71k | auto trailer = trimmed_trailer(); |
1477 | 8.71k | if (xref_stream) { |
1478 | 287 | cur_data_key.clear(); |
1479 | 8.42k | } else { |
1480 | 8.42k | write("trailer <<"); |
1481 | 8.42k | } |
1482 | 8.71k | write_qdf("\n"); |
1483 | 8.71k | if (which == t_lin_second) { |
1484 | 0 | write(" /Size ").write(size); |
1485 | 8.71k | } else { |
1486 | 16.5k | for (auto const& [key, value]: trailer) { |
1487 | 16.5k | if (value.null()) { |
1488 | 3.11k | continue; |
1489 | 3.11k | } |
1490 | 13.4k | write_qdf(" ").write_no_qdf(" ").write_name(key).write(" "); |
1491 | 13.4k | if (key == "/Size") { |
1492 | 1.40k | write(size); |
1493 | 1.40k | if (which == t_lin_first) { |
1494 | 0 | write(" /Prev "); |
1495 | 0 | qpdf_offset_t pos = pipeline->getCount(); |
1496 | 0 | write(prev).write(QIntC::to_size(pos - pipeline->getCount() + 21), ' '); |
1497 | 0 | } |
1498 | 12.0k | } else { |
1499 | 12.0k | unparseChild(value, 1, 0); |
1500 | 12.0k | } |
1501 | 13.4k | write_qdf("\n"); |
1502 | 13.4k | } |
1503 | 8.71k | } |
1504 | | |
1505 | | // Write ID |
1506 | 8.71k | write_qdf(" ").write(" /ID ["); |
1507 | 8.71k | if (linearization_pass == 1) { |
1508 | 0 | std::string original_id1 = getOriginalID1(); |
1509 | 0 | if (original_id1.empty()) { |
1510 | 0 | write("<00000000000000000000000000000000>"); |
1511 | 0 | } else { |
1512 | | // Write a string of zeroes equal in length to the representation of the original ID. |
1513 | | // While writing the original ID would have the same number of bytes, it would cause a |
1514 | | // change to the deterministic ID generated by older versions of the software that |
1515 | | // hard-coded the length of the ID to 16 bytes. |
1516 | 0 | size_t len = QPDF_String(original_id1).unparse(true).length() - 2; |
1517 | 0 | write("<").write(len, '0').write(">"); |
1518 | 0 | } |
1519 | 0 | write("<00000000000000000000000000000000>"); |
1520 | 8.71k | } else { |
1521 | 8.71k | if (linearization_pass == 0 && cfg.deterministic_id()) { |
1522 | 8.71k | computeDeterministicIDData(); |
1523 | 8.71k | } |
1524 | 8.71k | generateID(encryption.get()); |
1525 | 8.71k | write_string(id1, true).write_string(id2, true); |
1526 | 8.71k | } |
1527 | 8.71k | write("]"); |
1528 | | |
1529 | 8.71k | if (which != t_lin_second) { |
1530 | | // Write reference to encryption dictionary |
1531 | 8.46k | if (encryption) { |
1532 | 0 | write(" /Encrypt ").write(encryption_dict_objid).write(" 0 R"); |
1533 | 0 | } |
1534 | 8.46k | } |
1535 | | |
1536 | 8.71k | write_qdf("\n>>").write_no_qdf(" >>"); |
1537 | 8.71k | } |
1538 | | |
1539 | | bool |
1540 | | impl::Writer::will_filter_stream(QPDFObjectHandle stream) |
1541 | 0 | { |
1542 | 0 | std::string s; |
1543 | 0 | [[maybe_unused]] auto [filter, ignore1, ignore2] = will_filter_stream(stream, &s); |
1544 | 0 | return filter; |
1545 | 0 | } |
1546 | | |
1547 | | std::tuple<const bool, const bool, const bool> |
1548 | | impl::Writer::will_filter_stream(QPDFObjectHandle stream, std::string* stream_data) |
1549 | 21.2k | { |
1550 | 21.2k | const bool is_root_metadata = stream.isRootMetadata(); |
1551 | 21.2k | bool filter = false; |
1552 | 21.2k | auto decode_level = cfg.decode_level(); |
1553 | 21.2k | int encode_flags = 0; |
1554 | 21.2k | Dictionary stream_dict = stream.getDict(); |
1555 | | |
1556 | 21.2k | if (stream.getFilterOnWrite()) { |
1557 | 21.2k | filter = stream.isDataModified() || cfg.compress_streams() || decode_level != qpdf_dl_none; |
1558 | 21.2k | if (cfg.compress_streams()) { |
1559 | | // Don't filter if the stream is already compressed with FlateDecode. This way we don't |
1560 | | // make it worse if the original file used a better Flate algorithm, and we don't spend |
1561 | | // time and CPU cycles uncompressing and recompressing stuff. This can be overridden |
1562 | | // with setRecompressFlate(true). |
1563 | 0 | Name Filter = stream_dict["/Filter"]; |
1564 | 0 | if (Filter && !cfg.recompress_flate() && !stream.isDataModified() && |
1565 | 0 | (Filter == "/FlateDecode" || Filter == "/Fl")) { |
1566 | 0 | filter = false; |
1567 | 0 | } |
1568 | 0 | } |
1569 | 21.2k | if (is_root_metadata && (!encryption || !encryption->getEncryptMetadata())) { |
1570 | 69 | filter = true; |
1571 | 69 | decode_level = qpdf_dl_all; |
1572 | 21.1k | } else if (cfg.normalize_content() && normalized_streams.contains(stream)) { |
1573 | 2.91k | encode_flags = qpdf_ef_normalize; |
1574 | 2.91k | filter = true; |
1575 | 18.2k | } else if (filter && cfg.compress_streams()) { |
1576 | 0 | encode_flags = qpdf_ef_compress; |
1577 | 0 | } |
1578 | 21.2k | } |
1579 | | |
1580 | | // Disable compression for empty streams to improve compatibility |
1581 | 21.2k | if (Integer(stream_dict["/Length"]) == 0) { |
1582 | 82 | filter = true; |
1583 | 82 | encode_flags = 0; |
1584 | 82 | } |
1585 | | |
1586 | 31.5k | for (bool first_attempt: {true, false}) { |
1587 | 31.5k | auto pp_stream_data = |
1588 | 31.5k | stream_data ? pipeline_stack.activate(*stream_data) : pipeline_stack.activate(true); |
1589 | | |
1590 | 31.5k | try { |
1591 | 31.5k | if (stream.pipeStreamData( |
1592 | 31.5k | pipeline, |
1593 | 31.5k | filter ? encode_flags : 0, |
1594 | 31.5k | filter ? decode_level : qpdf_dl_none, |
1595 | 31.5k | false, |
1596 | 31.5k | first_attempt)) { |
1597 | 10.8k | return {true, encode_flags & qpdf_ef_compress, is_root_metadata}; |
1598 | 10.8k | } |
1599 | 20.7k | if (!filter) { |
1600 | 10.3k | break; |
1601 | 10.3k | } |
1602 | 20.7k | } catch (std::runtime_error& e) { |
1603 | 29 | if (!(filter && first_attempt)) { |
1604 | 5 | throw std::runtime_error( |
1605 | 5 | "error while getting stream data for " + stream.unparse() + ": " + e.what()); |
1606 | 5 | } |
1607 | 24 | stream.warn("error while getting stream data: "s + e.what()); |
1608 | 24 | stream.warn("qpdf will attempt to write the damaged stream unchanged"); |
1609 | 24 | } |
1610 | | // Try again |
1611 | 10.3k | filter = false; |
1612 | 10.3k | stream.setFilterOnWrite(false); |
1613 | 10.3k | if (stream_data) { |
1614 | 10.3k | stream_data->clear(); |
1615 | 10.3k | } |
1616 | 10.3k | } |
1617 | 10.3k | return {false, false, is_root_metadata}; |
1618 | 21.2k | } |
1619 | | |
1620 | | void |
1621 | | impl::Writer::unparseObject( |
1622 | | QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress) |
1623 | 638k | { |
1624 | 638k | QPDFObjGen old_og = object.getObjGen(); |
1625 | 638k | int child_flags = flags & ~f_stream; |
1626 | | // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they |
1627 | | // include the preceding newline. |
1628 | 638k | std::string indent_large = " "; |
1629 | 638k | if (cfg.qdf()) { |
1630 | 638k | indent_large.append(2 * (level + 1), ' '); |
1631 | 638k | indent_large[0] = '\n'; |
1632 | 638k | } |
1633 | 638k | std::string_view indent{indent_large.data(), cfg.qdf() ? indent_large.size() - 2 : 1}; |
1634 | | |
1635 | 638k | if (auto const tc = object.getTypeCode(); tc == ::ot_array) { |
1636 | | // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the |
1637 | | // [ in the /H key of the linearization parameter dictionary. We'll do this unconditionally |
1638 | | // for all arrays because it looks nicer and doesn't make the files that much bigger. |
1639 | 56.9k | write("["); |
1640 | 365k | for (auto const& item: object.as_array()) { |
1641 | 365k | write(indent_large); |
1642 | 365k | unparseChild(item, level + 1, child_flags); |
1643 | 365k | } |
1644 | 56.9k | write(indent).write("]"); |
1645 | 581k | } else if (tc == ::ot_dictionary) { |
1646 | | // Handle special cases for specific dictionaries. |
1647 | | |
1648 | 91.7k | if (old_og == root_og) { |
1649 | | // Extensions dictionaries. |
1650 | | |
1651 | | // We have one of several cases: |
1652 | | // |
1653 | | // * We need ADBE |
1654 | | // - We already have Extensions |
1655 | | // - If it has the right ADBE, preserve it |
1656 | | // - Otherwise, replace ADBE |
1657 | | // - We don't have Extensions: create one from scratch |
1658 | | // * We don't want ADBE |
1659 | | // - We already have Extensions |
1660 | | // - If it only has ADBE, remove it |
1661 | | // - If it has other things, keep those and remove ADBE |
1662 | | // - We have no extensions: no action required |
1663 | | // |
1664 | | // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE |
1665 | | // dictionary, so we can modify in place. |
1666 | | |
1667 | 8.58k | auto extensions = object.getKey("/Extensions"); |
1668 | 8.58k | const bool has_extensions = extensions.isDictionary(); |
1669 | 8.58k | const bool need_extensions_adbe = final_extension_level > 0; |
1670 | | |
1671 | 8.58k | if (has_extensions || need_extensions_adbe) { |
1672 | | // Make a shallow copy of this object so we can modify it safely without affecting |
1673 | | // the original. This code has logic to skip certain keys in agreement with |
1674 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them |
1675 | | // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy |
1676 | | // here because all we are doing is removing or replacing top-level keys. |
1677 | 140 | object = object.unsafeShallowCopy(); |
1678 | 140 | if (!has_extensions) { |
1679 | 0 | extensions = QPDFObjectHandle(); |
1680 | 0 | } |
1681 | | |
1682 | 140 | const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE"); |
1683 | 140 | const bool have_extensions_other = |
1684 | 140 | extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u); |
1685 | | |
1686 | 140 | if (need_extensions_adbe) { |
1687 | 35 | if (!(have_extensions_other || have_extensions_adbe)) { |
1688 | | // We need Extensions and don't have it. Create it here. |
1689 | 0 | QTC::TC("qpdf", "QPDFWriter create Extensions", cfg.qdf() ? 0 : 1); |
1690 | 0 | extensions = object.replaceKeyAndGetNew( |
1691 | 0 | "/Extensions", QPDFObjectHandle::newDictionary()); |
1692 | 0 | } |
1693 | 105 | } else if (!have_extensions_other) { |
1694 | | // We have Extensions dictionary and don't want one. |
1695 | 10 | if (have_extensions_adbe) { |
1696 | 8 | QTC::TC("qpdf", "QPDFWriter remove existing Extensions"); |
1697 | 8 | object.removeKey("/Extensions"); |
1698 | 8 | extensions = QPDFObjectHandle(); // uninitialized |
1699 | 8 | } |
1700 | 10 | } |
1701 | | |
1702 | 140 | if (extensions) { |
1703 | 132 | QTC::TC("qpdf", "QPDFWriter preserve Extensions"); |
1704 | 132 | QPDFObjectHandle adbe = extensions.getKey("/ADBE"); |
1705 | 132 | if (adbe.isDictionary() && |
1706 | 37 | adbe.getKey("/BaseVersion").isNameAndEquals("/" + final_pdf_version) && |
1707 | 21 | adbe.getKey("/ExtensionLevel").isInteger() && |
1708 | 20 | (adbe.getKey("/ExtensionLevel").getIntValue() == final_extension_level)) { |
1709 | 127 | } else { |
1710 | 127 | if (need_extensions_adbe) { |
1711 | 30 | extensions.replaceKey( |
1712 | 30 | "/ADBE", |
1713 | 30 | QPDFObjectHandle::parse( |
1714 | 30 | "<< /BaseVersion /" + final_pdf_version + " /ExtensionLevel " + |
1715 | 30 | std::to_string(final_extension_level) + " >>")); |
1716 | 97 | } else { |
1717 | 97 | extensions.removeKey("/ADBE"); |
1718 | 97 | } |
1719 | 127 | } |
1720 | 132 | } |
1721 | 140 | } |
1722 | 8.58k | } |
1723 | | |
1724 | | // Stream dictionaries. |
1725 | | |
1726 | 91.7k | if (flags & f_stream) { |
1727 | | // Suppress /Length since we will write it manually |
1728 | | |
1729 | | // Make a shallow copy of this object so we can modify it safely without affecting the |
1730 | | // original. This code has logic to skip certain keys in agreement with |
1731 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't |
1732 | | // leave unreferenced objects in the output. We can use unsafeShallowCopy here because |
1733 | | // all we are doing is removing or replacing top-level keys. |
1734 | 21.2k | object = object.unsafeShallowCopy(); |
1735 | | |
1736 | 21.2k | object.removeKey("/Length"); |
1737 | | |
1738 | | // If /DecodeParms is an empty list, remove it. |
1739 | 21.2k | if (object.getKey("/DecodeParms").empty()) { |
1740 | 20.5k | object.removeKey("/DecodeParms"); |
1741 | 20.5k | } |
1742 | | |
1743 | 21.2k | if (flags & f_filtered) { |
1744 | | // We will supply our own filter and decode parameters. |
1745 | 10.8k | object.removeKey("/Filter"); |
1746 | 10.8k | object.removeKey("/DecodeParms"); |
1747 | 10.8k | } else { |
1748 | | // Make sure, no matter what else we have, that we don't have /Crypt in the output |
1749 | | // filters. |
1750 | 10.3k | QPDFObjectHandle filter = object.getKey("/Filter"); |
1751 | 10.3k | QPDFObjectHandle decode_parms = object.getKey("/DecodeParms"); |
1752 | 10.3k | if (filter.isOrHasName("/Crypt")) { |
1753 | 197 | if (filter.isName()) { |
1754 | 25 | object.removeKey("/Filter"); |
1755 | 25 | object.removeKey("/DecodeParms"); |
1756 | 172 | } else { |
1757 | 172 | int idx = 0; |
1758 | 1.24k | for (auto const& item: filter.as_array()) { |
1759 | 1.24k | if (item.isNameAndEquals("/Crypt")) { |
1760 | | // If filter is an array, then the code in QPDF_Stream has already |
1761 | | // verified that DecodeParms and Filters are arrays of the same |
1762 | | // length, but if they weren't for some reason, eraseItem does type |
1763 | | // and bounds checking. Fuzzing tells us that this can actually |
1764 | | // happen. |
1765 | 172 | filter.eraseItem(idx); |
1766 | 172 | decode_parms.eraseItem(idx); |
1767 | 172 | break; |
1768 | 172 | } |
1769 | 1.06k | ++idx; |
1770 | 1.06k | } |
1771 | 172 | } |
1772 | 197 | } |
1773 | 10.3k | } |
1774 | 21.2k | } |
1775 | | |
1776 | 91.7k | write("<<"); |
1777 | | |
1778 | 310k | for (auto const& [key, value]: object.as_dictionary()) { |
1779 | 310k | if (!value.null()) { |
1780 | 268k | write(indent_large).write_name(key).write(" "); |
1781 | 268k | if (key == "/Contents" && object.isDictionaryOfType("/Sig") && |
1782 | 104 | object.hasKey("/ByteRange")) { |
1783 | 102 | QTC::TC("qpdf", "QPDFWriter no encryption sig contents"); |
1784 | 102 | unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption); |
1785 | 268k | } else { |
1786 | 268k | unparseChild(value, level + 1, child_flags); |
1787 | 268k | } |
1788 | 268k | } |
1789 | 310k | } |
1790 | | |
1791 | 91.7k | if (flags & f_stream) { |
1792 | 21.1k | write(indent_large).write("/Length "); |
1793 | | |
1794 | 21.1k | if (cfg.direct_stream_lengths()) { |
1795 | 0 | write(stream_length); |
1796 | 21.1k | } else { |
1797 | 21.1k | write(cur_stream_length_id).write(" 0 R"); |
1798 | 21.1k | } |
1799 | 21.1k | if (compress && (flags & f_filtered)) { |
1800 | 0 | write(indent_large).write("/Filter /FlateDecode"); |
1801 | 0 | } |
1802 | 21.1k | } |
1803 | | |
1804 | 91.7k | write(indent).write(">>"); |
1805 | 490k | } else if (tc == ::ot_stream) { |
1806 | | // Write stream data to a buffer. |
1807 | 21.2k | if (!cfg.direct_stream_lengths()) { |
1808 | 21.2k | cur_stream_length_id = obj[old_og].renumber + 1; |
1809 | 21.2k | } |
1810 | | |
1811 | 21.2k | flags |= f_stream; |
1812 | 21.2k | std::string stream_data; |
1813 | 21.2k | auto [filter, compress_stream, is_root_metadata] = will_filter_stream(object, &stream_data); |
1814 | 21.2k | if (filter) { |
1815 | 10.8k | flags |= f_filtered; |
1816 | 10.8k | } |
1817 | 21.2k | QPDFObjectHandle stream_dict = object.getDict(); |
1818 | | |
1819 | 21.2k | cur_stream_length = stream_data.size(); |
1820 | 21.2k | if (is_root_metadata && encryption && !encryption->getEncryptMetadata()) { |
1821 | | // Don't encrypt stream data for the metadata stream |
1822 | 0 | cur_data_key.clear(); |
1823 | 0 | } |
1824 | 21.2k | adjustAESStreamLength(cur_stream_length); |
1825 | 21.2k | unparseObject(stream_dict, 0, flags, cur_stream_length, compress_stream); |
1826 | 21.2k | char last_char = stream_data.empty() ? '\0' : stream_data.back(); |
1827 | 21.2k | write("\nstream\n").write_encrypted(stream_data); |
1828 | 21.2k | added_newline = cfg.newline_before_endstream() || (cfg.qdf() && last_char != '\n'); |
1829 | 21.2k | write(added_newline ? "\nendstream" : "endstream"); |
1830 | 468k | } else if (tc == ::ot_string) { |
1831 | 25.9k | std::string val; |
1832 | 25.9k | if (encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) && |
1833 | 0 | !cur_data_key.empty()) { |
1834 | 0 | val = object.getStringValue(); |
1835 | 0 | if (cfg.encrypt_use_aes()) { |
1836 | 0 | Pl_Buffer bufpl("encrypted string"); |
1837 | 0 | Pl_AES_PDF pl("aes encrypt string", &bufpl, true, cur_data_key); |
1838 | 0 | pl.writeString(val); |
1839 | 0 | pl.finish(); |
1840 | 0 | val = QPDF_String(bufpl.getString()).unparse(true); |
1841 | 0 | } else { |
1842 | 0 | auto tmp_ph = QUtil::make_unique_cstr(val); |
1843 | 0 | char* tmp = tmp_ph.get(); |
1844 | 0 | size_t vlen = val.length(); |
1845 | 0 | RC4 rc4( |
1846 | 0 | QUtil::unsigned_char_pointer(cur_data_key), |
1847 | 0 | QIntC::to_int(cur_data_key.length())); |
1848 | 0 | auto data = QUtil::unsigned_char_pointer(tmp); |
1849 | 0 | rc4.process(data, vlen, data); |
1850 | 0 | val = QPDF_String(std::string(tmp, vlen)).unparse(); |
1851 | 0 | } |
1852 | 25.9k | } else if (flags & f_hex_string) { |
1853 | 102 | val = QPDF_String(object.getStringValue()).unparse(true); |
1854 | 25.8k | } else { |
1855 | 25.8k | val = object.unparseResolved(); |
1856 | 25.8k | } |
1857 | 25.9k | write(val); |
1858 | 442k | } else { |
1859 | 442k | write(object.unparseResolved()); |
1860 | 442k | } |
1861 | 638k | } |
1862 | | |
1863 | | void |
1864 | | impl::Writer::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj) |
1865 | 3.15k | { |
1866 | 3.15k | qpdf_assert_debug(first_obj > 0); |
1867 | 3.15k | bool is_first = true; |
1868 | 3.15k | auto id = std::to_string(first_obj) + ' '; |
1869 | 27.5k | for (auto& offset: offsets) { |
1870 | 27.5k | if (is_first) { |
1871 | 3.15k | is_first = false; |
1872 | 24.3k | } else { |
1873 | 24.3k | write_qdf("\n").write_no_qdf(" "); |
1874 | 24.3k | } |
1875 | 27.5k | write(id); |
1876 | 27.5k | util::increment(id, 1); |
1877 | 27.5k | write(offset); |
1878 | 27.5k | } |
1879 | 3.15k | write("\n"); |
1880 | 3.15k | } |
1881 | | |
1882 | | void |
1883 | | impl::Writer::writeObjectStream(QPDFObjectHandle object) |
1884 | 1.57k | { |
1885 | | // Note: object might be null if this is a place-holder for an object stream that we are |
1886 | | // generating from scratch. |
1887 | | |
1888 | 1.57k | QPDFObjGen old_og = object.getObjGen(); |
1889 | 1.57k | qpdf_assert_debug(old_og.getGen() == 0); |
1890 | 1.57k | int old_id = old_og.getObj(); |
1891 | 1.57k | int new_stream_id = obj[old_og].renumber; |
1892 | | |
1893 | 1.57k | std::vector<qpdf_offset_t> offsets; |
1894 | 1.57k | qpdf_offset_t first = 0; |
1895 | | |
1896 | | // Generate stream itself. We have to do this in two passes so we can calculate offsets in the |
1897 | | // first pass. |
1898 | 1.57k | std::string stream_buffer_pass1; |
1899 | 1.57k | std::string stream_buffer_pass2; |
1900 | 1.57k | int first_obj = -1; |
1901 | 1.57k | const bool compressed = cfg.compress_streams() && !cfg.qdf(); |
1902 | 1.57k | { |
1903 | | // Pass 1 |
1904 | 1.57k | auto pp_ostream_pass1 = pipeline_stack.activate(stream_buffer_pass1); |
1905 | | |
1906 | 1.57k | int count = -1; |
1907 | 13.7k | for (auto const& og: object_stream_to_objects[old_id]) { |
1908 | 13.7k | ++count; |
1909 | 13.7k | int new_o = obj[og].renumber; |
1910 | 13.7k | if (first_obj == -1) { |
1911 | 1.57k | first_obj = new_o; |
1912 | 1.57k | } |
1913 | 13.7k | if (cfg.qdf()) { |
1914 | 13.7k | write("%% Object stream: object ").write(new_o).write(", index ").write(count); |
1915 | 13.7k | if (!cfg.no_original_object_ids()) { |
1916 | 13.7k | write("; original object ID: ").write(og.getObj()); |
1917 | | // For compatibility, only write the generation if non-zero. While object |
1918 | | // streams only allow objects with generation 0, if we are generating object |
1919 | | // streams, the old object could have a non-zero generation. |
1920 | 13.7k | if (og.getGen() != 0) { |
1921 | 0 | write(" ").write(og.getGen()); |
1922 | 0 | } |
1923 | 13.7k | } |
1924 | 13.7k | write("\n"); |
1925 | 13.7k | } |
1926 | | |
1927 | 13.7k | offsets.push_back(pipeline->getCount()); |
1928 | | // To avoid double-counting objects being written in object streams for progress |
1929 | | // reporting, decrement in pass 1. |
1930 | 13.7k | indicateProgress(true, false); |
1931 | | |
1932 | 13.7k | QPDFObjectHandle obj_to_write = qpdf.getObject(og); |
1933 | 13.7k | if (obj_to_write.isStream()) { |
1934 | | // This condition occurred in a fuzz input. Ideally we should block it at parse |
1935 | | // time, but it's not clear to me how to construct a case for this. |
1936 | 0 | obj_to_write.warn("stream found inside object stream; treating as null"); |
1937 | 0 | obj_to_write = QPDFObjectHandle::newNull(); |
1938 | 0 | } |
1939 | 13.7k | writeObject(obj_to_write, count); |
1940 | | |
1941 | 13.7k | new_obj[new_o].xref = QPDFXRefEntry(new_stream_id, count); |
1942 | 13.7k | } |
1943 | 1.57k | } |
1944 | 1.57k | { |
1945 | | // Adjust offsets to skip over comment before first object |
1946 | 1.57k | first = offsets.at(0); |
1947 | 13.7k | for (auto& iter: offsets) { |
1948 | 13.7k | iter -= first; |
1949 | 13.7k | } |
1950 | | |
1951 | | // Take one pass at writing pairs of numbers so we can get their size information |
1952 | 1.57k | { |
1953 | 1.57k | auto pp_discard = pipeline_stack.activate(true); |
1954 | 1.57k | writeObjectStreamOffsets(offsets, first_obj); |
1955 | 1.57k | first += pipeline->getCount(); |
1956 | 1.57k | } |
1957 | | |
1958 | | // Set up a stream to write the stream data into a buffer. |
1959 | 1.57k | auto pp_ostream = pipeline_stack.activate(stream_buffer_pass2); |
1960 | | |
1961 | 1.57k | writeObjectStreamOffsets(offsets, first_obj); |
1962 | 1.57k | write(stream_buffer_pass1); |
1963 | 1.57k | stream_buffer_pass1.clear(); |
1964 | 1.57k | stream_buffer_pass1.shrink_to_fit(); |
1965 | 1.57k | if (compressed) { |
1966 | 0 | stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate); |
1967 | 0 | } |
1968 | 1.57k | } |
1969 | | |
1970 | | // Write the object |
1971 | 1.57k | openObject(new_stream_id); |
1972 | 1.57k | setDataKey(new_stream_id); |
1973 | 1.57k | write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n "); |
1974 | 1.57k | size_t length = stream_buffer_pass2.size(); |
1975 | 1.57k | adjustAESStreamLength(length); |
1976 | 1.57k | write(" /Length ").write(length).write_qdf("\n "); |
1977 | 1.57k | if (compressed) { |
1978 | 0 | write(" /Filter /FlateDecode"); |
1979 | 0 | } |
1980 | 1.57k | write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first); |
1981 | 1.57k | if (!object.null()) { |
1982 | | // If the original object has an /Extends key, preserve it. |
1983 | 384 | QPDFObjectHandle dict = object.getDict(); |
1984 | 384 | QPDFObjectHandle extends = dict.getKey("/Extends"); |
1985 | 384 | if (extends.isIndirect()) { |
1986 | 76 | write_qdf("\n ").write(" /Extends "); |
1987 | 76 | unparseChild(extends, 1, f_in_ostream); |
1988 | 76 | } |
1989 | 384 | } |
1990 | 1.57k | write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2); |
1991 | 1.57k | write(cfg.newline_before_endstream() ? "\nendstream" : "endstream"); |
1992 | 1.57k | if (encryption) { |
1993 | 0 | cur_data_key.clear(); |
1994 | 0 | } |
1995 | 1.57k | closeObject(new_stream_id); |
1996 | 1.57k | } |
1997 | | |
1998 | | void |
1999 | | impl::Writer::writeObject(QPDFObjectHandle object, int object_stream_index) |
2000 | 85.6k | { |
2001 | 85.6k | QPDFObjGen old_og = object.getObjGen(); |
2002 | | |
2003 | 85.6k | if (object_stream_index == -1 && old_og.getGen() == 0 && |
2004 | 70.9k | object_stream_to_objects.contains(old_og.getObj())) { |
2005 | 1.57k | writeObjectStream(object); |
2006 | 1.57k | return; |
2007 | 1.57k | } |
2008 | | |
2009 | 84.0k | indicateProgress(false, false); |
2010 | 84.0k | auto new_id = obj[old_og].renumber; |
2011 | 84.0k | if (cfg.qdf()) { |
2012 | 84.0k | if (page_object_to_seq.contains(old_og)) { |
2013 | 10.4k | write("%% Page ").write(page_object_to_seq[old_og]).write("\n"); |
2014 | 10.4k | } |
2015 | 84.0k | if (contents_to_page_seq.contains(old_og)) { |
2016 | 5.75k | write("%% Contents for page ").write(contents_to_page_seq[old_og]).write("\n"); |
2017 | 5.75k | } |
2018 | 84.0k | } |
2019 | 84.0k | if (object_stream_index == -1) { |
2020 | 70.2k | if (cfg.qdf() && !cfg.no_original_object_ids()) { |
2021 | 70.2k | write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n"); |
2022 | 70.2k | } |
2023 | 70.2k | openObject(new_id); |
2024 | 70.2k | setDataKey(new_id); |
2025 | 70.2k | unparseObject(object, 0, 0); |
2026 | 70.2k | cur_data_key.clear(); |
2027 | 70.2k | closeObject(new_id); |
2028 | 70.2k | } else { |
2029 | 13.7k | unparseObject(object, 0, f_in_ostream); |
2030 | 13.7k | write("\n"); |
2031 | 13.7k | } |
2032 | | |
2033 | 84.0k | if (!cfg.direct_stream_lengths() && object.isStream()) { |
2034 | 21.1k | if (cfg.qdf()) { |
2035 | 21.1k | if (added_newline) { |
2036 | 12.0k | write("%QDF: ignore_newline\n"); |
2037 | 12.0k | } |
2038 | 21.1k | } |
2039 | 21.1k | openObject(new_id + 1); |
2040 | 21.1k | write(cur_stream_length); |
2041 | 21.1k | closeObject(new_id + 1); |
2042 | 21.1k | } |
2043 | 84.0k | } |
2044 | | |
2045 | | std::string |
2046 | | impl::Writer::getOriginalID1() |
2047 | 8.71k | { |
2048 | 8.71k | QPDFObjectHandle trailer = qpdf.getTrailer(); |
2049 | 8.71k | if (trailer.hasKey("/ID")) { |
2050 | 933 | return trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
2051 | 7.78k | } else { |
2052 | 7.78k | return ""; |
2053 | 7.78k | } |
2054 | 8.71k | } |
2055 | | |
2056 | | void |
2057 | | impl::Writer::generateID(bool encrypted) |
2058 | 8.71k | { |
2059 | | // Generate the ID lazily so that we can handle the user's preference to use static or |
2060 | | // deterministic ID generation. |
2061 | | |
2062 | 8.71k | if (!id2.empty()) { |
2063 | 0 | return; |
2064 | 0 | } |
2065 | | |
2066 | 8.71k | QPDFObjectHandle trailer = qpdf.getTrailer(); |
2067 | | |
2068 | 8.71k | std::string result; |
2069 | | |
2070 | 8.71k | if (cfg.static_id()) { |
2071 | | // For test suite use only... |
2072 | 0 | static unsigned char tmp[] = { |
2073 | 0 | 0x31, |
2074 | 0 | 0x41, |
2075 | 0 | 0x59, |
2076 | 0 | 0x26, |
2077 | 0 | 0x53, |
2078 | 0 | 0x58, |
2079 | 0 | 0x97, |
2080 | 0 | 0x93, |
2081 | 0 | 0x23, |
2082 | 0 | 0x84, |
2083 | 0 | 0x62, |
2084 | 0 | 0x64, |
2085 | 0 | 0x33, |
2086 | 0 | 0x83, |
2087 | 0 | 0x27, |
2088 | 0 | 0x95, |
2089 | 0 | 0x00}; |
2090 | 0 | result = reinterpret_cast<char*>(tmp); |
2091 | 8.71k | } else { |
2092 | | // The PDF specification has guidelines for creating IDs, but it states clearly that the |
2093 | | // only thing that's really important is that it is very likely to be unique. We can't |
2094 | | // really follow the guidelines in the spec exactly because we haven't written the file yet. |
2095 | | // This scheme should be fine though. The deterministic ID case uses a digest of a |
2096 | | // sufficient portion of the file's contents such no two non-matching files would match in |
2097 | | // the subsets used for this computation. Note that we explicitly omit the filename from |
2098 | | // the digest calculation for deterministic ID so that the same file converted with qpdf, in |
2099 | | // that case, would have the same ID regardless of the output file's name. |
2100 | | |
2101 | 8.71k | std::string seed; |
2102 | 8.71k | if (cfg.deterministic_id()) { |
2103 | 8.71k | if (encrypted) { |
2104 | 0 | throw std::runtime_error( |
2105 | 0 | "QPDFWriter: unable to generated a deterministic ID because the file to be " |
2106 | 0 | "written is encrypted (even though the file may not require a password)"); |
2107 | 0 | } |
2108 | 8.71k | if (deterministic_id_data.empty()) { |
2109 | 0 | throw std::logic_error( |
2110 | 0 | "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID"); |
2111 | 0 | } |
2112 | 8.71k | seed += deterministic_id_data; |
2113 | 8.71k | } else { |
2114 | 0 | seed += std::to_string(QUtil::get_current_time()); |
2115 | 0 | seed += filename; |
2116 | 0 | seed += " "; |
2117 | 0 | } |
2118 | 8.71k | seed += " QPDF "; |
2119 | 8.71k | if (trailer.hasKey("/Info")) { |
2120 | 1.38k | for (auto const& item: trailer.getKey("/Info").as_dictionary()) { |
2121 | 1.38k | if (item.second.isString()) { |
2122 | 600 | seed += " "; |
2123 | 600 | seed += item.second.getStringValue(); |
2124 | 600 | } |
2125 | 1.38k | } |
2126 | 311 | } |
2127 | | |
2128 | 8.71k | MD5 md5; |
2129 | 8.71k | md5.encodeString(seed.c_str()); |
2130 | 8.71k | MD5::Digest digest; |
2131 | 8.71k | md5.digest(digest); |
2132 | 8.71k | result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest)); |
2133 | 8.71k | } |
2134 | | |
2135 | | // If /ID already exists, follow the spec: use the original first word and generate a new second |
2136 | | // word. Otherwise, we'll use the generated ID for both. |
2137 | | |
2138 | 8.71k | id2 = result; |
2139 | | // Note: keep /ID from old file even if --static-id was given. |
2140 | 8.71k | id1 = getOriginalID1(); |
2141 | 8.71k | if (id1.empty()) { |
2142 | 7.83k | id1 = id2; |
2143 | 7.83k | } |
2144 | 8.71k | } |
2145 | | |
2146 | | void |
2147 | | impl::Writer::initializeSpecialStreams() |
2148 | 8.99k | { |
2149 | | // Mark all page content streams in case we are filtering or normalizing. |
2150 | 8.99k | int num = 0; |
2151 | 10.7k | for (auto& page: pages) { |
2152 | 10.7k | page_object_to_seq[page.getObjGen()] = ++num; |
2153 | 10.7k | QPDFObjectHandle contents = page.getKey("/Contents"); |
2154 | 10.7k | std::vector<QPDFObjGen> contents_objects; |
2155 | 10.7k | if (contents.isArray()) { |
2156 | 393 | int n = static_cast<int>(contents.size()); |
2157 | 33.8k | for (int i = 0; i < n; ++i) { |
2158 | 33.4k | contents_objects.push_back(contents.getArrayItem(i).getObjGen()); |
2159 | 33.4k | } |
2160 | 10.3k | } else if (contents.isStream()) { |
2161 | 1.98k | contents_objects.push_back(contents.getObjGen()); |
2162 | 1.98k | } |
2163 | | |
2164 | 35.4k | for (auto const& c: contents_objects) { |
2165 | 35.4k | contents_to_page_seq[c] = num; |
2166 | 35.4k | normalized_streams.insert(c); |
2167 | 35.4k | } |
2168 | 10.7k | } |
2169 | 8.99k | } |
2170 | | |
2171 | | void |
2172 | | impl::Writer::preserveObjectStreams() |
2173 | 8.97k | { |
2174 | 8.97k | auto const& xref = objects.xref_table(); |
2175 | | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
2176 | | // streams out of old objects that have generation numbers greater than zero. However in an |
2177 | | // existing PDF, all object stream objects and all objects in them must have generation 0 |
2178 | | // because the PDF spec does not provide any way to do otherwise. This code filters out objects |
2179 | | // that are not allowed to be in object streams. In addition to removing objects that were |
2180 | | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
2181 | | // objects from being included. |
2182 | 8.97k | auto end = xref.cend(); |
2183 | 8.97k | obj.streams_empty = true; |
2184 | 8.97k | if (cfg.preserve_unreferenced()) { |
2185 | 0 | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
2186 | 0 | if (iter->second.getType() == 2) { |
2187 | | // Pdf contains object streams. |
2188 | 0 | obj.streams_empty = false; |
2189 | 0 | obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
2190 | 0 | } |
2191 | 0 | } |
2192 | 8.97k | } else { |
2193 | | // Start by scanning for first compressed object in case we don't have any object streams to |
2194 | | // process. |
2195 | 75.0k | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
2196 | 66.9k | if (iter->second.getType() == 2) { |
2197 | | // Pdf contains object streams. |
2198 | 888 | obj.streams_empty = false; |
2199 | 888 | auto eligible = objects.compressible_set(); |
2200 | | // The object pointed to by iter may be a previous generation, in which case it is |
2201 | | // removed by compressible_set. We need to restart the loop (while the object |
2202 | | // table may contain multiple generations of an object). |
2203 | 145k | for (iter = xref.cbegin(); iter != end; ++iter) { |
2204 | 144k | if (iter->second.getType() == 2) { |
2205 | 129k | auto id = static_cast<size_t>(iter->first.getObj()); |
2206 | 129k | if (id < eligible.size() && eligible[id]) { |
2207 | 14.5k | obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
2208 | 114k | } else { |
2209 | 114k | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
2210 | 114k | } |
2211 | 129k | } |
2212 | 144k | } |
2213 | 888 | return; |
2214 | 888 | } |
2215 | 66.9k | } |
2216 | 8.97k | } |
2217 | 8.97k | } |
2218 | | |
2219 | | void |
2220 | | impl::Writer::generateObjectStreams() |
2221 | 0 | { |
2222 | | // Basic strategy: make a list of objects that can go into an object stream. Then figure out |
2223 | | // how many object streams are needed so that we can distribute objects approximately evenly |
2224 | | // without having any object stream exceed 100 members. We don't have to worry about linearized |
2225 | | // files here -- if the file is linearized, we take care of excluding things that aren't allowed |
2226 | | // here later. |
2227 | | |
2228 | | // This code doesn't do anything with /Extends. |
2229 | |
|
2230 | 0 | auto eligible = objects.compressible_vector(); |
2231 | 0 | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
2232 | |
|
2233 | 0 | initializeTables(2U * n_object_streams); |
2234 | 0 | if (n_object_streams == 0) { |
2235 | 0 | obj.streams_empty = true; |
2236 | 0 | return; |
2237 | 0 | } |
2238 | 0 | size_t n_per = eligible.size() / n_object_streams; |
2239 | 0 | if (n_per * n_object_streams < eligible.size()) { |
2240 | 0 | ++n_per; |
2241 | 0 | } |
2242 | 0 | unsigned int n = 0; |
2243 | 0 | int cur_ostream = qpdf.newIndirectNull().getObjectID(); |
2244 | 0 | for (auto const& item: eligible) { |
2245 | 0 | if (n == n_per) { |
2246 | 0 | n = 0; |
2247 | | // Construct a new null object as the "original" object stream. The rest of the code |
2248 | | // knows that this means we're creating the object stream from scratch. |
2249 | 0 | cur_ostream = qpdf.newIndirectNull().getObjectID(); |
2250 | 0 | } |
2251 | 0 | auto& o = obj[item]; |
2252 | 0 | o.object_stream = cur_ostream; |
2253 | 0 | o.gen = item.getGen(); |
2254 | 0 | ++n; |
2255 | 0 | } |
2256 | 0 | } |
2257 | | |
2258 | | Dictionary |
2259 | | impl::Writer::trimmed_trailer() |
2260 | 17.5k | { |
2261 | | // Remove keys from the trailer that necessarily have to be replaced when writing the file. |
2262 | | |
2263 | 17.5k | Dictionary trailer = qpdf.getTrailer().unsafeShallowCopy(); |
2264 | | |
2265 | | // Remove encryption keys |
2266 | 17.5k | trailer.erase("/ID"); |
2267 | 17.5k | trailer.erase("/Encrypt"); |
2268 | | |
2269 | | // Remove modification information |
2270 | 17.5k | trailer.erase("/Prev"); |
2271 | | |
2272 | | // Remove all trailer keys that potentially come from a cross-reference stream |
2273 | 17.5k | trailer.erase("/Index"); |
2274 | 17.5k | trailer.erase("/W"); |
2275 | 17.5k | trailer.erase("/Length"); |
2276 | 17.5k | trailer.erase("/Filter"); |
2277 | 17.5k | trailer.erase("/DecodeParms"); |
2278 | 17.5k | trailer.erase("/Type"); |
2279 | 17.5k | trailer.erase("/XRefStm"); |
2280 | | |
2281 | 17.5k | return trailer; |
2282 | 17.5k | } |
2283 | | |
2284 | | // Make document extension level information direct as required by the spec. |
2285 | | void |
2286 | | impl::Writer::prepareFileForWrite() |
2287 | 8.92k | { |
2288 | 8.92k | qpdf.fixDanglingReferences(); |
2289 | 8.92k | auto root = qpdf.getRoot(); |
2290 | 8.92k | auto oh = root.getKey("/Extensions"); |
2291 | 8.92k | if (oh.isDictionary()) { |
2292 | 406 | const bool extensions_indirect = oh.isIndirect(); |
2293 | 406 | if (extensions_indirect) { |
2294 | 96 | QTC::TC("qpdf", "QPDFWriter make Extensions direct"); |
2295 | 96 | oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy()); |
2296 | 96 | } |
2297 | 406 | if (oh.hasKey("/ADBE")) { |
2298 | 310 | auto adbe = oh.getKey("/ADBE"); |
2299 | 310 | if (adbe.isIndirect()) { |
2300 | 265 | QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1); |
2301 | 265 | adbe.makeDirect(); |
2302 | 265 | oh.replaceKey("/ADBE", adbe); |
2303 | 265 | } |
2304 | 310 | } |
2305 | 406 | } |
2306 | 8.92k | } |
2307 | | |
2308 | | void |
2309 | | impl::Writer::initializeTables(size_t extra) |
2310 | 8.97k | { |
2311 | 8.97k | auto size = objects.table_size() + 100u + extra; |
2312 | 8.97k | obj.resize(size); |
2313 | 8.97k | new_obj.resize(size); |
2314 | 8.97k | } |
2315 | | |
2316 | | void |
2317 | | impl::Writer::doWriteSetup() |
2318 | 8.99k | { |
2319 | 8.99k | if (did_write_setup) { |
2320 | 0 | return; |
2321 | 0 | } |
2322 | 8.99k | did_write_setup = true; |
2323 | | |
2324 | | // Do preliminary setup |
2325 | | |
2326 | 8.99k | if (cfg.linearize()) { |
2327 | 0 | cfg.qdf(false); |
2328 | 0 | } |
2329 | | |
2330 | 8.99k | if (cfg.pclm()) { |
2331 | 0 | encryption = nullptr; |
2332 | 0 | } |
2333 | | |
2334 | 8.99k | if (encryption) { |
2335 | | // Encryption has been explicitly set |
2336 | 0 | cfg.preserve_encryption(false); |
2337 | 8.99k | } else if (cfg.normalize_content() || cfg.pclm()) { |
2338 | | // Encryption makes looking at contents pretty useless. If the user explicitly encrypted |
2339 | | // though, we still obey that. |
2340 | 8.99k | cfg.preserve_encryption(false); |
2341 | 8.99k | } |
2342 | | |
2343 | 8.99k | if (cfg.preserve_encryption()) { |
2344 | 0 | copyEncryptionParameters(qpdf); |
2345 | 0 | } |
2346 | | |
2347 | 8.99k | if (!cfg.forced_pdf_version().empty()) { |
2348 | 0 | int major = 0; |
2349 | 0 | int minor = 0; |
2350 | 0 | parseVersion(cfg.forced_pdf_version(), major, minor); |
2351 | 0 | disableIncompatibleEncryption(major, minor, cfg.forced_extension_level()); |
2352 | 0 | if (compareVersions(major, minor, 1, 5) < 0) { |
2353 | 0 | cfg.object_streams(qpdf_o_disable); |
2354 | 0 | } |
2355 | 0 | } |
2356 | | |
2357 | 8.99k | if (cfg.qdf() || cfg.normalize_content()) { |
2358 | 8.99k | initializeSpecialStreams(); |
2359 | 8.99k | } |
2360 | | |
2361 | 8.99k | switch (cfg.object_streams()) { |
2362 | 0 | case qpdf_o_disable: |
2363 | 0 | initializeTables(); |
2364 | 0 | obj.streams_empty = true; |
2365 | 0 | break; |
2366 | | |
2367 | 8.97k | case qpdf_o_preserve: |
2368 | 8.97k | initializeTables(); |
2369 | 8.97k | preserveObjectStreams(); |
2370 | 8.97k | break; |
2371 | | |
2372 | 0 | case qpdf_o_generate: |
2373 | 0 | generateObjectStreams(); |
2374 | 0 | break; |
2375 | 8.99k | } |
2376 | | |
2377 | 8.95k | if (!obj.streams_empty) { |
2378 | 869 | if (cfg.linearize()) { |
2379 | | // Page dictionaries are not allowed to be compressed objects. |
2380 | 0 | for (auto& page: pages) { |
2381 | 0 | if (obj[page].object_stream > 0) { |
2382 | 0 | obj[page].object_stream = 0; |
2383 | 0 | } |
2384 | 0 | } |
2385 | 0 | } |
2386 | | |
2387 | 869 | if (cfg.linearize() || encryption) { |
2388 | | // The document catalog is not allowed to be compressed in cfg.linearized_ files either. |
2389 | | // It also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to |
2390 | | // handle encrypted files with compressed document catalogs, so we disable them in that |
2391 | | // case as well. |
2392 | 0 | if (obj[root_og].object_stream > 0) { |
2393 | 0 | obj[root_og].object_stream = 0; |
2394 | 0 | } |
2395 | 0 | } |
2396 | | |
2397 | | // Generate reverse mapping from object stream to objects |
2398 | 977k | obj.forEach([this](auto id, auto const& item) -> void { |
2399 | 977k | if (item.object_stream > 0) { |
2400 | 14.5k | auto& vec = object_stream_to_objects[item.object_stream]; |
2401 | 14.5k | vec.emplace_back(id, item.gen); |
2402 | 14.5k | if (max_ostream_index < vec.size()) { |
2403 | 5.75k | ++max_ostream_index; |
2404 | 5.75k | } |
2405 | 14.5k | } |
2406 | 977k | }); |
2407 | 869 | --max_ostream_index; |
2408 | | |
2409 | 869 | if (object_stream_to_objects.empty()) { |
2410 | 520 | obj.streams_empty = true; |
2411 | 520 | } else { |
2412 | 349 | setMinimumPDFVersion("1.5"); |
2413 | 349 | } |
2414 | 869 | } |
2415 | | |
2416 | 8.95k | setMinimumPDFVersion(qpdf.getPDFVersion(), qpdf.getExtensionLevel()); |
2417 | 8.95k | final_pdf_version = min_pdf_version; |
2418 | 8.95k | final_extension_level = min_extension_level; |
2419 | 8.95k | if (!cfg.forced_pdf_version().empty()) { |
2420 | 0 | final_pdf_version = cfg.forced_pdf_version(); |
2421 | 0 | final_extension_level = cfg.forced_extension_level(); |
2422 | 0 | } |
2423 | 8.95k | } |
2424 | | |
2425 | | void |
2426 | | QPDFWriter::write() |
2427 | 8.99k | { |
2428 | 8.99k | m->write(); |
2429 | 8.99k | } |
2430 | | |
// Drives a complete write: runs doWriteSetup(), sizes the progress estimate, calls
// prepareFileForWrite(), emits the file through writeLinearized() or writeStandard() depending on
// cfg.linearize(), then finishes the pipeline and releases the output handles.
2431 | | void |
2432 | | impl::Writer::write() |
2433 | 8.99k | { |
2434 | 8.99k | doWriteSetup(); |
2435 | | |
2436 | | // Set up progress reporting. For linearized files, we write two passes. events_expected is an |
2437 | | // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. |
2438 | 8.99k | events_expected = QIntC::to_int(qpdf.getObjectCount() * (cfg.linearize() ? 2 : 1)); |
2439 | | |
2440 | 8.99k | prepareFileForWrite(); |
2441 | | |
2442 | 8.99k | if (cfg.linearize()) { |
2443 | 0 | writeLinearized(); |
2444 | 8.99k | } else { |
2445 | 8.99k | writeStandard(); |
2446 | 8.99k | } |
2447 | | |
2448 | 8.99k | pipeline->finish(); |
// The FILE* is closed only when close_file is set; the pointer is cleared in either case.
2449 | 8.99k | if (close_file) { |
2450 | 0 | fclose(file); |
2451 | 0 | } |
2452 | 8.99k | file = nullptr; |
// When a buffer pipeline is in use, keep its buffer in output_buffer and drop the pipeline pointer.
2453 | 8.99k | if (buffer_pipeline) { |
2454 | 0 | output_buffer = buffer_pipeline->getBuffer(); |
2455 | 0 | buffer_pipeline = nullptr; |
2456 | 0 | } |
// Final progress event: decrement == false, finished == true.
2457 | 8.99k | indicateProgress(false, true); |
2458 | 8.99k | } |
2459 | | |
// Returns the renumber value recorded in m->obj for og, paired with generation 0.
2460 | | QPDFObjGen |
2461 | | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
2462 | 0 | { |
2463 | 0 | return {m->obj[og].renumber, 0}; |
2464 | 0 | } |
2465 | | |
// Public accessor; forwards to the implementation object's getWrittenXRefTable().
2466 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2467 | | QPDFWriter::getWrittenXRefTable() |
2468 | 0 | { |
2469 | 0 | return m->getWrittenXRefTable(); |
2470 | 0 | } |
2471 | | |
// Builds a map from QPDFObjGen(id, 0) to the xref entry of every new_obj item whose xref type is
// non-zero; items whose xref type is 0 are left out.
2472 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2473 | | impl::Writer::getWrittenXRefTable() |
2474 | 0 | { |
2475 | 0 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
2476 | |
|
// `it` carries the position of the previous insertion and is passed to emplace_hint as the hint
// (presumably new_obj.forEach visits ids in ascending order, which keeps the hint useful -- confirm).
2477 | 0 | auto it = result.begin(); |
2478 | 0 | new_obj.forEach([&it, &result](auto id, auto const& item) -> void { |
2479 | 0 | if (item.xref.getType() != 0) { |
2480 | 0 | it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); |
2481 | 0 | } |
2482 | 0 | }); |
2483 | 0 | return result; |
2484 | 0 | } |
2485 | | |
// Calls enqueue() on every object handle in part, in vector order.
2486 | | void |
2487 | | impl::Writer::enqueuePart(std::vector<QPDFObjectHandle>& part) |
2488 | 0 | { |
2489 | 0 | for (auto const& oh: part) { |
2490 | 0 | enqueue(oh); |
2491 | 0 | } |
2492 | 0 | } |
2493 | | |
// Writes the encryption dictionary as its own object. The id returned by openObject() is stored
// back into encryption_dict_objid, the dictionary is written key by key from the values held in
// *encryption, and the object is closed with closeObject().
// Entries that depend on the encryption version V:
//   V >= 4: /CF (a single /StdCF crypt filter), /EncryptMetadata false (only when metadata is not
//           encrypted), /OE, /StmF, /StrF, /UE
//   V >= 5: /Perms
2494 | | void |
2495 | | impl::Writer::writeEncryptionDictionary() |
2496 | 0 | { |
2497 | 0 | encryption_dict_objid = openObject(encryption_dict_objid); |
2498 | 0 | auto& enc = *encryption; |
2499 | 0 | auto const V = enc.getV(); |
2500 | |
|
2501 | 0 | write("<<"); |
2502 | 0 | if (V >= 4) { |
// /CFM is /V2 without AES; with AES it is /AESV2 for V < 5 and /AESV3 otherwise.
2503 | 0 | write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM "); |
2504 | 0 | write(cfg.encrypt_use_aes() ? (V < 5 ? "/AESV2" : "/AESV3") : "/V2"); |
2505 | | // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of |
2506 | | // MacOS won't open encrypted files without it. |
2507 | 0 | write(V < 5 ? " /Length 16 >> >>" : " /Length 32 >> >>"); |
2508 | 0 | if (!encryption->getEncryptMetadata()) { |
2509 | 0 | write(" /EncryptMetadata false"); |
2510 | 0 | } |
2511 | 0 | } |
// The top-level /Length is written as enc.getLengthBytes() * 8.
2512 | 0 | write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8); |
2513 | 0 | write(" /O ").write_string(enc.getO(), true); |
// NOTE(review): /OE (here) and /UE (below) are written for V >= 4 while /Perms requires V >= 5.
// Confirm against the PDF standard security handler that the V >= 4 threshold is intended.
2514 | 0 | if (V >= 4) { |
2515 | 0 | write(" /OE ").write_string(enc.getOE(), true); |
2516 | 0 | } |
2517 | 0 | write(" /P ").write(enc.getP()); |
2518 | 0 | if (V >= 5) { |
2519 | 0 | write(" /Perms ").write_string(enc.getPerms(), true); |
2520 | 0 | } |
2521 | 0 | write(" /R ").write(enc.getR()); |
2522 | |
|
2523 | 0 | if (V >= 4) { |
2524 | 0 | write(" /StmF /StdCF /StrF /StdCF"); |
2525 | 0 | } |
2526 | 0 | write(" /U ").write_string(enc.getU(), true); |
2527 | 0 | if (V >= 4) { |
2528 | 0 | write(" /UE ").write_string(enc.getUE(), true); |
2529 | 0 | } |
2530 | 0 | write(" /V ").write(enc.getV()).write(" >>"); |
2531 | 0 | closeObject(encryption_dict_objid); |
2532 | 0 | } |
2533 | | |
// Returns m->final_pdf_version. Note that this first runs m->doWriteSetup(), so calling it is not
// a pure read.
2534 | | std::string |
2535 | | QPDFWriter::getFinalVersion() |
2536 | 0 | { |
2537 | 0 | m->doWriteSetup(); |
2538 | 0 | return m->final_pdf_version; |
2539 | 0 | } |
2540 | | |
// Writes the file header: "%PDF-" followed by final_pdf_version, then either the PCLm marker line
// (when cfg.pclm()) or a comment line of four non-ASCII bytes, and finally hands the QDF marker
// to write_qdf() (presumably emitted only in QDF mode -- confirm against write_qdf).
2541 | | void |
2542 | | impl::Writer::writeHeader() |
2543 | 8.87k | { |
2544 | 8.87k | write("%PDF-").write(final_pdf_version); |
2545 | 8.87k | if (cfg.pclm()) { |
2546 | | // PCLm version |
2547 | 0 | write("\n%PCLm 1.0\n"); |
2548 | 8.87k | } else { |
2549 | | // This string of binary characters would not be valid UTF-8, so it really should be treated |
2550 | | // as binary. |
2551 | 8.87k | write("\n%\xbf\xf7\xa2\xfe\n"); |
2552 | 8.87k | } |
2553 | 8.87k | write_qdf("%QDF-1.0\n\n"); |
2554 | | |
2555 | | // Note: do not write extra header text here. Linearized PDFs must include the entire |
2556 | | // linearization parameter dictionary within the first 1024 characters of the PDF file, so for |
2557 | | // linearized files, we have to write extra header text after the linearization parameter |
2558 | | // dictionary. |
2559 | 8.87k | } |
2560 | | |
// Generates the hint stream data with lin.generateHintStream() and writes it as object hint_id:
// a dictionary holding an optional /Filter /FlateDecode, /S, an optional /O and /Length, followed
// by the stream data written through write_encrypted().
2561 | | void |
2562 | | impl::Writer::writeHintStream(int hint_id) |
2563 | 0 | { |
2564 | 0 | std::string hint_buffer; |
2565 | 0 | int S = 0; |
2566 | 0 | int O = 0; |
2567 | 0 | bool compressed = cfg.compress_streams(); |
2568 | 0 | lin.generateHintStream(new_obj, obj, hint_buffer, S, O, compressed); |
2569 | |
|
2570 | 0 | openObject(hint_id); |
2571 | 0 | setDataKey(hint_id); |
2572 | |
|
2573 | 0 | size_t hlen = hint_buffer.size(); |
2574 | |
|
2575 | 0 | write("<< "); |
2576 | 0 | if (compressed) { |
2577 | 0 | write("/Filter /FlateDecode "); |
2578 | 0 | } |
2579 | 0 | write("/S ").write(S); |
// /O is written only when O is non-zero.
2580 | 0 | if (O) { |
2581 | 0 | write(" /O ").write(O); |
2582 | 0 | } |
// hlen goes through adjustAESStreamLength() before it is written as /Length (presumably adjusted
// in place to the encrypted size -- confirm against adjustAESStreamLength).
2583 | 0 | adjustAESStreamLength(hlen); |
2584 | 0 | write(" /Length ").write(hlen); |
2585 | 0 | write(" >>\nstream\n").write_encrypted(hint_buffer); |
2586 | |
|
2587 | 0 | if (encryption) { |
2588 | 0 | QTC::TC("qpdf", "QPDFWriter encrypted hint stream"); |
2589 | 0 | } |
2590 | |
|
// A newline is inserted before "endstream" unless hint_buffer already ends with one.
2591 | 0 | write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream"); |
2592 | 0 | closeObject(hint_id); |
2593 | 0 | } |
2594 | | |
// Convenience overload: forwards to the full writeXRefTable() with prev = 0,
// suppress_offsets = false, and hint_id, hint_offset, hint_length and linearization_pass all 0.
2595 | | qpdf_offset_t |
2596 | | impl::Writer::writeXRefTable(trailer_e which, int first, int last, int size) |
2597 | 8.42k | { |
2598 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2599 | | // file...too much risk of leaving something off. |
2600 | 8.42k | return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0); |
2601 | 8.42k | } |
2602 | | |
// Writes a classic cross-reference table section for objects first..last, followed by the trailer
// (via writeTrailer) and a newline.
// Returns the pipeline byte count captured right after the subsection header's entry count and
// before the newline that follows it.
2603 | | qpdf_offset_t |
2604 | | impl::Writer::writeXRefTable( |
2605 | | trailer_e which, |
2606 | | int first, |
2607 | | int last, |
2608 | | int size, |
2609 | | qpdf_offset_t prev, |
2610 | | bool suppress_offsets, |
2611 | | int hint_id, |
2612 | | qpdf_offset_t hint_offset, |
2613 | | qpdf_offset_t hint_length, |
2614 | | int linearization_pass) |
2615 | 8.42k | { |
2616 | 8.42k | write("xref\n").write(first).write(" ").write(last - first + 1); |
2617 | 8.42k | qpdf_offset_t space_before_zero = pipeline->getCount(); |
2618 | 8.42k | write("\n"); |
// A section starting at object 0 gets the fixed free entry for object 0; the loop then starts at 1.
2619 | 8.42k | if (first == 0) { |
2620 | 8.42k | write("0000000000 65535 f \n"); |
2621 | 8.42k | ++first; |
2622 | 8.42k | } |
// Every remaining entry is written as in-use with generation 00000. With suppress_offsets the
// offset stays 0; otherwise, when hint_id is non-zero, offsets at or beyond hint_offset (except
// the hint object's own) are shifted by hint_length.
2623 | 89.4k | for (int i = first; i <= last; ++i) { |
2624 | 81.0k | qpdf_offset_t offset = 0; |
2625 | 81.0k | if (!suppress_offsets) { |
2626 | 81.0k | offset = new_obj[i].xref.getOffset(); |
2627 | 81.0k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2628 | 0 | offset += hint_length; |
2629 | 0 | } |
2630 | 81.0k | } |
2631 | 81.0k | write(QUtil::int_to_string(offset, 10)).write(" 00000 n \n"); |
2632 | 81.0k | } |
2633 | 8.42k | writeTrailer(which, size, false, prev, linearization_pass); |
2634 | 8.42k | write("\n"); |
2635 | 8.42k | return space_before_zero; |
2636 | 8.42k | } |
2637 | | |
// Convenience overload: forwards to the full writeXRefStream() with prev, hint_id, hint_offset and
// hint_length all 0, skip_compression = false and linearization_pass = 0.
2638 | | qpdf_offset_t |
2639 | | impl::Writer::writeXRefStream( |
2640 | | int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size) |
2641 | 287 | { |
2642 | | // There are too many extra arguments to replace overloaded function with defaults in the header |
2643 | | // file...too much risk of leaving something off. |
2644 | 287 | return writeXRefStream( |
2645 | 287 | objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0); |
2646 | 287 | } |
2647 | | |
// Writes object xref_id as a cross-reference stream covering objects first..last.
// Each entry is 1 + f1_size + f2_size bytes: a type byte, a field wide enough for
// max_offset + hint_length and for max_id, and a field wide enough for max_ostream_index.
// The stream dictionary carries /Type /XRef, /Length, /W, an /Index array when the range is not
// exactly 0..size-1, the filter entries when compressed, and whatever writeTrailer() adds.
// Returns the pipeline byte count on entry minus 1.
// Throws std::logic_error if an entry has a type other than 0, 1 or 2.
2648 | | qpdf_offset_t |
2649 | | impl::Writer::writeXRefStream( |
2650 | | int xref_id, |
2651 | | int max_id, |
2652 | | qpdf_offset_t max_offset, |
2653 | | trailer_e which, |
2654 | | int first, |
2655 | | int last, |
2656 | | int size, |
2657 | | qpdf_offset_t prev, |
2658 | | int hint_id, |
2659 | | qpdf_offset_t hint_offset, |
2660 | | qpdf_offset_t hint_length, |
2661 | | bool skip_compression, |
2662 | | int linearization_pass) |
2663 | 287 | { |
2664 | 287 | qpdf_offset_t xref_offset = pipeline->getCount(); |
2665 | 287 | qpdf_offset_t space_before_zero = xref_offset - 1; |
2666 | | |
2667 | | // field 1 contains offsets and object stream identifiers |
2668 | 287 | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
2669 | | |
2670 | | // field 2 contains object stream indices |
2671 | 287 | unsigned int f2_size = bytesNeeded(QIntC::to_longlong(max_ostream_index)); |
2672 | | |
2673 | 287 | unsigned int esize = 1 + f1_size + f2_size; |
2674 | | |
2675 | | // Must store in xref table in advance of writing the actual data rather than waiting for |
2676 | | // openObject to do it. |
2677 | 287 | new_obj[xref_id].xref = QPDFXRefEntry(pipeline->getCount()); |
2678 | | |
2679 | 287 | std::string xref_data; |
// Compression is applied only when stream compression is enabled and QDF mode is off.
2680 | 287 | const bool compressed = cfg.compress_streams() && !cfg.qdf(); |
2681 | 287 | { |
// Presumably writes made while pp_xref is alive are collected in xref_data and the previous
// pipeline is restored when this brace block ends -- confirm against Pl_stack::activate.
2682 | 287 | auto pp_xref = pipeline_stack.activate(xref_data); |
2683 | | |
2684 | 23.6k | for (int i = first; i <= last; ++i) { |
2685 | 23.3k | QPDFXRefEntry& e = new_obj[i].xref; |
2686 | 23.3k | switch (e.getType()) { |
// Type 0: all three fields are written as zero.
2687 | 287 | case 0: |
2688 | 287 | writeBinary(0, 1); |
2689 | 287 | writeBinary(0, f1_size); |
2690 | 287 | writeBinary(0, f2_size); |
2691 | 287 | break; |
2692 | | |
// Type 1: field 1 is the file offset (shifted by hint_length under the same rule as in
// writeXRefTable), field 2 is zero.
2693 | 9.69k | case 1: |
2694 | 9.69k | { |
2695 | 9.69k | qpdf_offset_t offset = e.getOffset(); |
2696 | 9.69k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2697 | 0 | offset += hint_length; |
2698 | 0 | } |
2699 | 9.69k | writeBinary(1, 1); |
2700 | 9.69k | writeBinary(QIntC::to_ulonglong(offset), f1_size); |
2701 | 9.69k | writeBinary(0, f2_size); |
2702 | 9.69k | } |
2703 | 9.69k | break; |
2704 | | |
// Type 2: field 1 is the object stream number, field 2 the index within that stream.
2705 | 13.3k | case 2: |
2706 | 13.3k | writeBinary(2, 1); |
2707 | 13.3k | writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size); |
2708 | 13.3k | writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size); |
2709 | 13.3k | break; |
2710 | | |
2711 | 0 | default: |
2712 | 0 | throw std::logic_error("invalid type writing xref stream"); |
2713 | 0 | break; |
2714 | 23.3k | } |
2715 | 23.3k | } |
2716 | 287 | } |
2717 | | |
2718 | 287 | if (compressed) { |
// The PNG filter is always applied when compressed, with esize as the column count.
2719 | 0 | xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize); |
2720 | 0 | if (!skip_compression) { |
2721 | | // Deflate unless skip_compression is set. When it is set, the dictionary below still declares |
2722 | | // /FlateDecode but the data is left uncompressed, which helps compute padding in linearization pass 1. |
2723 | 0 | xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate); |
2724 | 0 | } |
2725 | 0 | } |
2726 | | |
2727 | 287 | openObject(xref_id); |
2728 | 287 | write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n "); |
2729 | 287 | write(" /Length ").write(xref_data.size()); |
2730 | 287 | if (compressed) { |
2731 | 0 | write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n "); |
2732 | 0 | write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>"); |
2733 | 0 | } |
2734 | 287 | write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]"); |
// /Index is omitted when the stream covers the whole range 0..size-1.
2735 | 287 | if (!(first == 0 && last == (size - 1))) { |
2736 | 0 | write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]"); |
2737 | 0 | } |
2738 | 287 | writeTrailer(which, size, true, prev, linearization_pass); |
2739 | 287 | write("\nstream\n").write(xref_data).write("\nendstream"); |
2740 | 287 | closeObject(xref_id); |
2741 | 287 | return space_before_zero; |
2742 | 287 | } |
2743 | | |
// Returns 16 + 5 * ceil(xref_bytes / 16384): the worst-case number of padding bytes to reserve
// after an uncompressed first-pass xref stream of xref_bytes bytes.
2744 | | size_t |
2745 | | impl::Writer::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
2746 | 0 | { |
2747 | | // This routine is called right after a linearization first pass xref stream has been written |
2748 | | // without compression. Calculate the amount of padding that would be required in the worst |
2749 | | // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is |
2750 | | // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add |
2751 | | // 10 extra bytes for number length increases. |
2752 | |
|
2753 | 0 | return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); |
2754 | 0 | } |
2755 | | |
// Writes the file in linearized form.
// The objects are split into parts 4, 6, 7, 8 and 9 by lin.parts(). Object numbers are assigned so
// that the second half (parts 7-9) is numbered before the first half (see the table below), and
// the objects are then enqueued in part order 4, 6, 7, 8, 9.
// The file is written twice. Pass 1 goes to the cfg.linearize_pass1() file when that is set, or
// otherwise to a pipeline activated with pipeline_stack.activate(pp_pass1, true) (presumably a
// discarding pipeline -- confirm); it records offsets and is followed by generation of the hint
// stream. Pass 2 writes the output again with the recorded offsets and hint_length filled in.
// Throws std::runtime_error when the object numbers consumed by a part do not match the expected
// count, and std::logic_error when the pass 2 padding does not line up with pass 1.
2756 | | void |
2757 | | impl::Writer::writeLinearized() |
2758 | 0 | { |
2759 | | // Optimize file and enqueue objects in order |
2760 | |
|
2761 | 0 | std::map<int, int> stream_cache; |
2762 | |
|
// Caches, per stream object id, the value handed to lin.optimize(): 2 when will_filter_stream()
// is true for the stream, otherwise 1. A cached 0 means "not computed yet".
2763 | 0 | auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) { |
2764 | 0 | if (auto& result = stream_cache[stream.getObjectID()]) { |
2765 | 0 | return result; |
2766 | 0 | } else { |
2767 | 0 | return result = will_filter_stream(stream) ? 2 : 1; |
2768 | 0 | } |
2769 | 0 | }; |
2770 | |
|
2771 | 0 | lin.optimize(obj, skip_stream_parameters); |
2772 | |
|
2773 | 0 | std::vector<QPDFObjectHandle> part4; |
2774 | 0 | std::vector<QPDFObjectHandle> part6; |
2775 | 0 | std::vector<QPDFObjectHandle> part7; |
2776 | 0 | std::vector<QPDFObjectHandle> part8; |
2777 | 0 | std::vector<QPDFObjectHandle> part9; |
2778 | 0 | lin.parts(obj, part4, part6, part7, part8, part9); |
2779 | | |
2780 | | // Object number sequence: |
2781 | | // |
2782 | | // second half |
2783 | | // second half uncompressed objects |
2784 | | // second half xref stream, if any |
2785 | | // second half compressed objects |
2786 | | // first half |
2787 | | // linearization dictionary |
2788 | | // first half xref stream, if any |
2789 | | // part 4 uncompressed objects |
2790 | | // encryption dictionary, if any |
2791 | | // hint stream |
2792 | | // part 6 uncompressed objects |
2793 | | // first half compressed objects |
2794 | | // |
2795 | | |
2796 | | // Second half objects |
2797 | 0 | int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size()); |
2798 | 0 | int second_half_first_obj = 1; |
2799 | 0 | int after_second_half = 1 + second_half_uncompressed; |
2800 | 0 | next_objid = after_second_half; |
2801 | 0 | int second_half_xref = 0; |
// Xref streams are used whenever obj.streams_empty is false.
2802 | 0 | bool need_xref_stream = !obj.streams_empty; |
2803 | 0 | if (need_xref_stream) { |
2804 | 0 | second_half_xref = next_objid++; |
2805 | 0 | } |
2806 | | // Assign numbers to all compressed objects in the second half. |
2807 | 0 | std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9}; |
2808 | 0 | for (int i = 0; i < 3; ++i) { |
2809 | 0 | for (auto const& oh: *vecs2[i]) { |
2810 | 0 | assignCompressedObjectNumbers(oh.getObjGen()); |
2811 | 0 | } |
2812 | 0 | } |
2813 | 0 | int second_half_end = next_objid - 1; |
2814 | 0 | int second_trailer_size = next_objid; |
2815 | | |
2816 | | // First half objects |
2817 | 0 | int first_half_start = next_objid; |
2818 | 0 | int lindict_id = next_objid++; |
2819 | 0 | int first_half_xref = 0; |
2820 | 0 | if (need_xref_stream) { |
2821 | 0 | first_half_xref = next_objid++; |
2822 | 0 | } |
2823 | 0 | int part4_first_obj = next_objid; |
2824 | 0 | next_objid += QIntC::to_int(part4.size()); |
2825 | 0 | int after_part4 = next_objid; |
2826 | 0 | if (encryption) { |
2827 | 0 | encryption_dict_objid = next_objid++; |
2828 | 0 | } |
2829 | 0 | int hint_id = next_objid++; |
2830 | 0 | int part6_first_obj = next_objid; |
2831 | 0 | next_objid += QIntC::to_int(part6.size()); |
2832 | 0 | int after_part6 = next_objid; |
2833 | | // Assign numbers to all compressed objects in the first half |
2834 | 0 | std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6}; |
2835 | 0 | for (int i = 0; i < 2; ++i) { |
2836 | 0 | for (auto const& oh: *vecs1[i]) { |
2837 | 0 | assignCompressedObjectNumbers(oh.getObjGen()); |
2838 | 0 | } |
2839 | 0 | } |
2840 | 0 | int first_half_end = next_objid - 1; |
2841 | 0 | int first_trailer_size = next_objid; |
2842 | |
|
// The last objects of part 4 and part 6 serve as markers while the queue is written out below.
// NOTE(review): back() requires part4 and part6 to be non-empty; confirm lin.parts() guarantees it.
2843 | 0 | int part4_end_marker = part4.back().getObjectID(); |
2844 | 0 | int part6_end_marker = part6.back().getObjectID(); |
2845 | 0 | qpdf_offset_t space_before_zero = 0; |
2846 | 0 | qpdf_offset_t file_size = 0; |
2847 | 0 | qpdf_offset_t part6_end_offset = 0; |
2848 | 0 | qpdf_offset_t first_half_max_obj_offset = 0; |
2849 | 0 | qpdf_offset_t second_xref_offset = 0; |
2850 | 0 | qpdf_offset_t first_xref_end = 0; |
2851 | 0 | qpdf_offset_t second_xref_end = 0; |
2852 | |
|
// Enqueue each part with next_objid reset to the first number reserved for it, then check that
// exactly the reserved range was consumed.
2853 | 0 | next_objid = part4_first_obj; |
2854 | 0 | enqueuePart(part4); |
2855 | 0 | if (next_objid != after_part4) { |
2856 | | // This can happen with very botched files as in the fuzzer test. There are likely some |
2857 | | // faulty assumptions in calculateLinearizationData |
2858 | 0 | throw std::runtime_error("error encountered after writing part 4 of linearized data"); |
2859 | 0 | } |
2860 | 0 | next_objid = part6_first_obj; |
2861 | 0 | enqueuePart(part6); |
2862 | 0 | if (next_objid != after_part6) { |
2863 | 0 | throw std::runtime_error("error encountered after writing part 6 of linearized data"); |
2864 | 0 | } |
2865 | 0 | next_objid = second_half_first_obj; |
2866 | 0 | enqueuePart(part7); |
2867 | 0 | enqueuePart(part8); |
2868 | 0 | enqueuePart(part9); |
2869 | 0 | if (next_objid != after_second_half) { |
2870 | 0 | throw std::runtime_error("error encountered after writing part 9 of linearized data"); |
2871 | 0 | } |
2872 | | |
2873 | 0 | qpdf_offset_t hint_length = 0; |
2874 | 0 | std::string hint_buffer; |
2875 | | |
2876 | | // Write file in two passes. Part numbers refer to PDF spec 1.4. |
2877 | |
|
// NOTE(review): lin_pass1_file is closed only at the end of pass 1 below; an exception thrown
// before that point would leave it open -- confirm whether that matters to callers.
2878 | 0 | FILE* lin_pass1_file = nullptr; |
2879 | 0 | auto pp_pass1 = pipeline_stack.popper(); |
2880 | 0 | auto pp_md5 = pipeline_stack.popper(); |
2881 | 0 | for (int pass: {1, 2}) { |
2882 | 0 | if (pass == 1) { |
2883 | 0 | if (!cfg.linearize_pass1().empty()) { |
2884 | 0 | lin_pass1_file = QUtil::safe_fopen(cfg.linearize_pass1().data(), "wb"); |
2885 | 0 | pipeline_stack.activate( |
2886 | 0 | pp_pass1, |
2887 | 0 | std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file)); |
2888 | 0 | } else { |
2889 | 0 | pipeline_stack.activate(pp_pass1, true); |
2890 | 0 | } |
2891 | 0 | if (cfg.deterministic_id()) { |
2892 | 0 | pipeline_stack.activate_md5(pp_md5); |
2893 | 0 | } |
2894 | 0 | } |
2895 | | |
2896 | | // Part 1: header |
2897 | |
|
2898 | 0 | writeHeader(); |
2899 | | |
2900 | | // Part 2: linearization parameter dictionary. Save enough space to write real dictionary. |
2901 | | // 200 characters is enough space if all numerical values in the parameter dictionary that |
2902 | | // contain offsets are 20 digits long plus a few extra characters for safety. The entire |
2903 | | // linearization parameter dictionary must appear within the first 1024 characters of the |
2904 | | // file. |
2905 | |
|
2906 | 0 | qpdf_offset_t pos = pipeline->getCount(); |
2907 | 0 | openObject(lindict_id); |
2908 | 0 | write("<<"); |
// The dictionary is left empty in pass 1; its entries are only written in pass 2.
2909 | 0 | if (pass == 2) { |
2910 | 0 | write(" /Linearized 1 /L ").write(file_size + hint_length); |
2911 | | // Implementation note 121 states that a space is mandatory after this open bracket. |
2912 | 0 | write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" "); |
2913 | 0 | write(hint_length); |
2914 | 0 | write(" ] /O ").write(obj[pages.all().at(0)].renumber); |
2915 | 0 | write(" /E ").write(part6_end_offset + hint_length); |
2916 | 0 | write(" /N ").write(pages.size()); |
2917 | 0 | write(" /T ").write(space_before_zero + hint_length); |
2918 | 0 | } |
2919 | 0 | write(" >>"); |
2920 | 0 | closeObject(lindict_id); |
// Pad with spaces so the dictionary object always occupies `pad` bytes counted from `pos`.
2921 | 0 | static int const pad = 200; |
2922 | 0 | write(QIntC::to_size(pos - pipeline->getCount() + pad), ' ').write("\n"); |
2923 | | |
2924 | | // If the user supplied any additional header text, write it here after the linearization |
2925 | | // parameter dictionary. |
2926 | 0 | write(cfg.extra_header_text()); |
2927 | | |
2928 | | // Part 3: first page cross reference table and trailer. |
2929 | |
|
2930 | 0 | qpdf_offset_t first_xref_offset = pipeline->getCount(); |
2931 | 0 | qpdf_offset_t hint_offset = 0; |
2932 | 0 | if (pass == 2) { |
2933 | 0 | hint_offset = new_obj[hint_id].xref.getOffset(); |
2934 | 0 | } |
2935 | 0 | if (need_xref_stream) { |
2936 | | // Must pad here too. |
2937 | 0 | if (pass == 1) { |
2938 | | // Set first_half_max_obj_offset to a value large enough to force four bytes to be |
2939 | | // reserved for each file offset. This would provide adequate space for the xref |
2940 | | // stream as long as the last object in page 1 starts within the first 4 GB of the |
2941 | | // file, which is extremely likely. In the second pass, we will know the actual |
2942 | | // value for this, but it's okay if it's smaller. |
2943 | 0 | first_half_max_obj_offset = 1 << 25; |
2944 | 0 | } |
2945 | 0 | pos = pipeline->getCount(); |
2946 | 0 | writeXRefStream( |
2947 | 0 | first_half_xref, |
2948 | 0 | first_half_end, |
2949 | 0 | first_half_max_obj_offset, |
2950 | 0 | t_lin_first, |
2951 | 0 | first_half_start, |
2952 | 0 | first_half_end, |
2953 | 0 | first_trailer_size, |
2954 | 0 | hint_length + second_xref_offset, |
2955 | 0 | hint_id, |
2956 | 0 | hint_offset, |
2957 | 0 | hint_length, |
2958 | 0 | (pass == 1), |
2959 | 0 | pass); |
2960 | 0 | qpdf_offset_t endpos = pipeline->getCount(); |
2961 | 0 | if (pass == 1) { |
2962 | | // Pad so we have enough room for the real xref stream. |
2963 | 0 | write(calculateXrefStreamPadding(endpos - pos), ' '); |
2964 | 0 | first_xref_end = pipeline->getCount(); |
2965 | 0 | } else { |
2966 | | // Pad so that the next object starts at the same place as in pass 1. |
2967 | 0 | write(QIntC::to_size(first_xref_end - endpos), ' '); |
2968 | |
|
2969 | 0 | if (pipeline->getCount() != first_xref_end) { |
2970 | 0 | throw std::logic_error( |
2971 | 0 | "insufficient padding for first pass xref stream; first_xref_end=" + |
2972 | 0 | std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos)); |
2973 | 0 | } |
2974 | 0 | } |
2975 | 0 | write("\n"); |
2976 | 0 | } else { |
// Classic table: offsets are suppressed (written as 0) in pass 1 and real in pass 2.
2977 | 0 | writeXRefTable( |
2978 | 0 | t_lin_first, |
2979 | 0 | first_half_start, |
2980 | 0 | first_half_end, |
2981 | 0 | first_trailer_size, |
2982 | 0 | hint_length + second_xref_offset, |
2983 | 0 | (pass == 1), |
2984 | 0 | hint_id, |
2985 | 0 | hint_offset, |
2986 | 0 | hint_length, |
2987 | 0 | pass); |
2988 | 0 | write("startxref\n0\n%%EOF\n"); |
2989 | 0 | } |
2990 | | |
2991 | | // Parts 4 through 9 |
2992 | | |
2993 | 0 | for (auto const& cur_object: object_queue) { |
// The offset at which the last part 6 object starts feeds the next pass's first xref stream.
2994 | 0 | if (cur_object.getObjectID() == part6_end_marker) { |
2995 | 0 | first_half_max_obj_offset = pipeline->getCount(); |
2996 | 0 | } |
2997 | 0 | writeObject(cur_object); |
// After the last part 4 object: write the encryption dictionary (if any); then pass 1 records
// where the hint stream will go and pass 2 writes the buffered hint stream there.
2998 | 0 | if (cur_object.getObjectID() == part4_end_marker) { |
2999 | 0 | if (encryption) { |
3000 | 0 | writeEncryptionDictionary(); |
3001 | 0 | } |
3002 | 0 | if (pass == 1) { |
3003 | 0 | new_obj[hint_id].xref = QPDFXRefEntry(pipeline->getCount()); |
3004 | 0 | } else { |
3005 | | // Part 5: hint stream |
3006 | 0 | write(hint_buffer); |
3007 | 0 | } |
3008 | 0 | } |
3009 | 0 | if (cur_object.getObjectID() == part6_end_marker) { |
3010 | 0 | part6_end_offset = pipeline->getCount(); |
3011 | 0 | } |
3012 | 0 | } |
3013 | | |
3014 | | // Part 10: overflow hint stream -- not used |
3015 | | |
3016 | | // Part 11: main cross reference table and trailer |
3017 | |
|
3018 | 0 | second_xref_offset = pipeline->getCount(); |
3019 | 0 | if (need_xref_stream) { |
3020 | 0 | pos = pipeline->getCount(); |
3021 | 0 | space_before_zero = writeXRefStream( |
3022 | 0 | second_half_xref, |
3023 | 0 | second_half_end, |
3024 | 0 | second_xref_offset, |
3025 | 0 | t_lin_second, |
3026 | 0 | 0, |
3027 | 0 | second_half_end, |
3028 | 0 | second_trailer_size, |
3029 | 0 | 0, |
3030 | 0 | 0, |
3031 | 0 | 0, |
3032 | 0 | 0, |
3033 | 0 | (pass == 1), |
3034 | 0 | pass); |
3035 | 0 | qpdf_offset_t endpos = pipeline->getCount(); |
3036 | |
|
3037 | 0 | if (pass == 1) { |
3038 | | // Pad so we have enough room for the real xref stream. See comments for previous |
3039 | | // xref stream on how we calculate the padding. |
3040 | 0 | write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n"); |
3041 | 0 | second_xref_end = pipeline->getCount(); |
3042 | 0 | } else { |
3043 | | // Make the file size the same. |
3044 | 0 | auto padding = |
3045 | 0 | QIntC::to_size(second_xref_end + hint_length - 1 - pipeline->getCount()); |
3046 | 0 | write(padding, ' ').write("\n"); |
3047 | | |
3048 | | // If this assertion fails, maybe we didn't have enough padding above. |
3049 | 0 | if (pipeline->getCount() != second_xref_end + hint_length) { |
3050 | 0 | throw std::logic_error( |
3051 | 0 | "count mismatch after xref stream; possible insufficient padding?"); |
3052 | 0 | } |
3053 | 0 | } |
3054 | 0 | } else { |
3055 | 0 | space_before_zero = writeXRefTable( |
3056 | 0 | t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass); |
3057 | 0 | } |
3058 | 0 | write("startxref\n").write(first_xref_offset).write("\n%%EOF\n"); |
3059 | |
|
3060 | 0 | if (pass == 1) { |
3061 | 0 | if (cfg.deterministic_id()) { |
3062 | 0 | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
3063 | 0 | computeDeterministicIDData(); |
3064 | 0 | pp_md5.pop(); |
3065 | 0 | } |
3066 | | |
3067 | | // Close first pass pipeline |
3068 | 0 | file_size = pipeline->getCount(); |
3069 | 0 | pp_pass1.pop(); |
3070 | | |
3071 | | // Save hint offset since it will be set to zero by calling openObject. |
3072 | 0 | qpdf_offset_t hint_offset1 = new_obj[hint_id].xref.getOffset(); |
3073 | | |
3074 | | // Write hint stream to a buffer |
3075 | 0 | { |
3076 | 0 | auto pp_hint = pipeline_stack.activate(hint_buffer); |
3077 | 0 | writeHintStream(hint_id); |
3078 | 0 | } |
3079 | 0 | hint_length = QIntC::to_offset(hint_buffer.size()); |
3080 | | |
3081 | | // Restore hint offset |
3082 | 0 | new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); |
3083 | 0 | if (lin_pass1_file) { |
3084 | | // Write some debugging information |
3085 | 0 | fprintf( |
3086 | 0 | lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str()); |
3087 | 0 | fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str()); |
3088 | 0 | fprintf( |
3089 | 0 | lin_pass1_file, |
3090 | 0 | "%% second_xref_offset=%s\n", |
3091 | 0 | std::to_string(second_xref_offset).c_str()); |
3092 | 0 | fprintf( |
3093 | 0 | lin_pass1_file, |
3094 | 0 | "%% second_xref_end=%s\n", |
3095 | 0 | std::to_string(second_xref_end).c_str()); |
3096 | 0 | fclose(lin_pass1_file); |
3097 | 0 | lin_pass1_file = nullptr; |
3098 | 0 | } |
3099 | 0 | } |
3100 | 0 | } |
3101 | 0 | } |
3102 | | |
// Fills the object queue for a non-linearized write: every object of the document first when
// cfg.preserve_unreferenced() is set, then the trimmed trailer's /Root, then every non-null entry
// of the trimmed trailer.
3103 | | void |
3104 | | impl::Writer::enqueueObjectsStandard() |
3105 | 8.87k | { |
3106 | 8.87k | if (cfg.preserve_unreferenced()) { |
3107 | 0 | for (auto const& oh: qpdf.getAllObjects()) { |
3108 | 0 | enqueue(oh); |
3109 | 0 | } |
3110 | 0 | } |
3111 | | |
3112 | | // Put root first on queue. |
3113 | 8.87k | auto trailer = trimmed_trailer(); |
3114 | 8.87k | enqueue(trailer["/Root"]); |
3115 | | |
3116 | | // Next place any other objects referenced from the trailer dictionary into the queue, handling |
3117 | | // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op. |
3118 | 16.9k | for (auto& item: trailer) { |
3119 | 16.9k | if (!item.second.null()) { |
3120 | 13.7k | enqueue(item.second); |
3121 | 13.7k | } |
3122 | 16.9k | } |
3123 | 8.87k | } |
3124 | | |
// Fills the object queue for PCLm output. For each page it enqueues the page, its /Contents, and
// for every non-null entry of /Resources /XObject the image followed by a newly created stream
// holding image_transform_content. The trimmed trailer's /Root is enqueued last.
3125 | | void |
3126 | | impl::Writer::enqueueObjectsPCLm() |
3127 | 0 | { |
3128 | | // Image transform stream content for page strip images. One of these new streams has to come |
3129 | | // after every page image strip written in the PCLm file. |
3130 | 0 | std::string image_transform_content = "q /image Do Q\n"; |
3131 | | |
3132 | | // enqueue all pages first |
3133 | 0 | for (auto& page: pages) { |
3134 | 0 | enqueue(page); |
3135 | 0 | enqueue(page["/Contents"]); |
3136 | | |
3137 | | // enqueue all the strips for each page |
3138 | 0 | for (auto& image: Dictionary(page["/Resources"]["/XObject"])) { |
3139 | 0 | if (!image.second.null()) { |
3140 | 0 | enqueue(image.second); |
3141 | 0 | enqueue(qpdf.newStream(image_transform_content)); |
3142 | 0 | } |
3143 | 0 | } |
3144 | 0 | } |
3145 | |
|
3146 | 0 | enqueue(trimmed_trailer()["/Root"]); |
3147 | 0 | } |
3148 | | |
// Counts one progress event and, when a reporter is registered, reports a percentage.
//   decrement: take back one counted event and return without reporting.
//   finished:  force a report of 100.
// A report is made when finished is set or events_seen has reached next_progress_report. The value
// is 100 when finished, 0 while next_progress_report is still 0, and otherwise
// min(99, 1 + 100 * events_seen / events_expected). Afterwards next_progress_report is advanced
// past events_seen in steps of max(1, events_expected / 100).
3149 | | void |
3150 | | impl::Writer::indicateProgress(bool decrement, bool finished) |
3151 | 106k | { |
3152 | 106k | if (decrement) { |
3153 | 13.7k | --events_seen; |
3154 | 13.7k | return; |
3155 | 13.7k | } |
3156 | | |
3157 | 92.5k | ++events_seen; |
3158 | | |
// Without a registered reporter only the event counter is maintained.
3159 | 92.5k | if (!progress_reporter.get()) { |
3160 | 92.5k | return; |
3161 | 92.5k | } |
3162 | | |
// NOTE(review): events_expected is used as a divisor below; confirm it cannot be 0 once
// next_progress_report is non-zero.
3163 | 0 | if (finished || events_seen >= next_progress_report) { |
3164 | 0 | int percentage = |
3165 | 0 | (finished ? 100 |
3166 | 0 | : next_progress_report == 0 |
3167 | 0 | ? 0 |
3168 | 0 | : std::min(99, 1 + ((100 * events_seen) / events_expected))); |
3169 | 0 | progress_reporter->reportProgress(percentage); |
3170 | 0 | } |
3171 | 0 | int increment = std::max(1, (events_expected / 100)); |
3172 | 0 | while (events_seen >= next_progress_report) { |
3173 | 0 | next_progress_report += increment; |
3174 | 0 | } |
3175 | 0 | } |
3176 | | |
// Stores pr in m->progress_reporter, replacing whatever reporter was stored before.
3177 | | void |
3178 | | QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr) |
3179 | 0 | { |
3180 | 0 | m->progress_reporter = pr; |
3181 | 0 | } |
3182 | | |
// Writes the file in non-linearized form: header and extra header text, every queued object, the
// encryption dictionary (if any), then either a classic xref table or an xref stream, and finally
// the startxref / %%EOF footer pointing at the xref offset.
3183 | | void |
3184 | | impl::Writer::writeStandard() |
3185 | 8.87k | { |
// The MD5 pipeline is activated only when deterministic IDs are requested.
3186 | 8.87k | auto pp_md5 = pipeline_stack.popper(); |
3187 | 8.87k | if (cfg.deterministic_id()) { |
3188 | 8.87k | pipeline_stack.activate_md5(pp_md5); |
3189 | 8.87k | } |
3190 | | |
3191 | | // Start writing |
3192 | | |
3193 | 8.87k | writeHeader(); |
3194 | 8.87k | write(cfg.extra_header_text()); |
3195 | | |
3196 | 8.87k | if (cfg.pclm()) { |
3197 | 0 | enqueueObjectsPCLm(); |
3198 | 8.87k | } else { |
3199 | 8.87k | enqueueObjectsStandard(); |
3200 | 8.87k | } |
3201 | | |
3202 | | // Now start walking queue, outputting each object. |
// The loop re-reads object_queue.size() on every iteration and fetches by index (presumably
// because writeObject() can append to the queue -- confirm).
3203 | 80.7k | while (object_queue_front < object_queue.size()) { |
3204 | 71.8k | QPDFObjectHandle cur_object = object_queue.at(object_queue_front); |
3205 | 71.8k | ++object_queue_front; |
3206 | 71.8k | writeObject(cur_object); |
3207 | 71.8k | } |
3208 | | |
3209 | | // Write out the encryption dictionary, if any |
3210 | 8.87k | if (encryption) { |
3211 | 0 | writeEncryptionDictionary(); |
3212 | 0 | } |
3213 | | |
3214 | | // Now write out xref. next_objid is now the number of objects. |
3215 | 8.87k | qpdf_offset_t xref_offset = pipeline->getCount(); |
// A classic table is used when object_stream_to_objects is empty; otherwise the xref stream
// takes the next object id for itself.
3216 | 8.87k | if (object_stream_to_objects.empty()) { |
3217 | | // Write regular cross-reference table |
3218 | 8.42k | writeXRefTable(t_normal, 0, next_objid - 1, next_objid); |
3219 | 8.42k | } else { |
3220 | | // Write cross-reference stream. |
3221 | 445 | int xref_id = next_objid++; |
3222 | 445 | writeXRefStream(xref_id, xref_id, xref_offset, t_normal, 0, next_objid - 1, next_objid); |
3223 | 445 | } |
3224 | 8.87k | write("startxref\n").write(xref_offset).write("\n%%EOF\n"); |
3225 | | |
// Test-coverage bookkeeping only; nothing is written to the output here.
3226 | 8.87k | if (cfg.deterministic_id()) { |
3227 | 8.46k | QTC::TC( |
3228 | 8.46k | "qpdf", |
3229 | 8.46k | "QPDFWriter standard deterministic ID", |
3230 | 8.46k | object_stream_to_objects.empty() ? 0 : 1); |
3231 | 8.46k | } |
3232 | 8.87k | } |