/src/qpdf/libqpdf/QPDFWriter.cc
Line | Count | Source |
1 | | #include <qpdf/qpdf-config.h> // include early for large file support |
2 | | |
3 | | #include <qpdf/QPDFWriter_private.hh> |
4 | | |
5 | | #include <qpdf/MD5.hh> |
6 | | #include <qpdf/Pl_AES_PDF.hh> |
7 | | #include <qpdf/Pl_Flate.hh> |
8 | | #include <qpdf/Pl_MD5.hh> |
9 | | #include <qpdf/Pl_PNGFilter.hh> |
10 | | #include <qpdf/Pl_RC4.hh> |
11 | | #include <qpdf/Pl_StdioFile.hh> |
12 | | #include <qpdf/QIntC.hh> |
13 | | #include <qpdf/QPDFObjectHandle_private.hh> |
14 | | #include <qpdf/QPDFObject_private.hh> |
15 | | #include <qpdf/QPDF_private.hh> |
16 | | #include <qpdf/QTC.hh> |
17 | | #include <qpdf/QUtil.hh> |
18 | | #include <qpdf/RC4.hh> |
19 | | #include <qpdf/Util.hh> |
20 | | |
#include <algorithm>
#include <concepts>
#include <cstdlib>
#include <stdexcept>
#include <tuple>
#include <utility>
26 | | |
27 | | using namespace std::literals; |
28 | | using namespace qpdf; |
29 | | |
30 | | using Encryption = QPDF::Doc::Encryption; |
31 | | |
32 | | QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) |
33 | 0 | { |
34 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
35 | 0 | } |
36 | | |
37 | | QPDFWriter::FunctionProgressReporter::FunctionProgressReporter(std::function<void(int)> handler) : |
38 | 0 | handler(handler) |
39 | 0 | { |
40 | 0 | } |
41 | | |
42 | | QPDFWriter::FunctionProgressReporter::~FunctionProgressReporter() // NOLINT |
43 | | // (modernize-use-equals-default) |
44 | 0 | { |
45 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
46 | 0 | } |
47 | | |
48 | | void |
49 | | QPDFWriter::FunctionProgressReporter::reportProgress(int progress) |
50 | 0 | { |
51 | 0 | handler(progress); |
52 | 0 | } |
53 | | |
54 | | namespace |
55 | | { |
56 | | class Pl_stack |
57 | | { |
58 | | // A pipeline Popper is normally returned by Pl_stack::activate, or, if necessary, a |
59 | | // reference to a Popper instance can be passed into activate. When the Popper goes out of |
60 | | // scope, the pipeline stack is popped. This causes finish to be called on the current |
61 | | // pipeline and the pipeline stack to be popped until the top of stack is a previous active |
62 | | // top of stack and restores the pipeline to that point. It deletes any pipelines that it |
63 | | // pops. |
64 | | class Popper |
65 | | { |
66 | | friend class Pl_stack; |
67 | | |
68 | | public: |
69 | | Popper() = default; |
70 | | Popper(Popper const&) = delete; |
71 | | Popper(Popper&& other) noexcept |
72 | 0 | { |
73 | 0 | // For MSVC, default pops the stack |
74 | 0 | if (this != &other) { |
75 | 0 | stack = other.stack; |
76 | 0 | stack_id = other.stack_id; |
77 | 0 | other.stack = nullptr; |
78 | 0 | other.stack_id = 0; |
79 | 0 | }; |
80 | 0 | } |
81 | | Popper& operator=(Popper const&) = delete; |
82 | | Popper& |
83 | | operator=(Popper&& other) noexcept |
84 | 0 | { |
85 | 0 | // For MSVC, default pops the stack |
86 | 0 | if (this != &other) { |
87 | 0 | stack = other.stack; |
88 | 0 | stack_id = other.stack_id; |
89 | 0 | other.stack = nullptr; |
90 | 0 | other.stack_id = 0; |
91 | 0 | }; |
92 | 0 | return *this; |
93 | 0 | } |
94 | | |
95 | | ~Popper(); |
96 | | |
97 | | // Manually pop pipeline from the pipeline stack. |
98 | | void pop(); |
99 | | |
100 | | private: |
101 | | Popper(Pl_stack& stack) : |
102 | 108k | stack(&stack) |
103 | 108k | { |
104 | 108k | } |
105 | | |
106 | | Pl_stack* stack{nullptr}; |
107 | | unsigned long stack_id{0}; |
108 | | }; |
109 | | |
110 | | public: |
111 | | Pl_stack(pl::Count*& top) : |
112 | 9.18k | top(top) |
113 | 9.18k | { |
114 | 9.18k | } |
115 | | |
116 | | Popper |
117 | | popper() |
118 | 15.8k | { |
119 | 15.8k | return {*this}; |
120 | 15.8k | } |
121 | | |
122 | | void |
123 | | initialize(Pipeline* p) |
124 | 9.18k | { |
125 | 9.18k | auto c = std::make_unique<pl::Count>(++last_id, p); |
126 | 9.18k | top = c.get(); |
127 | 9.18k | stack.emplace_back(std::move(c)); |
128 | 9.18k | } |
129 | | |
130 | | Popper |
131 | | activate(std::string& str) |
132 | 87.9k | { |
133 | 87.9k | Popper pp{*this}; |
134 | 87.9k | activate(pp, str); |
135 | 87.9k | return pp; |
136 | 87.9k | } |
137 | | |
138 | | void |
139 | | activate(Popper& pp, std::string& str) |
140 | 87.9k | { |
141 | 87.9k | activate(pp, false, &str, nullptr); |
142 | 87.9k | } |
143 | | |
144 | | void |
145 | | activate(Popper& pp, std::unique_ptr<Pipeline> next) |
146 | 0 | { |
147 | 0 | count_buffer.clear(); |
148 | 0 | activate(pp, false, &count_buffer, std::move(next)); |
149 | 0 | } |
150 | | |
151 | | Popper |
152 | | activate( |
153 | | bool discard = false, |
154 | | std::string* str = nullptr, |
155 | | std::unique_ptr<Pipeline> next = nullptr) |
156 | 4.68k | { |
157 | 4.68k | Popper pp{*this}; |
158 | 4.68k | activate(pp, discard, str, std::move(next)); |
159 | 4.68k | return pp; |
160 | 4.68k | } |
161 | | |
162 | | void |
163 | | activate( |
164 | | Popper& pp, |
165 | | bool discard = false, |
166 | | std::string* str = nullptr, |
167 | | std::unique_ptr<Pipeline> next = nullptr) |
168 | 100k | { |
169 | 100k | std::unique_ptr<pl::Count> c; |
170 | 100k | if (next) { |
171 | 0 | c = std::make_unique<pl::Count>(++last_id, count_buffer, std::move(next)); |
172 | 100k | } else if (discard) { |
173 | 12.6k | c = std::make_unique<pl::Count>(++last_id, nullptr); |
174 | 87.9k | } else if (!str) { |
175 | 0 | c = std::make_unique<pl::Count>(++last_id, top); |
176 | 87.9k | } else { |
177 | 87.9k | c = std::make_unique<pl::Count>(++last_id, *str); |
178 | 87.9k | } |
179 | 100k | pp.stack_id = last_id; |
180 | 100k | top = c.get(); |
181 | 100k | stack.emplace_back(std::move(c)); |
182 | 100k | } |
183 | | void |
184 | | activate_md5(Popper& pp) |
185 | 0 | { |
186 | 0 | qpdf_assert_debug(!md5_pipeline); |
187 | 0 | qpdf_assert_debug(md5_id == 0); |
188 | 0 | qpdf_assert_debug(top->getCount() == 0); |
189 | 0 | md5_pipeline = std::make_unique<Pl_MD5>("qpdf md5", top); |
190 | 0 | md5_pipeline->persistAcrossFinish(true); |
191 | | // Special case code in pop clears m->md5_pipeline upon deletion. |
192 | 0 | auto c = std::make_unique<pl::Count>(++last_id, md5_pipeline.get()); |
193 | 0 | pp.stack_id = last_id; |
194 | 0 | md5_id = last_id; |
195 | 0 | top = c.get(); |
196 | 0 | stack.emplace_back(std::move(c)); |
197 | 0 | } |
198 | | |
199 | | // Return the hex digest and disable the MD5 pipeline. |
200 | | std::string |
201 | | hex_digest() |
202 | 0 | { |
203 | 0 | qpdf_assert_debug(md5_pipeline); |
204 | 0 | auto digest = md5_pipeline->getHexDigest(); |
205 | 0 | md5_pipeline->enable(false); |
206 | 0 | return digest; |
207 | 0 | } |
208 | | |
209 | | void |
210 | | clear_buffer() |
211 | 0 | { |
212 | 0 | count_buffer.clear(); |
213 | 0 | } |
214 | | |
215 | | private: |
216 | | void |
217 | | pop(unsigned long stack_id) |
218 | 108k | { |
219 | 108k | if (!stack_id) { |
220 | 7.91k | return; |
221 | 7.91k | } |
222 | 100k | qpdf_assert_debug(stack.size() >= 2); |
223 | 100k | top->finish(); |
224 | 100k | qpdf_assert_debug(stack.back().get() == top); |
225 | | // It used to be possible for this assertion to fail if writeLinearized exits by |
226 | | // exception when deterministic ID. There are no longer any cases in which two |
227 | | // dynamically allocated pipeline Popper objects ever exist at the same time, so the |
228 | | // assertion will fail if they get popped out of order from automatic destruction. |
229 | 100k | qpdf_assert_debug(top->id() == stack_id); |
230 | 100k | if (stack_id == md5_id) { |
231 | 0 | md5_pipeline = nullptr; |
232 | 0 | md5_id = 0; |
233 | 0 | } |
234 | 100k | stack.pop_back(); |
235 | 100k | top = stack.back().get(); |
236 | 100k | } |
237 | | |
238 | | std::vector<std::unique_ptr<pl::Count>> stack; |
239 | | pl::Count*& top; |
240 | | std::unique_ptr<Pl_MD5> md5_pipeline{nullptr}; |
241 | | unsigned long last_id{0}; |
242 | | unsigned long md5_id{0}; |
243 | | std::string count_buffer; |
244 | | }; |
245 | | } // namespace |
246 | | |
247 | | Pl_stack::Popper::~Popper() |
248 | 108k | { |
249 | 108k | if (stack) { |
250 | 101k | stack->pop(stack_id); |
251 | 101k | } |
252 | 108k | } |
253 | | |
254 | | void |
255 | | Pl_stack::Popper::pop() |
256 | 7.50k | { |
257 | 7.50k | if (stack) { |
258 | 7.50k | stack->pop(stack_id); |
259 | 7.50k | } |
260 | 7.50k | stack_id = 0; |
261 | 7.50k | stack = nullptr; |
262 | 7.50k | } |
263 | | |
264 | | // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
265 | | class QPDF::Doc::Writer |
266 | | { |
267 | | friend class QPDFWriter; |
268 | | Writer(QPDF& pdf) : |
269 | 9.43k | pdf(pdf), |
270 | 9.43k | lin(pdf.m->lin), |
271 | 9.43k | objects(pdf.m->objects) |
272 | 9.43k | { |
273 | 9.43k | } |
274 | | |
275 | | protected: |
276 | | void |
277 | | optimize( |
278 | | QPDFWriter::ObjTable const& obj, |
279 | | std::function<int(QPDFObjectHandle&)> skip_stream_parameters) |
280 | 9.07k | { |
281 | 9.07k | lin.optimize(obj, skip_stream_parameters); |
282 | 9.07k | } |
283 | | |
284 | | void |
285 | | getLinearizedParts( |
286 | | QPDFWriter::ObjTable const& obj, |
287 | | std::vector<QPDFObjectHandle>& part4, |
288 | | std::vector<QPDFObjectHandle>& part6, |
289 | | std::vector<QPDFObjectHandle>& part7, |
290 | | std::vector<QPDFObjectHandle>& part8, |
291 | | std::vector<QPDFObjectHandle>& part9) |
292 | 9.01k | { |
293 | 9.01k | lin.getLinearizedParts(obj, part4, part6, part7, part8, part9); |
294 | 9.01k | } |
295 | | |
296 | | void |
297 | | generateHintStream( |
298 | | QPDFWriter::NewObjTable const& new_obj, |
299 | | QPDFWriter::ObjTable const& obj, |
300 | | std::string& hint_stream, |
301 | | int& S, |
302 | | int& O, |
303 | | bool compressed) |
304 | 7.50k | { |
305 | 7.50k | lin.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); |
306 | 7.50k | } |
307 | | |
308 | | std::vector<QPDFObjGen> |
309 | | getCompressibleObjGens() |
310 | 0 | { |
311 | 0 | return objects.getCompressibleObjVector(); |
312 | 0 | } |
313 | | |
314 | | std::vector<bool> |
315 | | getCompressibleObjSet() |
316 | 1.29k | { |
317 | 1.29k | return objects.getCompressibleObjSet(); |
318 | 1.29k | } |
319 | | |
320 | | std::map<QPDFObjGen, QPDFXRefEntry> const& |
321 | | getXRefTable() |
322 | 9.16k | { |
323 | 9.16k | return objects.getXRefTableInternal(); |
324 | 9.16k | } |
325 | | |
326 | | size_t |
327 | | tableSize() |
328 | 9.16k | { |
329 | 9.16k | return pdf.m->objects.tableSize(); |
330 | 9.16k | } |
331 | | |
332 | | QPDF& pdf; |
333 | | QPDF::Doc::Linearization& lin; |
334 | | QPDF::Doc::Objects& objects; |
335 | | }; |
336 | | |
337 | | class QPDFWriter::Members: QPDF::Doc::Writer |
338 | | { |
339 | | friend class QPDFWriter; |
340 | | |
341 | | public: |
342 | | // flags used by unparseObject |
343 | | static int const f_stream = 1 << 0; |
344 | | static int const f_filtered = 1 << 1; |
345 | | static int const f_in_ostream = 1 << 2; |
346 | | static int const f_hex_string = 1 << 3; |
347 | | static int const f_no_encryption = 1 << 4; |
348 | | |
349 | | enum trailer_e { t_normal, t_lin_first, t_lin_second }; |
350 | | |
351 | | Members(QPDFWriter& w, QPDF& pdf) : |
352 | 9.43k | QPDF::Doc::Writer(pdf), |
353 | 9.43k | w(w), |
354 | | root_og( |
355 | 9.43k | pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)), |
356 | 9.43k | pipeline_stack(pipeline) |
357 | 9.43k | { |
358 | 9.43k | } |
359 | | |
360 | | Members(Members const&) = delete; |
361 | | |
362 | | ~Members() |
363 | 9.18k | { |
364 | 9.18k | if (file && close_file) { |
365 | 0 | fclose(file); |
366 | 0 | } |
367 | 9.18k | delete output_buffer; |
368 | 9.18k | } |
369 | | |
370 | | void write(); |
371 | | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); |
372 | | void setMinimumPDFVersion(std::string const& version, int extension_level); |
373 | | void copyEncryptionParameters(QPDF&); |
374 | | void doWriteSetup(); |
375 | | void prepareFileForWrite(); |
376 | | |
377 | | void disableIncompatibleEncryption(int major, int minor, int extension_level); |
378 | | void interpretR3EncryptionParameters( |
379 | | bool allow_accessibility, |
380 | | bool allow_extract, |
381 | | bool allow_assemble, |
382 | | bool allow_annotate_and_form, |
383 | | bool allow_form_filling, |
384 | | bool allow_modify_other, |
385 | | qpdf_r3_print_e print, |
386 | | qpdf_r3_modify_e modify); |
387 | | void setEncryptionParameters(char const* user_password, char const* owner_password); |
388 | | void setEncryptionMinimumVersion(); |
389 | | void parseVersion(std::string const& version, int& major, int& minor) const; |
390 | | int compareVersions(int major1, int minor1, int major2, int minor2) const; |
391 | | void generateID(bool encrypted); |
392 | | std::string getOriginalID1(); |
393 | | void initializeTables(size_t extra = 0); |
394 | | void preserveObjectStreams(); |
395 | | void generateObjectStreams(); |
396 | | void initializeSpecialStreams(); |
397 | | void enqueueObject(QPDFObjectHandle object); |
398 | | void enqueueObjectsStandard(); |
399 | | void enqueueObjectsPCLm(); |
400 | | void enqueuePart(std::vector<QPDFObjectHandle>& part); |
401 | | void assignCompressedObjectNumbers(QPDFObjGen og); |
402 | | Dictionary trimmed_trailer(); |
403 | | |
404 | | // Returns tuple<filter, compress_stream, is_root_metadata> |
405 | | std::tuple<const bool, const bool, const bool> |
406 | | will_filter_stream(QPDFObjectHandle stream, std::string* stream_data); |
407 | | |
408 | | // Test whether stream would be filtered if it were written. |
409 | | bool will_filter_stream(QPDFObjectHandle stream); |
410 | | unsigned int bytesNeeded(long long n); |
411 | | void writeBinary(unsigned long long val, unsigned int bytes); |
412 | | Members& write(std::string_view str); |
413 | | Members& write(size_t count, char c); |
414 | | Members& write(std::integral auto val); |
415 | | Members& write_name(std::string const& str); |
416 | | Members& write_string(std::string const& str, bool force_binary = false); |
417 | | Members& write_encrypted(std::string_view str); |
418 | | |
419 | | template <typename... Args> |
420 | | Members& write_qdf(Args&&... args); |
421 | | template <typename... Args> |
422 | | Members& write_no_qdf(Args&&... args); |
423 | | void writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj); |
424 | | void writeObjectStream(QPDFObjectHandle object); |
425 | | void writeObject(QPDFObjectHandle object, int object_stream_index = -1); |
426 | | void writeTrailer( |
427 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass); |
428 | | void unparseObject( |
429 | | QPDFObjectHandle object, |
430 | | size_t level, |
431 | | int flags, |
432 | | // for stream dictionaries |
433 | | size_t stream_length = 0, |
434 | | bool compress = false); |
435 | | void unparseChild(QPDFObjectHandle const& child, size_t level, int flags); |
436 | | int openObject(int objid = 0); |
437 | | void closeObject(int objid); |
438 | | void writeStandard(); |
439 | | void writeLinearized(); |
440 | | void writeEncryptionDictionary(); |
441 | | void writeHeader(); |
442 | | void writeHintStream(int hint_id); |
443 | | qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size); |
444 | | qpdf_offset_t writeXRefTable( |
445 | | trailer_e which, |
446 | | int first, |
447 | | int last, |
448 | | int size, |
449 | | // for linearization |
450 | | qpdf_offset_t prev, |
451 | | bool suppress_offsets, |
452 | | int hint_id, |
453 | | qpdf_offset_t hint_offset, |
454 | | qpdf_offset_t hint_length, |
455 | | int linearization_pass); |
456 | | qpdf_offset_t writeXRefStream( |
457 | | int objid, |
458 | | int max_id, |
459 | | qpdf_offset_t max_offset, |
460 | | trailer_e which, |
461 | | int first, |
462 | | int last, |
463 | | int size); |
464 | | qpdf_offset_t writeXRefStream( |
465 | | int objid, |
466 | | int max_id, |
467 | | qpdf_offset_t max_offset, |
468 | | trailer_e which, |
469 | | int first, |
470 | | int last, |
471 | | int size, |
472 | | // for linearization |
473 | | qpdf_offset_t prev, |
474 | | int hint_id, |
475 | | qpdf_offset_t hint_offset, |
476 | | qpdf_offset_t hint_length, |
477 | | bool skip_compression, |
478 | | int linearization_pass); |
479 | | |
480 | | void setDataKey(int objid); |
481 | | void indicateProgress(bool decrement, bool finished); |
482 | | size_t calculateXrefStreamPadding(qpdf_offset_t xref_bytes); |
483 | | |
484 | | void adjustAESStreamLength(size_t& length); |
485 | | void computeDeterministicIDData(); |
486 | | |
487 | | private: |
488 | | QPDFWriter& w; |
489 | | QPDFObjGen root_og{-1, 0}; |
490 | | char const* filename{"unspecified"}; |
491 | | FILE* file{nullptr}; |
492 | | bool close_file{false}; |
493 | | std::unique_ptr<Pl_Buffer> buffer_pipeline{nullptr}; |
494 | | Buffer* output_buffer{nullptr}; |
495 | | bool normalize_content_set{false}; |
496 | | bool normalize_content{false}; |
497 | | bool compress_streams{true}; |
498 | | bool compress_streams_set{false}; |
499 | | qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized}; |
500 | | bool stream_decode_level_set{false}; |
501 | | bool recompress_flate{false}; |
502 | | bool qdf_mode{false}; |
503 | | bool preserve_unreferenced_objects{false}; |
504 | | bool newline_before_endstream{false}; |
505 | | bool static_id{false}; |
506 | | bool suppress_original_object_ids{false}; |
507 | | bool direct_stream_lengths{true}; |
508 | | bool preserve_encryption{true}; |
509 | | bool linearized{false}; |
510 | | bool pclm{false}; |
511 | | qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; |
512 | | |
513 | | std::unique_ptr<QPDF::Doc::Encryption> encryption; |
514 | | std::string encryption_key; |
515 | | bool encrypt_use_aes{false}; |
516 | | |
517 | | std::string id1; // for /ID key of |
518 | | std::string id2; // trailer dictionary |
519 | | std::string final_pdf_version; |
520 | | int final_extension_level{0}; |
521 | | std::string min_pdf_version; |
522 | | int min_extension_level{0}; |
523 | | std::string forced_pdf_version; |
524 | | int forced_extension_level{0}; |
525 | | std::string extra_header_text; |
526 | | int encryption_dict_objid{0}; |
527 | | std::string cur_data_key; |
528 | | std::unique_ptr<Pipeline> file_pl; |
529 | | qpdf::pl::Count* pipeline{nullptr}; |
530 | | std::vector<QPDFObjectHandle> object_queue; |
531 | | size_t object_queue_front{0}; |
532 | | QPDFWriter::ObjTable obj; |
533 | | QPDFWriter::NewObjTable new_obj; |
534 | | int next_objid{1}; |
535 | | int cur_stream_length_id{0}; |
536 | | size_t cur_stream_length{0}; |
537 | | bool added_newline{false}; |
538 | | size_t max_ostream_index{0}; |
539 | | std::set<QPDFObjGen> normalized_streams; |
540 | | std::map<QPDFObjGen, int> page_object_to_seq; |
541 | | std::map<QPDFObjGen, int> contents_to_page_seq; |
542 | | std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; |
543 | | Pl_stack pipeline_stack; |
544 | | bool deterministic_id{false}; |
545 | | std::string deterministic_id_data; |
546 | | bool did_write_setup{false}; |
547 | | |
548 | | // For linearization only |
549 | | std::string lin_pass1_filename; |
550 | | |
551 | | // For progress reporting |
552 | | std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter; |
553 | | int events_expected{0}; |
554 | | int events_seen{0}; |
555 | | int next_progress_report{0}; |
556 | | }; |
557 | | |
558 | | QPDFWriter::QPDFWriter(QPDF& pdf) : |
559 | 9.43k | m(std::make_shared<Members>(*this, pdf)) |
560 | 9.43k | { |
561 | 9.43k | } |
562 | | |
563 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) : |
564 | 0 | m(std::make_shared<Members>(*this, pdf)) |
565 | 0 | { |
566 | 0 | setOutputFilename(filename); |
567 | 0 | } |
568 | | |
569 | | QPDFWriter::QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file) : |
570 | 0 | m(std::make_shared<Members>(*this, pdf)) |
571 | 0 | { |
572 | 0 | setOutputFile(description, file, close_file); |
573 | 0 | } |
574 | | |
575 | | void |
576 | | QPDFWriter::setOutputFilename(char const* filename) |
577 | 0 | { |
578 | 0 | char const* description = filename; |
579 | 0 | FILE* f = nullptr; |
580 | 0 | bool close_file = false; |
581 | 0 | if (filename == nullptr) { |
582 | 0 | description = "standard output"; |
583 | 0 | f = stdout; |
584 | 0 | QUtil::binary_stdout(); |
585 | 0 | } else { |
586 | 0 | f = QUtil::safe_fopen(filename, "wb+"); |
587 | 0 | close_file = true; |
588 | 0 | } |
589 | 0 | setOutputFile(description, f, close_file); |
590 | 0 | } |
591 | | |
592 | | void |
593 | | QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file) |
594 | 0 | { |
595 | 0 | m->filename = description; |
596 | 0 | m->file = file; |
597 | 0 | m->close_file = close_file; |
598 | 0 | m->file_pl = std::make_unique<Pl_StdioFile>("qpdf output", file); |
599 | 0 | m->pipeline_stack.initialize(m->file_pl.get()); |
600 | 0 | } |
601 | | |
602 | | void |
603 | | QPDFWriter::setOutputMemory() |
604 | 0 | { |
605 | 0 | m->filename = "memory buffer"; |
606 | 0 | m->buffer_pipeline = std::make_unique<Pl_Buffer>("qpdf output"); |
607 | 0 | m->pipeline_stack.initialize(m->buffer_pipeline.get()); |
608 | 0 | } |
609 | | |
610 | | Buffer* |
611 | | QPDFWriter::getBuffer() |
612 | 0 | { |
613 | 0 | Buffer* result = m->output_buffer; |
614 | 0 | m->output_buffer = nullptr; |
615 | 0 | return result; |
616 | 0 | } |
617 | | |
618 | | std::shared_ptr<Buffer> |
619 | | QPDFWriter::getBufferSharedPointer() |
620 | 0 | { |
621 | 0 | return std::shared_ptr<Buffer>(getBuffer()); |
622 | 0 | } |
623 | | |
624 | | void |
625 | | QPDFWriter::setOutputPipeline(Pipeline* p) |
626 | 9.18k | { |
627 | 9.18k | m->filename = "custom pipeline"; |
628 | 9.18k | m->pipeline_stack.initialize(p); |
629 | 9.18k | } |
630 | | |
631 | | void |
632 | | QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) |
633 | 0 | { |
634 | 0 | m->object_stream_mode = mode; |
635 | 0 | } |
636 | | |
637 | | void |
638 | | QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) |
639 | 0 | { |
640 | 0 | switch (mode) { |
641 | 0 | case qpdf_s_uncompress: |
642 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
643 | 0 | m->compress_streams = false; |
644 | 0 | break; |
645 | | |
646 | 0 | case qpdf_s_preserve: |
647 | 0 | m->stream_decode_level = qpdf_dl_none; |
648 | 0 | m->compress_streams = false; |
649 | 0 | break; |
650 | | |
651 | 0 | case qpdf_s_compress: |
652 | 0 | m->stream_decode_level = std::max(qpdf_dl_generalized, m->stream_decode_level); |
653 | 0 | m->compress_streams = true; |
654 | 0 | break; |
655 | 0 | } |
656 | 0 | m->stream_decode_level_set = true; |
657 | 0 | m->compress_streams_set = true; |
658 | 0 | } |
659 | | |
660 | | void |
661 | | QPDFWriter::setCompressStreams(bool val) |
662 | 0 | { |
663 | 0 | m->compress_streams = val; |
664 | 0 | m->compress_streams_set = true; |
665 | 0 | } |
666 | | |
667 | | void |
668 | | QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) |
669 | 9.18k | { |
670 | 9.18k | m->stream_decode_level = val; |
671 | 9.18k | m->stream_decode_level_set = true; |
672 | 9.18k | } |
673 | | |
674 | | void |
675 | | QPDFWriter::setRecompressFlate(bool val) |
676 | 0 | { |
677 | 0 | m->recompress_flate = val; |
678 | 0 | } |
679 | | |
680 | | void |
681 | | QPDFWriter::setContentNormalization(bool val) |
682 | 0 | { |
683 | 0 | m->normalize_content_set = true; |
684 | 0 | m->normalize_content = val; |
685 | 0 | } |
686 | | |
687 | | void |
688 | | QPDFWriter::setQDFMode(bool val) |
689 | 0 | { |
690 | 0 | m->qdf_mode = val; |
691 | 0 | } |
692 | | |
693 | | void |
694 | | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
695 | 0 | { |
696 | 0 | m->preserve_unreferenced_objects = val; |
697 | 0 | } |
698 | | |
699 | | void |
700 | | QPDFWriter::setNewlineBeforeEndstream(bool val) |
701 | 0 | { |
702 | 0 | m->newline_before_endstream = val; |
703 | 0 | } |
704 | | |
705 | | void |
706 | | QPDFWriter::setMinimumPDFVersion(std::string const& version, int extension_level) |
707 | 10.0k | { |
708 | 10.0k | m->setMinimumPDFVersion(version, extension_level); |
709 | 10.0k | } |
710 | | |
711 | | void |
712 | | QPDFWriter::Members::setMinimumPDFVersion(std::string const& version, int extension_level) |
713 | 19.1k | { |
714 | 19.1k | bool set_version = false; |
715 | 19.1k | bool set_extension_level = false; |
716 | 19.1k | if (min_pdf_version.empty()) { |
717 | 9.16k | set_version = true; |
718 | 9.16k | set_extension_level = true; |
719 | 10.0k | } else { |
720 | 10.0k | int old_major = 0; |
721 | 10.0k | int old_minor = 0; |
722 | 10.0k | int min_major = 0; |
723 | 10.0k | int min_minor = 0; |
724 | 10.0k | parseVersion(version, old_major, old_minor); |
725 | 10.0k | parseVersion(min_pdf_version, min_major, min_minor); |
726 | 10.0k | int compare = compareVersions(old_major, old_minor, min_major, min_minor); |
727 | 10.0k | if (compare > 0) { |
728 | 223 | QTC::TC("qpdf", "QPDFWriter increasing minimum version", extension_level == 0 ? 0 : 1); |
729 | 223 | set_version = true; |
730 | 223 | set_extension_level = true; |
731 | 9.79k | } else if (compare == 0) { |
732 | 1.03k | if (extension_level > min_extension_level) { |
733 | 3 | set_extension_level = true; |
734 | 3 | } |
735 | 1.03k | } |
736 | 10.0k | } |
737 | | |
738 | 19.1k | if (set_version) { |
739 | 9.38k | min_pdf_version = version; |
740 | 9.38k | } |
741 | 19.1k | if (set_extension_level) { |
742 | 9.39k | min_extension_level = extension_level; |
743 | 9.39k | } |
744 | 19.1k | } |
745 | | |
746 | | void |
747 | | QPDFWriter::setMinimumPDFVersion(PDFVersion const& v) |
748 | 0 | { |
749 | 0 | std::string version; |
750 | 0 | int extension_level; |
751 | 0 | v.getVersion(version, extension_level); |
752 | 0 | setMinimumPDFVersion(version, extension_level); |
753 | 0 | } |
754 | | |
755 | | void |
756 | | QPDFWriter::forcePDFVersion(std::string const& version, int extension_level) |
757 | 0 | { |
758 | 0 | m->forced_pdf_version = version; |
759 | 0 | m->forced_extension_level = extension_level; |
760 | 0 | } |
761 | | |
762 | | void |
763 | | QPDFWriter::setExtraHeaderText(std::string const& text) |
764 | 0 | { |
765 | 0 | m->extra_header_text = text; |
766 | 0 | if (!m->extra_header_text.empty() && *m->extra_header_text.rbegin() != '\n') { |
767 | 0 | m->extra_header_text += "\n"; |
768 | 0 | } else { |
769 | 0 | QTC::TC("qpdf", "QPDFWriter extra header text no newline"); |
770 | 0 | } |
771 | 0 | } |
772 | | |
773 | | void |
774 | | QPDFWriter::setStaticID(bool val) |
775 | 9.18k | { |
776 | 9.18k | m->static_id = val; |
777 | 9.18k | } |
778 | | |
779 | | void |
780 | | QPDFWriter::setDeterministicID(bool val) |
781 | 0 | { |
782 | 0 | m->deterministic_id = val; |
783 | 0 | } |
784 | | |
785 | | void |
786 | | QPDFWriter::setStaticAesIV(bool val) |
787 | 0 | { |
788 | 0 | if (val) { |
789 | 0 | Pl_AES_PDF::useStaticIV(); |
790 | 0 | } |
791 | 0 | } |
792 | | |
793 | | void |
794 | | QPDFWriter::setSuppressOriginalObjectIDs(bool val) |
795 | 0 | { |
796 | 0 | m->suppress_original_object_ids = val; |
797 | 0 | } |
798 | | |
799 | | void |
800 | | QPDFWriter::setPreserveEncryption(bool val) |
801 | 0 | { |
802 | 0 | m->preserve_encryption = val; |
803 | 0 | } |
804 | | |
805 | | void |
806 | | QPDFWriter::setLinearization(bool val) |
807 | 9.18k | { |
808 | 9.18k | m->linearized = val; |
809 | 9.18k | if (val) { |
810 | 9.18k | m->pclm = false; |
811 | 9.18k | } |
812 | 9.18k | } |
813 | | |
814 | | void |
815 | | QPDFWriter::setLinearizationPass1Filename(std::string const& filename) |
816 | 0 | { |
817 | 0 | m->lin_pass1_filename = filename; |
818 | 0 | } |
819 | | |
820 | | void |
821 | | QPDFWriter::setPCLm(bool val) |
822 | 0 | { |
823 | 0 | m->pclm = val; |
824 | 0 | if (val) { |
825 | 0 | m->linearized = false; |
826 | 0 | } |
827 | 0 | } |
828 | | |
829 | | void |
830 | | QPDFWriter::setR2EncryptionParametersInsecure( |
831 | | char const* user_password, |
832 | | char const* owner_password, |
833 | | bool allow_print, |
834 | | bool allow_modify, |
835 | | bool allow_extract, |
836 | | bool allow_annotate) |
837 | 0 | { |
838 | 0 | m->encryption = std::make_unique<Encryption>(1, 2, 5, true); |
839 | 0 | if (!allow_print) { |
840 | 0 | m->encryption->setP(3, false); |
841 | 0 | } |
842 | 0 | if (!allow_modify) { |
843 | 0 | m->encryption->setP(4, false); |
844 | 0 | } |
845 | 0 | if (!allow_extract) { |
846 | 0 | m->encryption->setP(5, false); |
847 | 0 | } |
848 | 0 | if (!allow_annotate) { |
849 | 0 | m->encryption->setP(6, false); |
850 | 0 | } |
851 | 0 | m->setEncryptionParameters(user_password, owner_password); |
852 | 0 | } |
853 | | |
854 | | void |
855 | | QPDFWriter::setR3EncryptionParametersInsecure( |
856 | | char const* user_password, |
857 | | char const* owner_password, |
858 | | bool allow_accessibility, |
859 | | bool allow_extract, |
860 | | bool allow_assemble, |
861 | | bool allow_annotate_and_form, |
862 | | bool allow_form_filling, |
863 | | bool allow_modify_other, |
864 | | qpdf_r3_print_e print) |
865 | 0 | { |
866 | 0 | m->encryption = std::make_unique<Encryption>(2, 3, 16, true); |
867 | 0 | m->interpretR3EncryptionParameters( |
868 | 0 | allow_accessibility, |
869 | 0 | allow_extract, |
870 | 0 | allow_assemble, |
871 | 0 | allow_annotate_and_form, |
872 | 0 | allow_form_filling, |
873 | 0 | allow_modify_other, |
874 | 0 | print, |
875 | 0 | qpdf_r3m_all); |
876 | 0 | m->setEncryptionParameters(user_password, owner_password); |
877 | 0 | } |
878 | | |
879 | | void |
880 | | QPDFWriter::setR4EncryptionParametersInsecure( |
881 | | char const* user_password, |
882 | | char const* owner_password, |
883 | | bool allow_accessibility, |
884 | | bool allow_extract, |
885 | | bool allow_assemble, |
886 | | bool allow_annotate_and_form, |
887 | | bool allow_form_filling, |
888 | | bool allow_modify_other, |
889 | | qpdf_r3_print_e print, |
890 | | bool encrypt_metadata, |
891 | | bool use_aes) |
892 | 0 | { |
893 | 0 | m->encryption = std::make_unique<Encryption>(4, 4, 16, encrypt_metadata); |
894 | 0 | m->encrypt_use_aes = use_aes; |
895 | 0 | m->interpretR3EncryptionParameters( |
896 | 0 | allow_accessibility, |
897 | 0 | allow_extract, |
898 | 0 | allow_assemble, |
899 | 0 | allow_annotate_and_form, |
900 | 0 | allow_form_filling, |
901 | 0 | allow_modify_other, |
902 | 0 | print, |
903 | 0 | qpdf_r3m_all); |
904 | 0 | m->setEncryptionParameters(user_password, owner_password); |
905 | 0 | } |
906 | | |
907 | | void |
908 | | QPDFWriter::setR5EncryptionParameters( |
909 | | char const* user_password, |
910 | | char const* owner_password, |
911 | | bool allow_accessibility, |
912 | | bool allow_extract, |
913 | | bool allow_assemble, |
914 | | bool allow_annotate_and_form, |
915 | | bool allow_form_filling, |
916 | | bool allow_modify_other, |
917 | | qpdf_r3_print_e print, |
918 | | bool encrypt_metadata) |
919 | 0 | { |
920 | 0 | m->encryption = std::make_unique<Encryption>(5, 5, 32, encrypt_metadata); |
921 | 0 | m->encrypt_use_aes = true; |
922 | 0 | m->interpretR3EncryptionParameters( |
923 | 0 | allow_accessibility, |
924 | 0 | allow_extract, |
925 | 0 | allow_assemble, |
926 | 0 | allow_annotate_and_form, |
927 | 0 | allow_form_filling, |
928 | 0 | allow_modify_other, |
929 | 0 | print, |
930 | 0 | qpdf_r3m_all); |
931 | 0 | m->setEncryptionParameters(user_password, owner_password); |
932 | 0 | } |
933 | | |
934 | | void |
935 | | QPDFWriter::setR6EncryptionParameters( |
936 | | char const* user_password, |
937 | | char const* owner_password, |
938 | | bool allow_accessibility, |
939 | | bool allow_extract, |
940 | | bool allow_assemble, |
941 | | bool allow_annotate_and_form, |
942 | | bool allow_form_filling, |
943 | | bool allow_modify_other, |
944 | | qpdf_r3_print_e print, |
945 | | bool encrypt_metadata) |
946 | 9.18k | { |
947 | 9.18k | m->encryption = std::make_unique<Encryption>(5, 6, 32, encrypt_metadata); |
948 | 9.18k | m->interpretR3EncryptionParameters( |
949 | 9.18k | allow_accessibility, |
950 | 9.18k | allow_extract, |
951 | 9.18k | allow_assemble, |
952 | 9.18k | allow_annotate_and_form, |
953 | 9.18k | allow_form_filling, |
954 | 9.18k | allow_modify_other, |
955 | 9.18k | print, |
956 | 9.18k | qpdf_r3m_all); |
957 | 9.18k | m->encrypt_use_aes = true; |
958 | 9.18k | m->setEncryptionParameters(user_password, owner_password); |
959 | 9.18k | } |
960 | | |
961 | | void |
962 | | QPDFWriter::Members::interpretR3EncryptionParameters( |
963 | | bool allow_accessibility, |
964 | | bool allow_extract, |
965 | | bool allow_assemble, |
966 | | bool allow_annotate_and_form, |
967 | | bool allow_form_filling, |
968 | | bool allow_modify_other, |
969 | | qpdf_r3_print_e print, |
970 | | qpdf_r3_modify_e modify) |
971 | 9.18k | { |
972 | | // Acrobat 5 security options: |
973 | | |
974 | | // Checkboxes: |
975 | | // Enable Content Access for the Visually Impaired |
976 | | // Allow Content Copying and Extraction |
977 | | |
978 | | // Allowed changes menu: |
979 | | // None |
980 | | // Only Document Assembly |
981 | | // Only Form Field Fill-in or Signing |
982 | | // Comment Authoring, Form Field Fill-in or Signing |
983 | | // General Editing, Comment and Form Field Authoring |
984 | | |
985 | | // Allowed printing menu: |
986 | | // None |
987 | | // Low Resolution |
988 | | // Full printing |
989 | | |
990 | | // Meanings of bits in P when R >= 3 |
991 | | // |
992 | | // 3: low-resolution printing |
993 | | // 4: document modification except as controlled by 6, 9, and 11 |
994 | | // 5: extraction |
995 | | // 6: add/modify annotations (comment), fill in forms |
996 | | // if 4+6 are set, also allows modification of form fields |
997 | | // 9: fill in forms even if 6 is clear |
998 | | // 10: accessibility; ignored by readers, should always be set |
999 | | // 11: document assembly even if 4 is clear |
1000 | | // 12: high-resolution printing |
1001 | 9.18k | if (!allow_accessibility && encryption->getR() <= 3) { |
1002 | | // Bit 10 is deprecated and should always be set. This used to mean accessibility. There |
1003 | | // is no way to disable accessibility with R > 3. |
1004 | 0 | encryption->setP(10, false); |
1005 | 0 | } |
1006 | 9.18k | if (!allow_extract) { |
1007 | 0 | encryption->setP(5, false); |
1008 | 0 | } |
1009 | | |
1010 | 9.18k | switch (print) { |
1011 | 0 | case qpdf_r3p_none: |
1012 | 0 | encryption->setP(3, false); // any printing |
1013 | 0 | [[fallthrough]]; |
1014 | 0 | case qpdf_r3p_low: |
1015 | 0 | encryption->setP(12, false); // high resolution printing |
1016 | 0 | [[fallthrough]]; |
1017 | 9.18k | case qpdf_r3p_full: |
1018 | 9.18k | break; |
1019 | | // no default so gcc warns for missing cases |
1020 | 9.18k | } |
1021 | | |
1022 | | // Modify options. The qpdf_r3_modify_e options control groups of bits and lack the full |
1023 | | // flexibility of the spec. This is unfortunate, but it's been in the API for ages, and we're |
1024 | | // stuck with it. See also allow checks below to control the bits individually. |
1025 | | |
1026 | | // NOT EXERCISED IN TEST SUITE |
1027 | 9.18k | switch (modify) { |
1028 | 0 | case qpdf_r3m_none: |
1029 | 0 | encryption->setP(11, false); // document assembly |
1030 | 0 | [[fallthrough]]; |
1031 | 0 | case qpdf_r3m_assembly: |
1032 | 0 | encryption->setP(9, false); // filling in form fields |
1033 | 0 | [[fallthrough]]; |
1034 | 0 | case qpdf_r3m_form: |
1035 | 0 | encryption->setP(6, false); // modify annotations, fill in form fields |
1036 | 0 | [[fallthrough]]; |
1037 | 0 | case qpdf_r3m_annotate: |
1038 | 0 | encryption->setP(4, false); // other modifications |
1039 | 0 | [[fallthrough]]; |
1040 | 9.18k | case qpdf_r3m_all: |
1041 | 9.18k | break; |
1042 | | // no default so gcc warns for missing cases |
1043 | 9.18k | } |
1044 | | // END NOT EXERCISED IN TEST SUITE |
1045 | | |
1046 | 9.18k | if (!allow_assemble) { |
1047 | 0 | encryption->setP(11, false); |
1048 | 0 | } |
1049 | 9.18k | if (!allow_annotate_and_form) { |
1050 | 0 | encryption->setP(6, false); |
1051 | 0 | } |
1052 | 9.18k | if (!allow_form_filling) { |
1053 | 0 | encryption->setP(9, false); |
1054 | 0 | } |
1055 | 9.18k | if (!allow_modify_other) { |
1056 | 0 | encryption->setP(4, false); |
1057 | 0 | } |
1058 | 9.18k | } |
1059 | | |
1060 | | void |
1061 | | QPDFWriter::Members::setEncryptionParameters(char const* user_password, char const* owner_password) |
1062 | 9.18k | { |
1063 | 9.18k | generateID(true); |
1064 | 9.18k | encryption->setId1(id1); |
1065 | 9.18k | encryption_key = encryption->compute_parameters(user_password, owner_password); |
1066 | 9.18k | setEncryptionMinimumVersion(); |
1067 | 9.18k | } |
1068 | | |
1069 | | void |
1070 | | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
1071 | 0 | { |
1072 | 0 | m->copyEncryptionParameters(qpdf); |
1073 | 0 | } |
1074 | | |
1075 | | void |
1076 | | QPDFWriter::Members::copyEncryptionParameters(QPDF& qpdf) |
1077 | 0 | { |
1078 | 0 | preserve_encryption = false; |
1079 | 0 | QPDFObjectHandle trailer = qpdf.getTrailer(); |
1080 | 0 | if (trailer.hasKey("/Encrypt")) { |
1081 | 0 | generateID(true); |
1082 | 0 | id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
1083 | 0 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
1084 | 0 | int V = encrypt.getKey("/V").getIntValueAsInt(); |
1085 | 0 | int key_len = 5; |
1086 | 0 | if (V > 1) { |
1087 | 0 | key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8; |
1088 | 0 | } |
1089 | 0 | const bool encrypt_metadata = |
1090 | 0 | encrypt.hasKey("/EncryptMetadata") && encrypt.getKey("/EncryptMetadata").isBool() |
1091 | 0 | ? encrypt.getKey("/EncryptMetadata").getBoolValue() |
1092 | 0 | : true; |
1093 | 0 | if (V >= 4) { |
1094 | | // When copying encryption parameters, use AES even if the original file did not. |
1095 | | // Acrobat doesn't create files with V >= 4 that don't use AES, and the logic of |
1096 | | // figuring out whether AES is used or not is complicated with /StmF, /StrF, and /EFF |
1097 | | // all potentially having different values. |
1098 | 0 | encrypt_use_aes = true; |
1099 | 0 | } |
1100 | 0 | QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", encrypt_metadata ? 0 : 1); |
1101 | 0 | QTC::TC("qpdf", "QPDFWriter copy use_aes", encrypt_use_aes ? 0 : 1); |
1102 | |
|
1103 | 0 | encryption = std::make_unique<Encryption>( |
1104 | 0 | V, |
1105 | 0 | encrypt.getKey("/R").getIntValueAsInt(), |
1106 | 0 | key_len, |
1107 | 0 | static_cast<int>(encrypt.getKey("/P").getIntValue()), |
1108 | 0 | encrypt.getKey("/O").getStringValue(), |
1109 | 0 | encrypt.getKey("/U").getStringValue(), |
1110 | 0 | V < 5 ? "" : encrypt.getKey("/OE").getStringValue(), |
1111 | 0 | V < 5 ? "" : encrypt.getKey("/UE").getStringValue(), |
1112 | 0 | V < 5 ? "" : encrypt.getKey("/Perms").getStringValue(), |
1113 | 0 | id1, // id1 == the other file's id1 |
1114 | 0 | encrypt_metadata); |
1115 | 0 | encryption_key = V >= 5 ? qpdf.getEncryptionKey() |
1116 | 0 | : encryption->compute_encryption_key(qpdf.getPaddedUserPassword()); |
1117 | 0 | setEncryptionMinimumVersion(); |
1118 | 0 | } |
1119 | 0 | } |
1120 | | |
1121 | | void |
1122 | | QPDFWriter::Members::disableIncompatibleEncryption(int major, int minor, int extension_level) |
1123 | 0 | { |
1124 | 0 | if (!encryption) { |
1125 | 0 | return; |
1126 | 0 | } |
1127 | 0 | if (compareVersions(major, minor, 1, 3) < 0) { |
1128 | 0 | encryption = nullptr; |
1129 | 0 | return; |
1130 | 0 | } |
1131 | 0 | int V = encryption->getV(); |
1132 | 0 | int R = encryption->getR(); |
1133 | 0 | if (compareVersions(major, minor, 1, 4) < 0) { |
1134 | 0 | if (V > 1 || R > 2) { |
1135 | 0 | encryption = nullptr; |
1136 | 0 | } |
1137 | 0 | } else if (compareVersions(major, minor, 1, 5) < 0) { |
1138 | 0 | if (V > 2 || R > 3) { |
1139 | 0 | encryption = nullptr; |
1140 | 0 | } |
1141 | 0 | } else if (compareVersions(major, minor, 1, 6) < 0) { |
1142 | 0 | if (encrypt_use_aes) { |
1143 | 0 | encryption = nullptr; |
1144 | 0 | } |
1145 | 0 | } else if ( |
1146 | 0 | (compareVersions(major, minor, 1, 7) < 0) || |
1147 | 0 | ((compareVersions(major, minor, 1, 7) == 0) && extension_level < 3)) { |
1148 | 0 | if (V >= 5 || R >= 5) { |
1149 | 0 | encryption = nullptr; |
1150 | 0 | } |
1151 | 0 | } |
1152 | |
|
1153 | 0 | if (!encryption) { |
1154 | 0 | QTC::TC("qpdf", "QPDFWriter forced version disabled encryption"); |
1155 | 0 | } |
1156 | 0 | } |
1157 | | |
1158 | | void |
1159 | | QPDFWriter::Members::parseVersion(std::string const& version, int& major, int& minor) const |
1160 | 20.0k | { |
1161 | 20.0k | major = QUtil::string_to_int(version.c_str()); |
1162 | 20.0k | minor = 0; |
1163 | 20.0k | size_t p = version.find('.'); |
1164 | 20.0k | if ((p != std::string::npos) && (version.length() > p)) { |
1165 | 20.0k | minor = QUtil::string_to_int(version.substr(p + 1).c_str()); |
1166 | 20.0k | } |
1167 | 20.0k | std::string tmp = std::to_string(major) + "." + std::to_string(minor); |
1168 | 20.0k | if (tmp != version) { |
1169 | | // The version number in the input is probably invalid. This happens with some files that |
1170 | | // are designed to exercise bugs, such as files in the fuzzer corpus. Unfortunately |
1171 | | // QPDFWriter doesn't have a way to give a warning, so we just ignore this case. |
1172 | 19 | } |
1173 | 20.0k | } |
1174 | | |
1175 | | int |
1176 | | QPDFWriter::Members::compareVersions(int major1, int minor1, int major2, int minor2) const |
1177 | 10.0k | { |
1178 | 10.0k | if (major1 < major2) { |
1179 | 78 | return -1; |
1180 | 78 | } |
1181 | 9.93k | if (major1 > major2) { |
1182 | 129 | return 1; |
1183 | 129 | } |
1184 | 9.80k | if (minor1 < minor2) { |
1185 | 8.67k | return -1; |
1186 | 8.67k | } |
1187 | 1.12k | return minor1 > minor2 ? 1 : 0; |
1188 | 9.80k | } |
1189 | | |
1190 | | void |
1191 | | QPDFWriter::Members::setEncryptionMinimumVersion() |
1192 | 9.16k | { |
1193 | 9.16k | auto const R = encryption->getR(); |
1194 | 9.16k | if (R >= 6) { |
1195 | 9.16k | w.setMinimumPDFVersion("1.7", 8); |
1196 | 9.16k | } else if (R == 5) { |
1197 | 0 | w.setMinimumPDFVersion("1.7", 3); |
1198 | 0 | } else if (R == 4) { |
1199 | 0 | w.setMinimumPDFVersion(encrypt_use_aes ? "1.6" : "1.5"); |
1200 | 0 | } else if (R == 3) { |
1201 | 0 | w.setMinimumPDFVersion("1.4"); |
1202 | 0 | } else { |
1203 | 0 | w.setMinimumPDFVersion("1.3"); |
1204 | 0 | } |
1205 | 9.16k | } |
1206 | | |
1207 | | void |
1208 | | QPDFWriter::Members::setDataKey(int objid) |
1209 | 237k | { |
1210 | 237k | if (encryption) { |
1211 | 237k | cur_data_key = QPDF::compute_data_key( |
1212 | 237k | encryption_key, objid, 0, encrypt_use_aes, encryption->getV(), encryption->getR()); |
1213 | 237k | } |
1214 | 237k | } |
1215 | | |
1216 | | unsigned int |
1217 | | QPDFWriter::Members::bytesNeeded(long long n) |
1218 | 7.49k | { |
1219 | 7.49k | unsigned int bytes = 0; |
1220 | 18.1k | while (n) { |
1221 | 10.6k | ++bytes; |
1222 | 10.6k | n >>= 8; |
1223 | 10.6k | } |
1224 | 7.49k | return bytes; |
1225 | 7.49k | } |
1226 | | |
1227 | | void |
1228 | | QPDFWriter::Members::writeBinary(unsigned long long val, unsigned int bytes) |
1229 | 415k | { |
1230 | 415k | if (bytes > sizeof(unsigned long long)) { |
1231 | 0 | throw std::logic_error("QPDFWriter::writeBinary called with too many bytes"); |
1232 | 0 | } |
1233 | 415k | unsigned char data[sizeof(unsigned long long)]; |
1234 | 1.02M | for (unsigned int i = 0; i < bytes; ++i) { |
1235 | 609k | data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff); |
1236 | 609k | val >>= 8; |
1237 | 609k | } |
1238 | 415k | pipeline->write(data, bytes); |
1239 | 415k | } |
1240 | | |
1241 | | QPDFWriter::Members& |
1242 | | QPDFWriter::Members::write(std::string_view str) |
1243 | 8.12M | { |
1244 | 8.12M | pipeline->write(str); |
1245 | 8.12M | return *this; |
1246 | 8.12M | } |
1247 | | |
1248 | | QPDFWriter::Members& |
1249 | | QPDFWriter::Members::write(std::integral auto val) |
1250 | 1.11M | { |
1251 | 1.11M | pipeline->write(std::to_string(val)); |
1252 | 1.11M | return *this; |
1253 | 1.11M | } _ZN10QPDFWriter7Members5writeITkNSt3__18integralEiEERS0_T_ Line | Count | Source | 1250 | 839k | { | 1251 | 839k | pipeline->write(std::to_string(val)); | 1252 | 839k | return *this; | 1253 | 839k | } |
_ZN10QPDFWriter7Members5writeITkNSt3__18integralExEERS0_T_ Line | Count | Source | 1250 | 199k | { | 1251 | 199k | pipeline->write(std::to_string(val)); | 1252 | 199k | return *this; | 1253 | 199k | } |
_ZN10QPDFWriter7Members5writeITkNSt3__18integralEmEERS0_T_ Line | Count | Source | 1250 | 64.6k | { | 1251 | 64.6k | pipeline->write(std::to_string(val)); | 1252 | 64.6k | return *this; | 1253 | 64.6k | } |
_ZN10QPDFWriter7Members5writeITkNSt3__18integralEjEERS0_T_ Line | Count | Source | 1250 | 7.49k | { | 1251 | 7.49k | pipeline->write(std::to_string(val)); | 1252 | 7.49k | return *this; | 1253 | 7.49k | } |
|
1254 | | |
1255 | | QPDFWriter::Members& |
1256 | | QPDFWriter::Members::write(size_t count, char c) |
1257 | 24.5k | { |
1258 | 24.5k | pipeline->write(count, c); |
1259 | 24.5k | return *this; |
1260 | 24.5k | } |
1261 | | |
1262 | | QPDFWriter::Members& |
1263 | | QPDFWriter::Members::write_name(std::string const& str) |
1264 | 856k | { |
1265 | 856k | pipeline->write(Name::normalize(str)); |
1266 | 856k | return *this; |
1267 | 856k | } |
1268 | | |
1269 | | QPDFWriter::Members& |
1270 | | QPDFWriter::Members::write_string(std::string const& str, bool force_binary) |
1271 | 106k | { |
1272 | 106k | pipeline->write(QPDF_String(str).unparse(force_binary)); |
1273 | 106k | return *this; |
1274 | 106k | } |
1275 | | |
template <typename... Args>
QPDFWriter::Members&
QPDFWriter::Members::write_qdf(Args&&... args)
{
    // Write the arguments to the current pipeline only in QDF mode; otherwise do nothing.
    // Returns *this either way so that calls can be chained.
    if (!qdf_mode) {
        return *this;
    }
    pipeline->write(std::forward<Args>(args)...);
    return *this;
}
1285 | | |
template <typename... Args>
QPDFWriter::Members&
QPDFWriter::Members::write_no_qdf(Args&&... args)
{
    // Write the arguments to the current pipeline only when QDF mode is off; in QDF mode do
    // nothing. Returns *this either way so that calls can be chained.
    if (qdf_mode) {
        return *this;
    }
    pipeline->write(std::forward<Args>(args)...);
    return *this;
}
1295 | | |
1296 | | void |
1297 | | QPDFWriter::Members::adjustAESStreamLength(size_t& length) |
1298 | 50.1k | { |
1299 | 50.1k | if (encryption && !cur_data_key.empty() && encrypt_use_aes) { |
1300 | | // Stream length will be padded with 1 to 16 bytes to end up as a multiple of 16. It will |
1301 | | // also be prepended by 16 bits of random data. |
1302 | 50.1k | length += 32 - (length & 0xf); |
1303 | 50.1k | } |
1304 | 50.1k | } |
1305 | | |
1306 | | QPDFWriter::Members& |
1307 | | QPDFWriter::Members::write_encrypted(std::string_view str) |
1308 | 49.8k | { |
1309 | 49.8k | if (!(encryption && !cur_data_key.empty())) { |
1310 | 0 | write(str); |
1311 | 49.8k | } else if (encrypt_use_aes) { |
1312 | 49.8k | write(pl::pipe<Pl_AES_PDF>(str, true, cur_data_key)); |
1313 | 49.8k | } else { |
1314 | 0 | write(pl::pipe<Pl_RC4>(str, cur_data_key)); |
1315 | 0 | } |
1316 | | |
1317 | 49.8k | return *this; |
1318 | 49.8k | } |
1319 | | |
1320 | | void |
1321 | | QPDFWriter::Members::computeDeterministicIDData() |
1322 | 0 | { |
1323 | 0 | if (!id2.empty()) { |
1324 | | // Can't happen in the code |
1325 | 0 | throw std::logic_error( |
1326 | 0 | "Deterministic ID computation enabled after ID generation has already occurred."); |
1327 | 0 | } |
1328 | 0 | qpdf_assert_debug(deterministic_id_data.empty()); |
1329 | 0 | deterministic_id_data = pipeline_stack.hex_digest(); |
1330 | 0 | } |
1331 | | |
1332 | | int |
1333 | | QPDFWriter::Members::openObject(int objid) |
1334 | 271k | { |
1335 | 271k | if (objid == 0) { |
1336 | 0 | objid = next_objid++; |
1337 | 0 | } |
1338 | 271k | new_obj[objid].xref = QPDFXRefEntry(pipeline->getCount()); |
1339 | 271k | write(objid).write(" 0 obj\n"); |
1340 | 271k | return objid; |
1341 | 271k | } |
1342 | | |
1343 | | void |
1344 | | QPDFWriter::Members::closeObject(int objid) |
1345 | 270k | { |
1346 | | // Write a newline before endobj as it makes the file easier to repair. |
1347 | 270k | write("\nendobj\n").write_qdf("\n"); |
1348 | 270k | auto& no = new_obj[objid]; |
1349 | 270k | no.length = pipeline->getCount() - no.xref.getOffset(); |
1350 | 270k | } |
1351 | | |
1352 | | void |
1353 | | QPDFWriter::Members::assignCompressedObjectNumbers(QPDFObjGen og) |
1354 | 133k | { |
1355 | 133k | int objid = og.getObj(); |
1356 | 133k | if (og.getGen() != 0 || !object_stream_to_objects.contains(objid)) { |
1357 | | // This is not an object stream. |
1358 | 130k | return; |
1359 | 130k | } |
1360 | | |
1361 | | // Reserve numbers for the objects that belong to this object stream. |
1362 | 42.8k | for (auto const& iter: object_stream_to_objects[objid]) { |
1363 | 42.8k | obj[iter].renumber = next_objid++; |
1364 | 42.8k | } |
1365 | 3.33k | } |
1366 | | |
1367 | | void |
1368 | | QPDFWriter::Members::enqueueObject(QPDFObjectHandle object) |
1369 | 133k | { |
1370 | 133k | if (object.isIndirect()) { |
1371 | | // This owner check can only be done for indirect objects. It is possible for a direct |
1372 | | // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from |
1373 | | // one file was insert into another file without copying. Doing that is safe even if the |
1374 | | // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner. |
1375 | 133k | if (object.getOwningQPDF() != &pdf) { |
1376 | 0 | throw std::logic_error( |
1377 | 0 | "QPDFObjectHandle from different QPDF found while writing. Use " |
1378 | 0 | "QPDF::copyForeignObject to add objects from another file."); |
1379 | 0 | } |
1380 | | |
1381 | 133k | if (qdf_mode && object.isStreamOfType("/XRef")) { |
1382 | | // As a special case, do not output any extraneous XRef streams in QDF mode. Doing so |
1383 | | // will confuse fix-qdf, which expects to see only one XRef stream at the end of the |
1384 | | // file. This case can occur when creating a QDF from a file with object streams when |
1385 | | // preserving unreferenced objects since the old cross reference streams are not |
1386 | | // actually referenced by object number. |
1387 | 0 | return; |
1388 | 0 | } |
1389 | | |
1390 | 133k | QPDFObjGen og = object.getObjGen(); |
1391 | 133k | auto& o = obj[og]; |
1392 | | |
1393 | 133k | if (o.renumber == 0) { |
1394 | 131k | if (o.object_stream > 0) { |
1395 | | // This is in an object stream. Don't process it here. Instead, enqueue the object |
1396 | | // stream. Object streams always have generation 0. |
1397 | | // Detect loops by storing invalid object ID -1, which will get overwritten later. |
1398 | 31 | o.renumber = -1; |
1399 | 31 | enqueueObject(pdf.getObject(o.object_stream, 0)); |
1400 | 131k | } else { |
1401 | 131k | object_queue.emplace_back(object); |
1402 | 131k | o.renumber = next_objid++; |
1403 | | |
1404 | 131k | if (og.getGen() == 0 && object_stream_to_objects.contains(og.getObj())) { |
1405 | | // For linearized files, uncompressed objects go at end, and we take care of |
1406 | | // assigning numbers to them elsewhere. |
1407 | 3.21k | if (!linearized) { |
1408 | 0 | assignCompressedObjectNumbers(og); |
1409 | 0 | } |
1410 | 128k | } else if (!direct_stream_lengths && object.isStream()) { |
1411 | | // reserve next object ID for length |
1412 | 0 | ++next_objid; |
1413 | 0 | } |
1414 | 131k | } |
1415 | 131k | } else if (o.renumber == -1) { |
1416 | | // This can happen if a specially constructed file indicates that an object stream is |
1417 | | // inside itself. |
1418 | 1 | } |
1419 | 133k | return; |
1420 | 133k | } else if (!linearized) { |
1421 | 0 | if (object.isArray()) { |
1422 | 0 | for (auto& item: object.as_array()) { |
1423 | 0 | enqueueObject(item); |
1424 | 0 | } |
1425 | 0 | } else if (auto d = object.as_dictionary()) { |
1426 | 0 | for (auto const& item: d) { |
1427 | 0 | if (!item.second.null()) { |
1428 | 0 | enqueueObject(item.second); |
1429 | 0 | } |
1430 | 0 | } |
1431 | 0 | } |
1432 | 207 | } else { |
1433 | | // ignore |
1434 | 207 | } |
1435 | 133k | } |
1436 | | |
1437 | | void |
1438 | | QPDFWriter::Members::unparseChild(QPDFObjectHandle const& child, size_t level, int flags) |
1439 | 1.95M | { |
1440 | 1.95M | if (!linearized) { |
1441 | 0 | enqueueObject(child); |
1442 | 0 | } |
1443 | 1.95M | if (child.isIndirect()) { |
1444 | 397k | write(obj[child].renumber).write(" 0 R"); |
1445 | 1.56M | } else { |
1446 | 1.56M | unparseObject(child, level, flags); |
1447 | 1.56M | } |
1448 | 1.95M | } |
1449 | | |
1450 | | void |
1451 | | QPDFWriter::Members::writeTrailer( |
1452 | | trailer_e which, int size, bool xref_stream, qpdf_offset_t prev, int linearization_pass) |
1453 | 30.4k | { |
1454 | 30.4k | auto trailer = trimmed_trailer(); |
1455 | 30.4k | if (xref_stream) { |
1456 | 2.49k | cur_data_key.clear(); |
1457 | 27.9k | } else { |
1458 | 27.9k | write("trailer <<"); |
1459 | 27.9k | } |
1460 | 30.4k | write_qdf("\n"); |
1461 | 30.4k | if (which == t_lin_second) { |
1462 | 15.0k | write(" /Size ").write(size); |
1463 | 15.4k | } else { |
1464 | 39.0k | for (auto const& [key, value]: trailer) { |
1465 | 39.0k | if (value.null()) { |
1466 | 9.00k | continue; |
1467 | 9.00k | } |
1468 | 30.0k | write_qdf(" ").write_no_qdf(" ").write_name(key).write(" "); |
1469 | 30.0k | if (key == "/Size") { |
1470 | 4.42k | write(size); |
1471 | 4.42k | if (which == t_lin_first) { |
1472 | 4.42k | write(" /Prev "); |
1473 | 4.42k | qpdf_offset_t pos = pipeline->getCount(); |
1474 | 4.42k | write(prev).write(QIntC::to_size(pos - pipeline->getCount() + 21), ' '); |
1475 | 4.42k | } |
1476 | 25.6k | } else { |
1477 | 25.6k | unparseChild(value, 1, 0); |
1478 | 25.6k | } |
1479 | 30.0k | write_qdf("\n"); |
1480 | 30.0k | } |
1481 | 15.4k | } |
1482 | | |
1483 | | // Write ID |
1484 | 30.4k | write_qdf(" ").write(" /ID ["); |
1485 | 30.4k | if (linearization_pass == 1) { |
1486 | 15.4k | std::string original_id1 = getOriginalID1(); |
1487 | 15.4k | if (original_id1.empty()) { |
1488 | 13.1k | write("<00000000000000000000000000000000>"); |
1489 | 13.1k | } else { |
1490 | | // Write a string of zeroes equal in length to the representation of the original ID. |
1491 | | // While writing the original ID would have the same number of bytes, it would cause a |
1492 | | // change to the deterministic ID generated by older versions of the software that |
1493 | | // hard-coded the length of the ID to 16 bytes. |
1494 | 2.26k | size_t len = QPDF_String(original_id1).unparse(true).length() - 2; |
1495 | 2.26k | write("<").write(len, '0').write(">"); |
1496 | 2.26k | } |
1497 | 15.4k | write("<00000000000000000000000000000000>"); |
1498 | 15.4k | } else { |
1499 | 14.9k | if (linearization_pass == 0 && deterministic_id) { |
1500 | 0 | computeDeterministicIDData(); |
1501 | 0 | } |
1502 | 14.9k | generateID(encryption.get()); |
1503 | 14.9k | write_string(id1, true).write_string(id2, true); |
1504 | 14.9k | } |
1505 | 30.4k | write("]"); |
1506 | | |
1507 | 30.4k | if (which != t_lin_second) { |
1508 | | // Write reference to encryption dictionary |
1509 | 15.4k | if (encryption) { |
1510 | 15.4k | write(" /Encrypt ").write(encryption_dict_objid).write(" 0 R"); |
1511 | 15.4k | } |
1512 | 15.4k | } |
1513 | | |
1514 | 30.4k | write_qdf("\n>>").write_no_qdf(" >>"); |
1515 | 30.4k | } |
1516 | | |
1517 | | bool |
1518 | | QPDFWriter::Members::will_filter_stream(QPDFObjectHandle stream) |
1519 | 21.2k | { |
1520 | 21.2k | std::string s; |
1521 | 21.2k | [[maybe_unused]] auto [filter, ignore1, ignore2] = will_filter_stream(stream, &s); |
1522 | 21.2k | return filter; |
1523 | 21.2k | } |
1524 | | |
1525 | | std::tuple<const bool, const bool, const bool> |
1526 | | QPDFWriter::Members::will_filter_stream(QPDFObjectHandle stream, std::string* stream_data) |
1527 | 59.3k | { |
1528 | 59.3k | const bool is_root_metadata = stream.isRootMetadata(); |
1529 | 59.3k | bool filter = false; |
1530 | 59.3k | auto decode_level = stream_decode_level; |
1531 | 59.3k | int encode_flags = 0; |
1532 | 59.3k | Dictionary stream_dict = stream.getDict(); |
1533 | | |
1534 | 59.3k | if (stream.getFilterOnWrite()) { |
1535 | 44.9k | filter = stream.isDataModified() || compress_streams || decode_level != qpdf_dl_none; |
1536 | 44.9k | if (compress_streams) { |
1537 | | // Don't filter if the stream is already compressed with FlateDecode. This way we don't |
1538 | | // make it worse if the original file used a better Flate algorithm, and we don't spend |
1539 | | // time and CPU cycles uncompressing and recompressing stuff. This can be overridden |
1540 | | // with setRecompressFlate(true). |
1541 | 44.9k | Name Filter = stream_dict["/Filter"]; |
1542 | 44.9k | if (Filter && !recompress_flate && !stream.isDataModified() && |
1543 | 19.0k | (Filter == "/FlateDecode" || Filter == "/Fl")) { |
1544 | 11.3k | filter = false; |
1545 | 11.3k | } |
1546 | 44.9k | } |
1547 | 44.9k | if (is_root_metadata && (!encryption || !encryption->getEncryptMetadata())) { |
1548 | 0 | filter = true; |
1549 | 0 | decode_level = qpdf_dl_all; |
1550 | 44.9k | } else if (normalize_content && normalized_streams.contains(stream)) { |
1551 | 0 | encode_flags = qpdf_ef_normalize; |
1552 | 0 | filter = true; |
1553 | 44.9k | } else if (filter && compress_streams) { |
1554 | 33.5k | encode_flags = qpdf_ef_compress; |
1555 | 33.5k | } |
1556 | 44.9k | } |
1557 | | |
1558 | | // Disable compression for empty streams to improve compatibility |
1559 | 59.3k | if (Integer(stream_dict["/Length"]) == 0) { |
1560 | 2.53k | filter = true; |
1561 | 2.53k | encode_flags = 0; |
1562 | 2.53k | } |
1563 | | |
1564 | 68.6k | for (bool first_attempt: {true, false}) { |
1565 | 68.6k | auto pp_stream_data = |
1566 | 68.6k | stream_data ? pipeline_stack.activate(*stream_data) : pipeline_stack.activate(true); |
1567 | | |
1568 | 68.6k | try { |
1569 | 68.6k | if (stream.pipeStreamData( |
1570 | 68.6k | pipeline, |
1571 | 68.6k | filter ? encode_flags : 0, |
1572 | 68.6k | filter ? decode_level : qpdf_dl_none, |
1573 | 68.6k | false, |
1574 | 68.6k | first_attempt)) { |
1575 | 25.9k | return {true, encode_flags & qpdf_ef_compress, is_root_metadata}; |
1576 | 25.9k | } |
1577 | 42.6k | if (!filter) { |
1578 | 33.2k | break; |
1579 | 33.2k | } |
1580 | 42.6k | } catch (std::runtime_error& e) { |
1581 | 92 | if (!(filter && first_attempt)) { |
1582 | 19 | throw std::runtime_error( |
1583 | 19 | "error while getting stream data for " + stream.unparse() + ": " + e.what()); |
1584 | 19 | } |
1585 | 73 | stream.warn("error while getting stream data: "s + e.what()); |
1586 | 73 | stream.warn("qpdf will attempt to write the damaged stream unchanged"); |
1587 | 73 | } |
1588 | | // Try again |
1589 | 9.29k | filter = false; |
1590 | 9.29k | stream.setFilterOnWrite(false); |
1591 | 9.29k | if (stream_data) { |
1592 | 9.29k | stream_data->clear(); |
1593 | 9.29k | } |
1594 | 9.29k | } |
1595 | 33.2k | return {false, false, is_root_metadata}; |
1596 | 59.3k | } |
1597 | | |
1598 | | void |
1599 | | QPDFWriter::Members::unparseObject( |
1600 | | QPDFObjectHandle object, size_t level, int flags, size_t stream_length, bool compress) |
1601 | 1.89M | { |
1602 | 1.89M | QPDFObjGen old_og = object.getObjGen(); |
1603 | 1.89M | int child_flags = flags & ~f_stream; |
1604 | | // For non-qdf, "indent" and "indent_large" are a single space between tokens. For qdf, they |
1605 | | // include the preceding newline. |
1606 | 1.89M | std::string indent_large = " "; |
1607 | 1.89M | if (qdf_mode) { |
1608 | 0 | indent_large.append(2 * (level + 1), ' '); |
1609 | 0 | indent_large[0] = '\n'; |
1610 | 0 | } |
1611 | 1.89M | std::string_view indent{indent_large.data(), qdf_mode ? indent_large.size() - 2 : 1}; |
1612 | | |
1613 | 1.89M | if (auto const tc = object.getTypeCode(); tc == ::ot_array) { |
1614 | | // Note: PDF spec 1.4 implementation note 121 states that Acrobat requires a space after the |
1615 | | // [ in the /H key of the linearization parameter dictionary. We'll do this unconditionally |
1616 | | // for all arrays because it looks nicer and doesn't make the files that much bigger. |
1617 | 115k | write("["); |
1618 | 1.10M | for (auto const& item: object.as_array()) { |
1619 | 1.10M | write(indent_large); |
1620 | 1.10M | unparseChild(item, level + 1, child_flags); |
1621 | 1.10M | } |
1622 | 115k | write(indent).write("]"); |
1623 | 1.77M | } else if (tc == ::ot_dictionary) { |
1624 | | // Handle special cases for specific dictionaries. |
1625 | | |
1626 | 282k | if (old_og == root_og) { |
1627 | | // Extensions dictionaries. |
1628 | | |
1629 | | // We have one of several cases: |
1630 | | // |
1631 | | // * We need ADBE |
1632 | | // - We already have Extensions |
1633 | | // - If it has the right ADBE, preserve it |
1634 | | // - Otherwise, replace ADBE |
1635 | | // - We don't have Extensions: create one from scratch |
1636 | | // * We don't want ADBE |
1637 | | // - We already have Extensions |
1638 | | // - If it only has ADBE, remove it |
1639 | | // - If it has other things, keep those and remove ADBE |
1640 | | // - We have no extensions: no action required |
1641 | | // |
1642 | | // Before writing, we guarantee that /Extensions, if present, is direct through the ADBE |
1643 | | // dictionary, so we can modify in place. |
1644 | | |
1645 | 15.3k | auto extensions = object.getKey("/Extensions"); |
1646 | 15.3k | const bool has_extensions = extensions.isDictionary(); |
1647 | 15.3k | const bool need_extensions_adbe = final_extension_level > 0; |
1648 | | |
1649 | 15.3k | if (has_extensions || need_extensions_adbe) { |
1650 | | // Make a shallow copy of this object so we can modify it safely without affecting |
1651 | | // the original. This code has logic to skip certain keys in agreement with |
1652 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them |
1653 | | // doesn't leave unreferenced objects in the output. We can use unsafeShallowCopy |
1654 | | // here because all we are doing is removing or replacing top-level keys. |
1655 | 15.0k | object = object.unsafeShallowCopy(); |
1656 | 15.0k | if (!has_extensions) { |
1657 | 14.3k | extensions = QPDFObjectHandle(); |
1658 | 14.3k | } |
1659 | | |
1660 | 15.0k | const bool have_extensions_adbe = extensions && extensions.hasKey("/ADBE"); |
1661 | 15.0k | const bool have_extensions_other = |
1662 | 15.0k | extensions && extensions.getKeys().size() > (have_extensions_adbe ? 1u : 0u); |
1663 | | |
1664 | 15.0k | if (need_extensions_adbe) { |
1665 | 15.0k | if (!(have_extensions_other || have_extensions_adbe)) { |
1666 | | // We need Extensions and don't have it. Create it here. |
1667 | 14.4k | QTC::TC("qpdf", "QPDFWriter create Extensions", qdf_mode ? 0 : 1); |
1668 | 14.4k | extensions = object.replaceKeyAndGetNew( |
1669 | 14.4k | "/Extensions", QPDFObjectHandle::newDictionary()); |
1670 | 14.4k | } |
1671 | 15.0k | } else if (!have_extensions_other) { |
1672 | | // We have Extensions dictionary and don't want one. |
1673 | 13 | if (have_extensions_adbe) { |
1674 | 8 | QTC::TC("qpdf", "QPDFWriter remove existing Extensions"); |
1675 | 8 | object.removeKey("/Extensions"); |
1676 | 8 | extensions = QPDFObjectHandle(); // uninitialized |
1677 | 8 | } |
1678 | 13 | } |
1679 | | |
1680 | 15.0k | if (extensions) { |
1681 | 15.0k | QTC::TC("qpdf", "QPDFWriter preserve Extensions"); |
1682 | 15.0k | QPDFObjectHandle adbe = extensions.getKey("/ADBE"); |
1683 | 15.0k | if (adbe.isDictionary() && |
1684 | 460 | adbe.getKey("/BaseVersion").isNameAndEquals("/" + final_pdf_version) && |
1685 | 306 | adbe.getKey("/ExtensionLevel").isInteger() && |
1686 | 297 | (adbe.getKey("/ExtensionLevel").getIntValue() == final_extension_level)) { |
1687 | 14.7k | } else { |
1688 | 14.7k | if (need_extensions_adbe) { |
1689 | 14.7k | extensions.replaceKey( |
1690 | 14.7k | "/ADBE", |
1691 | 14.7k | QPDFObjectHandle::parse( |
1692 | 14.7k | "<< /BaseVersion /" + final_pdf_version + " /ExtensionLevel " + |
1693 | 14.7k | std::to_string(final_extension_level) + " >>")); |
1694 | 14.7k | } else { |
1695 | 24 | extensions.removeKey("/ADBE"); |
1696 | 24 | } |
1697 | 14.7k | } |
1698 | 15.0k | } |
1699 | 15.0k | } |
1700 | 15.3k | } |
1701 | | |
1702 | | // Stream dictionaries. |
1703 | | |
1704 | 282k | if (flags & f_stream) { |
1705 | | // Suppress /Length since we will write it manually |
1706 | | |
1707 | | // Make a shallow copy of this object so we can modify it safely without affecting the |
1708 | | // original. This code has logic to skip certain keys in agreement with |
1709 | | // prepareFileForWrite and with skip_stream_parameters so that replacing them doesn't |
1710 | | // leave unreferenced objects in the output. We can use unsafeShallowCopy here because |
1711 | | // all we are doing is removing or replacing top-level keys. |
1712 | 37.9k | object = object.unsafeShallowCopy(); |
1713 | | |
1714 | 37.9k | object.removeKey("/Length"); |
1715 | | |
1716 | | // If /DecodeParms is an empty list, remove it. |
1717 | 37.9k | if (object.getKey("/DecodeParms").empty()) { |
1718 | 35.8k | object.removeKey("/DecodeParms"); |
1719 | 35.8k | } |
1720 | | |
1721 | 37.9k | if (flags & f_filtered) { |
1722 | | // We will supply our own filter and decode parameters. |
1723 | 16.6k | object.removeKey("/Filter"); |
1724 | 16.6k | object.removeKey("/DecodeParms"); |
1725 | 21.3k | } else { |
1726 | | // Make sure, no matter what else we have, that we don't have /Crypt in the output |
1727 | | // filters. |
1728 | 21.3k | QPDFObjectHandle filter = object.getKey("/Filter"); |
1729 | 21.3k | QPDFObjectHandle decode_parms = object.getKey("/DecodeParms"); |
1730 | 21.3k | if (filter.isOrHasName("/Crypt")) { |
1731 | 378 | if (filter.isName()) { |
1732 | 28 | object.removeKey("/Filter"); |
1733 | 28 | object.removeKey("/DecodeParms"); |
1734 | 350 | } else { |
1735 | 350 | int idx = 0; |
1736 | 16.4k | for (auto const& item: filter.as_array()) { |
1737 | 16.4k | if (item.isNameAndEquals("/Crypt")) { |
1738 | | // If filter is an array, then the code in QPDF_Stream has already |
1739 | | // verified that DecodeParms and Filters are arrays of the same |
1740 | | // length, but if they weren't for some reason, eraseItem does type |
1741 | | // and bounds checking. Fuzzing tells us that this can actually |
1742 | | // happen. |
1743 | 350 | filter.eraseItem(idx); |
1744 | 350 | decode_parms.eraseItem(idx); |
1745 | 350 | break; |
1746 | 350 | } |
1747 | 16.1k | ++idx; |
1748 | 16.1k | } |
1749 | 350 | } |
1750 | 378 | } |
1751 | 21.3k | } |
1752 | 37.9k | } |
1753 | | |
1754 | 282k | write("<<"); |
1755 | | |
1756 | 989k | for (auto const& [key, value]: object.as_dictionary()) { |
1757 | 989k | if (!value.null()) { |
1758 | 826k | write(indent_large).write_name(key).write(" "); |
1759 | 826k | if (key == "/Contents" && object.isDictionaryOfType("/Sig") && |
1760 | 32 | object.hasKey("/ByteRange")) { |
1761 | 20 | QTC::TC("qpdf", "QPDFWriter no encryption sig contents"); |
1762 | 20 | unparseChild(value, level + 1, child_flags | f_hex_string | f_no_encryption); |
1763 | 826k | } else { |
1764 | 826k | unparseChild(value, level + 1, child_flags); |
1765 | 826k | } |
1766 | 826k | } |
1767 | 989k | } |
1768 | | |
1769 | 282k | if (flags & f_stream) { |
1770 | 37.7k | write(indent_large).write("/Length "); |
1771 | | |
1772 | 37.7k | if (direct_stream_lengths) { |
1773 | 37.7k | write(stream_length); |
1774 | 37.7k | } else { |
1775 | 0 | write(cur_stream_length_id).write(" 0 R"); |
1776 | 0 | } |
1777 | 37.7k | if (compress && (flags & f_filtered)) { |
1778 | 16.4k | write(indent_large).write("/Filter /FlateDecode"); |
1779 | 16.4k | } |
1780 | 37.7k | } |
1781 | | |
1782 | 282k | write(indent).write(">>"); |
1783 | 1.49M | } else if (tc == ::ot_stream) { |
1784 | | // Write stream data to a buffer. |
1785 | 38.0k | if (!direct_stream_lengths) { |
1786 | 0 | cur_stream_length_id = obj[old_og].renumber + 1; |
1787 | 0 | } |
1788 | | |
1789 | 38.0k | flags |= f_stream; |
1790 | 38.0k | std::string stream_data; |
1791 | 38.0k | auto [filter, compress_stream, is_root_metadata] = will_filter_stream(object, &stream_data); |
1792 | 38.0k | if (filter) { |
1793 | 16.6k | flags |= f_filtered; |
1794 | 16.6k | } |
1795 | 38.0k | QPDFObjectHandle stream_dict = object.getDict(); |
1796 | | |
1797 | 38.0k | cur_stream_length = stream_data.size(); |
1798 | 38.0k | if (is_root_metadata && encryption && !encryption->getEncryptMetadata()) { |
1799 | | // Don't encrypt stream data for the metadata stream |
1800 | 0 | cur_data_key.clear(); |
1801 | 0 | } |
1802 | 38.0k | adjustAESStreamLength(cur_stream_length); |
1803 | 38.0k | unparseObject(stream_dict, 0, flags, cur_stream_length, compress_stream); |
1804 | 38.0k | char last_char = stream_data.empty() ? '\0' : stream_data.back(); |
1805 | 38.0k | write("\nstream\n").write_encrypted(stream_data); |
1806 | 38.0k | added_newline = newline_before_endstream || (qdf_mode && last_char != '\n'); |
1807 | 38.0k | write(added_newline ? "\nendstream" : "endstream"); |
1808 | 1.45M | } else if (tc == ::ot_string) { |
1809 | 43.3k | std::string val; |
1810 | 43.3k | if (encryption && !(flags & f_in_ostream) && !(flags & f_no_encryption) && |
1811 | 30.1k | !cur_data_key.empty()) { |
1812 | 25.9k | val = object.getStringValue(); |
1813 | 25.9k | if (encrypt_use_aes) { |
1814 | 25.9k | Pl_Buffer bufpl("encrypted string"); |
1815 | 25.9k | Pl_AES_PDF pl("aes encrypt string", &bufpl, true, cur_data_key); |
1816 | 25.9k | pl.writeString(val); |
1817 | 25.9k | pl.finish(); |
1818 | 25.9k | val = QPDF_String(bufpl.getString()).unparse(true); |
1819 | 25.9k | } else { |
1820 | 0 | auto tmp_ph = QUtil::make_unique_cstr(val); |
1821 | 0 | char* tmp = tmp_ph.get(); |
1822 | 0 | size_t vlen = val.length(); |
1823 | 0 | RC4 rc4( |
1824 | 0 | QUtil::unsigned_char_pointer(cur_data_key), |
1825 | 0 | QIntC::to_int(cur_data_key.length())); |
1826 | 0 | auto data = QUtil::unsigned_char_pointer(tmp); |
1827 | 0 | rc4.process(data, vlen, data); |
1828 | 0 | val = QPDF_String(std::string(tmp, vlen)).unparse(); |
1829 | 0 | } |
1830 | 25.9k | } else if (flags & f_hex_string) { |
1831 | 20 | val = QPDF_String(object.getStringValue()).unparse(true); |
1832 | 17.3k | } else { |
1833 | 17.3k | val = object.unparseResolved(); |
1834 | 17.3k | } |
1835 | 43.3k | write(val); |
1836 | 1.41M | } else { |
1837 | 1.41M | write(object.unparseResolved()); |
1838 | 1.41M | } |
1839 | 1.89M | } |
1840 | | |
1841 | | void |
1842 | | QPDFWriter::Members::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets, int first_obj) |
1843 | 9.37k | { |
1844 | 9.37k | qpdf_assert_debug(first_obj > 0); |
1845 | 9.37k | bool is_first = true; |
1846 | 9.37k | auto id = std::to_string(first_obj) + ' '; |
1847 | 138k | for (auto& offset: offsets) { |
1848 | 138k | if (is_first) { |
1849 | 9.37k | is_first = false; |
1850 | 128k | } else { |
1851 | 128k | write_qdf("\n").write_no_qdf(" "); |
1852 | 128k | } |
1853 | 138k | write(id); |
1854 | 138k | util::increment(id, 1); |
1855 | 138k | write(offset); |
1856 | 138k | } |
1857 | 9.37k | write("\n"); |
1858 | 9.37k | } |
1859 | | |
// Write the object stream whose original object is `object` as a complete indirect object
// (dictionary with /Type /ObjStm, followed by the stream data).
//
// The members of the stream are taken from object_stream_to_objects[<original object id>]. The
// stream data is built in two passes: pass 1 serializes every member into a buffer while
// recording where each one starts; pass 2 writes the offset table followed by the pass 1 data.
// The /First value written to the dictionary is the size of the offset table plus the size of
// anything that pass 1 emitted before the first member.
void
QPDFWriter::Members::writeObjectStream(QPDFObjectHandle object)
{
    // Note: object might be null if this is a place-holder for an object stream that we are
    // generating from scratch.

    QPDFObjGen old_og = object.getObjGen();
    qpdf_assert_debug(old_og.getGen() == 0);
    int old_id = old_og.getObj();
    int new_stream_id = obj[old_og].renumber;

    // Start position of each member within the pass 1 buffer; rebased further down.
    std::vector<qpdf_offset_t> offsets;
    qpdf_offset_t first = 0;

    // Generate stream itself. We have to do this in two passes so we can calculate offsets in the
    // first pass.
    std::string stream_buffer_pass1;
    std::string stream_buffer_pass2;
    // New object number of the first member; -1 until the first member has been seen.
    int first_obj = -1;
    const bool compressed = compress_streams && !qdf_mode;
    {
        // Pass 1
        // Output is directed into stream_buffer_pass1 for the duration of this scope.
        auto pp_ostream_pass1 = pipeline_stack.activate(stream_buffer_pass1);

        // Zero-based index of the current member within the object stream.
        int count = -1;
        for (auto const& og: object_stream_to_objects[old_id]) {
            ++count;
            int new_o = obj[og].renumber;
            if (first_obj == -1) {
                first_obj = new_o;
            }
            if (qdf_mode) {
                write("%% Object stream: object ").write(new_o).write(", index ").write(count);
                if (!suppress_original_object_ids) {
                    write("; original object ID: ").write(og.getObj());
                    // For compatibility, only write the generation if non-zero. While object
                    // streams only allow objects with generation 0, if we are generating object
                    // streams, the old object could have a non-zero generation.
                    if (og.getGen() != 0) {
                        write(" ").write(og.getGen());
                    }
                }
                write("\n");
            }

            // Record the position after any QDF comment, i.e. where the member itself begins.
            offsets.push_back(pipeline->getCount());
            // To avoid double-counting objects being written in object streams for progress
            // reporting, decrement in pass 1.
            indicateProgress(true, false);

            QPDFObjectHandle obj_to_write = pdf.getObject(og);
            if (obj_to_write.isStream()) {
                // This condition occurred in a fuzz input. Ideally we should block it at parse
                // time, but it's not clear to me how to construct a case for this.
                obj_to_write.warn("stream found inside object stream; treating as null");
                obj_to_write = QPDFObjectHandle::newNull();
            }
            writeObject(obj_to_write, count);

            // Cross-reference entry for the member: containing stream and index within it.
            new_obj[new_o].xref = QPDFXRefEntry(new_stream_id, count);
        }
    }
    {
        // Adjust offsets to skip over comment before first object
        // offsets.at(0) throws std::out_of_range if no member was written in pass 1.
        first = offsets.at(0);
        for (auto& iter: offsets) {
            iter -= first;
        }

        // Take one pass at writing pairs of numbers so we can get their size information
        {
            // NOTE(review): activate(true) presumably installs a discarding pipeline that only
            // counts bytes -- confirm against Pl_stack.
            auto pp_discard = pipeline_stack.activate(true);
            writeObjectStreamOffsets(offsets, first_obj);
            first += pipeline->getCount();
        }

        // Set up a stream to write the stream data into a buffer.
        auto pp_ostream = pipeline_stack.activate(stream_buffer_pass2);

        writeObjectStreamOffsets(offsets, first_obj);
        write(stream_buffer_pass1);
        // The pass 1 data has been copied into pass 2; release its memory.
        stream_buffer_pass1.clear();
        stream_buffer_pass1.shrink_to_fit();
        if (compressed) {
            stream_buffer_pass2 = pl::pipe<Pl_Flate>(stream_buffer_pass2, Pl_Flate::a_deflate);
        }
    }

    // Write the object
    openObject(new_stream_id);
    setDataKey(new_stream_id);
    write("<<").write_qdf("\n ").write(" /Type /ObjStm").write_qdf("\n ");
    size_t length = stream_buffer_pass2.size();
    // NOTE(review): presumably grows length to the encrypted size when AES is in use -- confirm.
    adjustAESStreamLength(length);
    write(" /Length ").write(length).write_qdf("\n ");
    if (compressed) {
        write(" /Filter /FlateDecode");
    }
    write(" /N ").write(offsets.size()).write_qdf("\n ").write(" /First ").write(first);
    if (!object.null()) {
        // If the original object has an /Extends key, preserve it.
        QPDFObjectHandle dict = object.getDict();
        QPDFObjectHandle extends = dict.getKey("/Extends");
        if (extends.isIndirect()) {
            write_qdf("\n ").write(" /Extends ");
            unparseChild(extends, 1, f_in_ostream);
        }
    }
    write_qdf("\n").write_no_qdf(" ").write(">>\nstream\n").write_encrypted(stream_buffer_pass2);
    if (encryption) {
        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
    }
    write(newline_before_endstream ? "\nendstream" : "endstream");
    // Clear the per-object key that setDataKey established above.
    cur_data_key.clear();
    closeObject(new_stream_id);
}
1976 | | |
1977 | | void |
1978 | | QPDFWriter::Members::writeObject(QPDFObjectHandle object, int object_stream_index) |
1979 | 299k | { |
1980 | 299k | QPDFObjGen old_og = object.getObjGen(); |
1981 | | |
1982 | 299k | if (object_stream_index == -1 && old_og.getGen() == 0 && |
1983 | 228k | object_stream_to_objects.contains(old_og.getObj())) { |
1984 | 4.69k | writeObjectStream(object); |
1985 | 4.69k | return; |
1986 | 4.69k | } |
1987 | | |
1988 | 294k | indicateProgress(false, false); |
1989 | 294k | auto new_id = obj[old_og].renumber; |
1990 | 294k | if (qdf_mode) { |
1991 | 0 | if (page_object_to_seq.contains(old_og)) { |
1992 | 0 | write("%% Page ").write(page_object_to_seq[old_og]).write("\n"); |
1993 | 0 | } |
1994 | 0 | if (contents_to_page_seq.contains(old_og)) { |
1995 | 0 | write("%% Contents for page ").write(contents_to_page_seq[old_og]).write("\n"); |
1996 | 0 | } |
1997 | 0 | } |
1998 | 294k | if (object_stream_index == -1) { |
1999 | 225k | if (qdf_mode && !suppress_original_object_ids) { |
2000 | 0 | write("%% Original object ID: ").write(object.getObjGen().unparse(' ')).write("\n"); |
2001 | 0 | } |
2002 | 225k | openObject(new_id); |
2003 | 225k | setDataKey(new_id); |
2004 | 225k | unparseObject(object, 0, 0); |
2005 | 225k | cur_data_key.clear(); |
2006 | 225k | closeObject(new_id); |
2007 | 225k | } else { |
2008 | 69.0k | unparseObject(object, 0, f_in_ostream); |
2009 | 69.0k | write("\n"); |
2010 | 69.0k | } |
2011 | | |
2012 | 294k | if (!direct_stream_lengths && object.isStream()) { |
2013 | 0 | if (qdf_mode) { |
2014 | 0 | if (added_newline) { |
2015 | 0 | write("%QDF: ignore_newline\n"); |
2016 | 0 | } |
2017 | 0 | } |
2018 | 0 | openObject(new_id + 1); |
2019 | 0 | write(cur_stream_length); |
2020 | 0 | closeObject(new_id + 1); |
2021 | 0 | } |
2022 | 294k | } |
2023 | | |
2024 | | std::string |
2025 | | QPDFWriter::Members::getOriginalID1() |
2026 | 24.6k | { |
2027 | 24.6k | QPDFObjectHandle trailer = pdf.getTrailer(); |
2028 | 24.6k | if (trailer.hasKey("/ID")) { |
2029 | 3.72k | return trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
2030 | 20.8k | } else { |
2031 | 20.8k | return ""; |
2032 | 20.8k | } |
2033 | 24.6k | } |
2034 | | |
2035 | | void |
2036 | | QPDFWriter::Members::generateID(bool encrypted) |
2037 | 24.1k | { |
2038 | | // Generate the ID lazily so that we can handle the user's preference to use static or |
2039 | | // deterministic ID generation. |
2040 | | |
2041 | 24.1k | if (!id2.empty()) { |
2042 | 14.9k | return; |
2043 | 14.9k | } |
2044 | | |
2045 | 9.18k | QPDFObjectHandle trailer = pdf.getTrailer(); |
2046 | | |
2047 | 9.18k | std::string result; |
2048 | | |
2049 | 9.18k | if (static_id) { |
2050 | | // For test suite use only... |
2051 | 9.18k | static unsigned char tmp[] = { |
2052 | 9.18k | 0x31, |
2053 | 9.18k | 0x41, |
2054 | 9.18k | 0x59, |
2055 | 9.18k | 0x26, |
2056 | 9.18k | 0x53, |
2057 | 9.18k | 0x58, |
2058 | 9.18k | 0x97, |
2059 | 9.18k | 0x93, |
2060 | 9.18k | 0x23, |
2061 | 9.18k | 0x84, |
2062 | 9.18k | 0x62, |
2063 | 9.18k | 0x64, |
2064 | 9.18k | 0x33, |
2065 | 9.18k | 0x83, |
2066 | 9.18k | 0x27, |
2067 | 9.18k | 0x95, |
2068 | 9.18k | 0x00}; |
2069 | 9.18k | result = reinterpret_cast<char*>(tmp); |
2070 | 9.18k | } else { |
2071 | | // The PDF specification has guidelines for creating IDs, but it states clearly that the |
2072 | | // only thing that's really important is that it is very likely to be unique. We can't |
2073 | | // really follow the guidelines in the spec exactly because we haven't written the file yet. |
2074 | | // This scheme should be fine though. The deterministic ID case uses a digest of a |
2075 | | // sufficient portion of the file's contents such no two non-matching files would match in |
2076 | | // the subsets used for this computation. Note that we explicitly omit the filename from |
2077 | | // the digest calculation for deterministic ID so that the same file converted with qpdf, in |
2078 | | // that case, would have the same ID regardless of the output file's name. |
2079 | |
|
2080 | 0 | std::string seed; |
2081 | 0 | if (deterministic_id) { |
2082 | 0 | if (encrypted) { |
2083 | 0 | throw std::runtime_error( |
2084 | 0 | "QPDFWriter: unable to generated a deterministic ID because the file to be " |
2085 | 0 | "written is encrypted (even though the file may not require a password)"); |
2086 | 0 | } |
2087 | 0 | if (deterministic_id_data.empty()) { |
2088 | 0 | throw std::logic_error( |
2089 | 0 | "INTERNAL ERROR: QPDFWriter::generateID has no data for deterministic ID"); |
2090 | 0 | } |
2091 | 0 | seed += deterministic_id_data; |
2092 | 0 | } else { |
2093 | 0 | seed += std::to_string(QUtil::get_current_time()); |
2094 | 0 | seed += filename; |
2095 | 0 | seed += " "; |
2096 | 0 | } |
2097 | 0 | seed += " QPDF "; |
2098 | 0 | if (trailer.hasKey("/Info")) { |
2099 | 0 | for (auto const& item: trailer.getKey("/Info").as_dictionary()) { |
2100 | 0 | if (item.second.isString()) { |
2101 | 0 | seed += " "; |
2102 | 0 | seed += item.second.getStringValue(); |
2103 | 0 | } |
2104 | 0 | } |
2105 | 0 | } |
2106 | |
|
2107 | 0 | MD5 md5; |
2108 | 0 | md5.encodeString(seed.c_str()); |
2109 | 0 | MD5::Digest digest; |
2110 | 0 | md5.digest(digest); |
2111 | 0 | result = std::string(reinterpret_cast<char*>(digest), sizeof(MD5::Digest)); |
2112 | 0 | } |
2113 | | |
2114 | | // If /ID already exists, follow the spec: use the original first word and generate a new second |
2115 | | // word. Otherwise, we'll use the generated ID for both. |
2116 | | |
2117 | 9.18k | id2 = result; |
2118 | | // Note: keep /ID from old file even if --static-id was given. |
2119 | 9.18k | id1 = getOriginalID1(); |
2120 | 9.18k | if (id1.empty()) { |
2121 | 7.82k | id1 = id2; |
2122 | 7.82k | } |
2123 | 9.18k | } |
2124 | | |
2125 | | void |
2126 | | QPDFWriter::Members::initializeSpecialStreams() |
2127 | 0 | { |
2128 | | // Mark all page content streams in case we are filtering or normalizing. |
2129 | 0 | std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); |
2130 | 0 | int num = 0; |
2131 | 0 | for (auto& page: pages) { |
2132 | 0 | page_object_to_seq[page.getObjGen()] = ++num; |
2133 | 0 | QPDFObjectHandle contents = page.getKey("/Contents"); |
2134 | 0 | std::vector<QPDFObjGen> contents_objects; |
2135 | 0 | if (contents.isArray()) { |
2136 | 0 | int n = static_cast<int>(contents.size()); |
2137 | 0 | for (int i = 0; i < n; ++i) { |
2138 | 0 | contents_objects.push_back(contents.getArrayItem(i).getObjGen()); |
2139 | 0 | } |
2140 | 0 | } else if (contents.isStream()) { |
2141 | 0 | contents_objects.push_back(contents.getObjGen()); |
2142 | 0 | } |
2143 | |
|
2144 | 0 | for (auto const& c: contents_objects) { |
2145 | 0 | contents_to_page_seq[c] = num; |
2146 | 0 | normalized_streams.insert(c); |
2147 | 0 | } |
2148 | 0 | } |
2149 | 0 | } |
2150 | | |
2151 | | void |
2152 | | QPDFWriter::Members::preserveObjectStreams() |
2153 | 9.16k | { |
2154 | 9.16k | auto const& xref = getXRefTable(); |
2155 | | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
2156 | | // streams out of old objects that have generation numbers greater than zero. However in an |
2157 | | // existing PDF, all object stream objects and all objects in them must have generation 0 |
2158 | | // because the PDF spec does not provide any way to do otherwise. This code filters out objects |
2159 | | // that are not allowed to be in object streams. In addition to removing objects that were |
2160 | | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
2161 | | // objects from being included. |
2162 | 9.16k | auto end = xref.cend(); |
2163 | 9.16k | obj.streams_empty = true; |
2164 | 9.16k | if (preserve_unreferenced_objects) { |
2165 | 0 | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
2166 | 0 | if (iter->second.getType() == 2) { |
2167 | | // Pdf contains object streams. |
2168 | 0 | obj.streams_empty = false; |
2169 | 0 | obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
2170 | 0 | } |
2171 | 0 | } |
2172 | 9.16k | } else { |
2173 | | // Start by scanning for first compressed object in case we don't have any object streams to |
2174 | | // process. |
2175 | 97.0k | for (auto iter = xref.cbegin(); iter != end; ++iter) { |
2176 | 89.1k | if (iter->second.getType() == 2) { |
2177 | | // Pdf contains object streams. |
2178 | 1.29k | obj.streams_empty = false; |
2179 | 1.29k | auto eligible = getCompressibleObjSet(); |
2180 | | // The object pointed to by iter may be a previous generation, in which case it is |
2181 | | // removed by getCompressibleObjSet. We need to restart the loop (while the object |
2182 | | // table may contain multiple generations of an object). |
2183 | 244k | for (iter = xref.cbegin(); iter != end; ++iter) { |
2184 | 243k | if (iter->second.getType() == 2) { |
2185 | 220k | auto id = static_cast<size_t>(iter->first.getObj()); |
2186 | 220k | if (id < eligible.size() && eligible[id]) { |
2187 | 45.5k | obj[iter->first].object_stream = iter->second.getObjStreamNumber(); |
2188 | 175k | } else { |
2189 | 175k | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
2190 | 175k | } |
2191 | 220k | } |
2192 | 243k | } |
2193 | 1.29k | return; |
2194 | 1.29k | } |
2195 | 89.1k | } |
2196 | 9.16k | } |
2197 | 9.16k | } |
2198 | | |
2199 | | void |
2200 | | QPDFWriter::Members::generateObjectStreams() |
2201 | 0 | { |
2202 | | // Basic strategy: make a list of objects that can go into an object stream. Then figure out |
2203 | | // how many object streams are needed so that we can distribute objects approximately evenly |
2204 | | // without having any object stream exceed 100 members. We don't have to worry about linearized |
2205 | | // files here -- if the file is linearized, we take care of excluding things that aren't allowed |
2206 | | // here later. |
2207 | | |
2208 | | // This code doesn't do anything with /Extends. |
2209 | |
|
2210 | 0 | std::vector<QPDFObjGen> eligible = getCompressibleObjGens(); |
2211 | 0 | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
2212 | |
|
2213 | 0 | initializeTables(2U * n_object_streams); |
2214 | 0 | if (n_object_streams == 0) { |
2215 | 0 | obj.streams_empty = true; |
2216 | 0 | return; |
2217 | 0 | } |
2218 | 0 | size_t n_per = eligible.size() / n_object_streams; |
2219 | 0 | if (n_per * n_object_streams < eligible.size()) { |
2220 | 0 | ++n_per; |
2221 | 0 | } |
2222 | 0 | unsigned int n = 0; |
2223 | 0 | int cur_ostream = pdf.newIndirectNull().getObjectID(); |
2224 | 0 | for (auto const& item: eligible) { |
2225 | 0 | if (n == n_per) { |
2226 | 0 | n = 0; |
2227 | | // Construct a new null object as the "original" object stream. The rest of the code |
2228 | | // knows that this means we're creating the object stream from scratch. |
2229 | 0 | cur_ostream = pdf.newIndirectNull().getObjectID(); |
2230 | 0 | } |
2231 | 0 | auto& o = obj[item]; |
2232 | 0 | o.object_stream = cur_ostream; |
2233 | 0 | o.gen = item.getGen(); |
2234 | 0 | ++n; |
2235 | 0 | } |
2236 | 0 | } |
2237 | | |
2238 | | Dictionary |
2239 | | QPDFWriter::Members::trimmed_trailer() |
2240 | 30.4k | { |
2241 | | // Remove keys from the trailer that necessarily have to be replaced when writing the file. |
2242 | | |
2243 | 30.4k | Dictionary trailer = pdf.getTrailer().unsafeShallowCopy(); |
2244 | | |
2245 | | // Remove encryption keys |
2246 | 30.4k | trailer.erase("/ID"); |
2247 | 30.4k | trailer.erase("/Encrypt"); |
2248 | | |
2249 | | // Remove modification information |
2250 | 30.4k | trailer.erase("/Prev"); |
2251 | | |
2252 | | // Remove all trailer keys that potentially come from a cross-reference stream |
2253 | 30.4k | trailer.erase("/Index"); |
2254 | 30.4k | trailer.erase("/W"); |
2255 | 30.4k | trailer.erase("/Length"); |
2256 | 30.4k | trailer.erase("/Filter"); |
2257 | 30.4k | trailer.erase("/DecodeParms"); |
2258 | 30.4k | trailer.erase("/Type"); |
2259 | 30.4k | trailer.erase("/XRefStm"); |
2260 | | |
2261 | 30.4k | return trailer; |
2262 | 30.4k | } |
2263 | | |
2264 | | // Make document extension level information direct as required by the spec. |
2265 | | void |
2266 | | QPDFWriter::Members::prepareFileForWrite() |
2267 | 9.09k | { |
2268 | 9.09k | pdf.fixDanglingReferences(); |
2269 | 9.09k | auto root = pdf.getRoot(); |
2270 | 9.09k | auto oh = root.getKey("/Extensions"); |
2271 | 9.09k | if (oh.isDictionary()) { |
2272 | 389 | const bool extensions_indirect = oh.isIndirect(); |
2273 | 389 | if (extensions_indirect) { |
2274 | 118 | QTC::TC("qpdf", "QPDFWriter make Extensions direct"); |
2275 | 118 | oh = root.replaceKeyAndGetNew("/Extensions", oh.shallowCopy()); |
2276 | 118 | } |
2277 | 389 | if (oh.hasKey("/ADBE")) { |
2278 | 237 | auto adbe = oh.getKey("/ADBE"); |
2279 | 237 | if (adbe.isIndirect()) { |
2280 | 161 | QTC::TC("qpdf", "QPDFWriter make ADBE direct", extensions_indirect ? 0 : 1); |
2281 | 161 | adbe.makeDirect(); |
2282 | 161 | oh.replaceKey("/ADBE", adbe); |
2283 | 161 | } |
2284 | 237 | } |
2285 | 389 | } |
2286 | 9.09k | } |
2287 | | |
2288 | | void |
2289 | | QPDFWriter::Members::initializeTables(size_t extra) |
2290 | 9.16k | { |
2291 | 9.16k | auto size = QIntC::to_size(tableSize() + 100) + extra; |
2292 | 9.16k | obj.resize(size); |
2293 | 9.16k | new_obj.resize(size); |
2294 | 9.16k | } |
2295 | | |
2296 | | void |
2297 | | QPDFWriter::Members::doWriteSetup() |
2298 | 9.16k | { |
2299 | 9.16k | if (did_write_setup) { |
2300 | 0 | return; |
2301 | 0 | } |
2302 | 9.16k | did_write_setup = true; |
2303 | | |
2304 | | // Do preliminary setup |
2305 | | |
2306 | 9.16k | if (linearized) { |
2307 | 9.16k | qdf_mode = false; |
2308 | 9.16k | } |
2309 | | |
2310 | 9.16k | if (pclm) { |
2311 | 0 | stream_decode_level = qpdf_dl_none; |
2312 | 0 | compress_streams = false; |
2313 | 0 | encryption = nullptr; |
2314 | 0 | } |
2315 | | |
2316 | 9.16k | if (qdf_mode) { |
2317 | 0 | if (!normalize_content_set) { |
2318 | 0 | normalize_content = true; |
2319 | 0 | } |
2320 | 0 | if (!compress_streams_set) { |
2321 | 0 | compress_streams = false; |
2322 | 0 | } |
2323 | 0 | if (!stream_decode_level_set) { |
2324 | 0 | stream_decode_level = qpdf_dl_generalized; |
2325 | 0 | } |
2326 | 0 | } |
2327 | | |
2328 | 9.16k | if (encryption) { |
2329 | | // Encryption has been explicitly set |
2330 | 9.16k | preserve_encryption = false; |
2331 | 9.16k | } else if (normalize_content || pclm || qdf_mode) { |
2332 | | // Encryption makes looking at contents pretty useless. If the user explicitly encrypted |
2333 | | // though, we still obey that. |
2334 | 0 | preserve_encryption = false; |
2335 | 0 | } |
2336 | | |
2337 | 9.16k | if (preserve_encryption) { |
2338 | 0 | copyEncryptionParameters(pdf); |
2339 | 0 | } |
2340 | | |
2341 | 9.16k | if (!forced_pdf_version.empty()) { |
2342 | 0 | int major = 0; |
2343 | 0 | int minor = 0; |
2344 | 0 | parseVersion(forced_pdf_version, major, minor); |
2345 | 0 | disableIncompatibleEncryption(major, minor, forced_extension_level); |
2346 | 0 | if (compareVersions(major, minor, 1, 5) < 0) { |
2347 | 0 | object_stream_mode = qpdf_o_disable; |
2348 | 0 | } |
2349 | 0 | } |
2350 | | |
2351 | 9.16k | if (qdf_mode || normalize_content) { |
2352 | 0 | initializeSpecialStreams(); |
2353 | 0 | } |
2354 | | |
2355 | 9.16k | if (qdf_mode) { |
2356 | | // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing |
2357 | | // recomputed stream length data. Certain streams such as object streams, xref streams, and |
2358 | | // hint streams always get direct stream lengths. |
2359 | 0 | direct_stream_lengths = false; |
2360 | 0 | } |
2361 | | |
2362 | 9.16k | switch (object_stream_mode) { |
2363 | 0 | case qpdf_o_disable: |
2364 | 0 | initializeTables(); |
2365 | 0 | obj.streams_empty = true; |
2366 | 0 | break; |
2367 | | |
2368 | 9.16k | case qpdf_o_preserve: |
2369 | 9.16k | initializeTables(); |
2370 | 9.16k | preserveObjectStreams(); |
2371 | 9.16k | break; |
2372 | | |
2373 | 0 | case qpdf_o_generate: |
2374 | 0 | generateObjectStreams(); |
2375 | 0 | break; |
2376 | | |
2377 | | // no default so gcc will warn for missing case tag |
2378 | 9.16k | } |
2379 | | |
2380 | 9.14k | if (!obj.streams_empty) { |
2381 | 1.27k | if (linearized) { |
2382 | | // Page dictionaries are not allowed to be compressed objects. |
2383 | 2.00k | for (auto& page: pdf.getAllPages()) { |
2384 | 2.00k | if (obj[page].object_stream > 0) { |
2385 | 145 | obj[page].object_stream = 0; |
2386 | 145 | } |
2387 | 2.00k | } |
2388 | 1.27k | } |
2389 | | |
2390 | 1.27k | if (linearized || encryption) { |
2391 | | // The document catalog is not allowed to be compressed in linearized files either. It |
2392 | | // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to |
2393 | | // handle encrypted files with compressed document catalogs, so we disable them in that |
2394 | | // case as well. |
2395 | 1.27k | if (obj[root_og].object_stream > 0) { |
2396 | 25 | obj[root_og].object_stream = 0; |
2397 | 25 | } |
2398 | 1.27k | } |
2399 | | |
2400 | | // Generate reverse mapping from object stream to objects |
2401 | 1.45M | obj.forEach([this](auto id, auto const& item) -> void { |
2402 | 1.45M | if (item.object_stream > 0) { |
2403 | 45.1k | auto& vec = object_stream_to_objects[item.object_stream]; |
2404 | 45.1k | vec.emplace_back(id, item.gen); |
2405 | 45.1k | if (max_ostream_index < vec.size()) { |
2406 | 18.3k | ++max_ostream_index; |
2407 | 18.3k | } |
2408 | 45.1k | } |
2409 | 1.45M | }); |
2410 | 1.27k | --max_ostream_index; |
2411 | | |
2412 | 1.27k | if (object_stream_to_objects.empty()) { |
2413 | 384 | obj.streams_empty = true; |
2414 | 891 | } else { |
2415 | 891 | w.setMinimumPDFVersion("1.5"); |
2416 | 891 | } |
2417 | 1.27k | } |
2418 | | |
2419 | 9.14k | setMinimumPDFVersion(pdf.getPDFVersion(), pdf.getExtensionLevel()); |
2420 | 9.14k | final_pdf_version = min_pdf_version; |
2421 | 9.14k | final_extension_level = min_extension_level; |
2422 | 9.14k | if (!forced_pdf_version.empty()) { |
2423 | 0 | final_pdf_version = forced_pdf_version; |
2424 | 0 | final_extension_level = forced_extension_level; |
2425 | 0 | } |
2426 | 9.14k | } |
2427 | | |
// Public entry point for writing the output: forwards to the implementation object's write().
void
QPDFWriter::write()
{
    m->write();
}
2433 | | |
2434 | | void |
2435 | | QPDFWriter::Members::write() |
2436 | 9.16k | { |
2437 | 9.16k | doWriteSetup(); |
2438 | | |
2439 | | // Set up progress reporting. For linearized files, we write two passes. events_expected is an |
2440 | | // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. |
2441 | 9.16k | events_expected = QIntC::to_int(pdf.getObjectCount() * (linearized ? 2 : 1)); |
2442 | | |
2443 | 9.16k | prepareFileForWrite(); |
2444 | | |
2445 | 9.16k | if (linearized) { |
2446 | 9.07k | writeLinearized(); |
2447 | 9.07k | } else { |
2448 | 93 | writeStandard(); |
2449 | 93 | } |
2450 | | |
2451 | 9.16k | pipeline->finish(); |
2452 | 9.16k | if (close_file) { |
2453 | 0 | fclose(file); |
2454 | 0 | } |
2455 | 9.16k | file = nullptr; |
2456 | 9.16k | if (buffer_pipeline) { |
2457 | 0 | output_buffer = buffer_pipeline->getBuffer(); |
2458 | 0 | buffer_pipeline = nullptr; |
2459 | 0 | } |
2460 | 9.16k | indicateProgress(false, true); |
2461 | 9.16k | } |
2462 | | |
2463 | | QPDFObjGen |
2464 | | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
2465 | 0 | { |
2466 | 0 | return {m->obj[og].renumber, 0}; |
2467 | 0 | } |
2468 | | |
// Public accessor for the cross-reference data of the written objects: forwards to the
// implementation object's getWrittenXRefTable().
std::map<QPDFObjGen, QPDFXRefEntry>
QPDFWriter::getWrittenXRefTable()
{
    return m->getWrittenXRefTable();
}
2474 | | |
2475 | | std::map<QPDFObjGen, QPDFXRefEntry> |
2476 | | QPDFWriter::Members::getWrittenXRefTable() |
2477 | 0 | { |
2478 | 0 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
2479 | |
|
2480 | 0 | auto it = result.begin(); |
2481 | 0 | new_obj.forEach([&it, &result](auto id, auto const& item) -> void { |
2482 | 0 | if (item.xref.getType() != 0) { |
2483 | 0 | it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); |
2484 | 0 | } |
2485 | 0 | }); |
2486 | 0 | return result; |
2487 | 0 | } |
2488 | | |
2489 | | void |
2490 | | QPDFWriter::Members::enqueuePart(std::vector<QPDFObjectHandle>& part) |
2491 | 43.0k | { |
2492 | 133k | for (auto const& oh: part) { |
2493 | 133k | enqueueObject(oh); |
2494 | 133k | } |
2495 | 43.0k | } |
2496 | | |
2497 | | void |
2498 | | QPDFWriter::Members::writeEncryptionDictionary() |
2499 | 15.3k | { |
2500 | 15.3k | encryption_dict_objid = openObject(encryption_dict_objid); |
2501 | 15.3k | auto& enc = *encryption; |
2502 | 15.3k | auto const V = enc.getV(); |
2503 | | |
2504 | 15.3k | write("<<"); |
2505 | 15.3k | if (V >= 4) { |
2506 | 15.3k | write(" /CF << /StdCF << /AuthEvent /DocOpen /CFM "); |
2507 | 15.3k | write(encrypt_use_aes ? ((V < 5) ? "/AESV2" : "/AESV3") : "/V2"); |
2508 | | // The PDF spec says the /Length key is optional, but the PDF previewer on some versions of |
2509 | | // MacOS won't open encrypted files without it. |
2510 | 15.3k | write((V < 5) ? " /Length 16 >> >>" : " /Length 32 >> >>"); |
2511 | 15.3k | if (!encryption->getEncryptMetadata()) { |
2512 | 0 | write(" /EncryptMetadata false"); |
2513 | 0 | } |
2514 | 15.3k | } |
2515 | 15.3k | write(" /Filter /Standard /Length ").write(enc.getLengthBytes() * 8); |
2516 | 15.3k | write(" /O ").write_string(enc.getO(), true); |
2517 | 15.3k | if (V >= 4) { |
2518 | 15.3k | write(" /OE ").write_string(enc.getOE(), true); |
2519 | 15.3k | } |
2520 | 15.3k | write(" /P ").write(enc.getP()); |
2521 | 15.3k | if (V >= 5) { |
2522 | 15.3k | write(" /Perms ").write_string(enc.getPerms(), true); |
2523 | 15.3k | } |
2524 | 15.3k | write(" /R ").write(enc.getR()); |
2525 | | |
2526 | 15.3k | if (V >= 4) { |
2527 | 15.3k | write(" /StmF /StdCF /StrF /StdCF"); |
2528 | 15.3k | } |
2529 | 15.3k | write(" /U ").write_string(enc.getU(), true); |
2530 | 15.3k | if (V >= 4) { |
2531 | 15.3k | write(" /UE ").write_string(enc.getUE(), true); |
2532 | 15.3k | } |
2533 | 15.3k | write(" /V ").write(enc.getV()).write(" >>"); |
2534 | 15.3k | closeObject(encryption_dict_objid); |
2535 | 15.3k | } |
2536 | | |
// Return the PDF version string that will be written. The final version is determined during
// write setup, so doWriteSetup() is run before the value is read.
std::string
QPDFWriter::getFinalVersion()
{
    m->doWriteSetup();
    return m->final_pdf_version;
}
2543 | | |
2544 | | void |
2545 | | QPDFWriter::Members::writeHeader() |
2546 | 15.4k | { |
2547 | 15.4k | write("%PDF-").write(final_pdf_version); |
2548 | 15.4k | if (pclm) { |
2549 | | // PCLm version |
2550 | 0 | write("\n%PCLm 1.0\n"); |
2551 | 15.4k | } else { |
2552 | | // This string of binary characters would not be valid UTF-8, so it really should be treated |
2553 | | // as binary. |
2554 | 15.4k | write("\n%\xbf\xf7\xa2\xfe\n"); |
2555 | 15.4k | } |
2556 | 15.4k | write_qdf("%QDF-1.0\n\n"); |
2557 | | |
2558 | | // Note: do not write extra header text here. Linearized PDFs must include the entire |
2559 | | // linearization parameter dictionary within the first 1024 characters of the PDF file, so for |
2560 | | // linearized files, we have to write extra header text after the linearization parameter |
2561 | | // dictionary. |
2562 | 15.4k | } |
2563 | | |
2564 | | void |
2565 | | QPDFWriter::Members::writeHintStream(int hint_id) |
2566 | 7.50k | { |
2567 | 7.50k | std::string hint_buffer; |
2568 | 7.50k | int S = 0; |
2569 | 7.50k | int O = 0; |
2570 | 7.50k | bool compressed = compress_streams; |
2571 | 7.50k | generateHintStream(new_obj, obj, hint_buffer, S, O, compressed); |
2572 | | |
2573 | 7.50k | openObject(hint_id); |
2574 | 7.50k | setDataKey(hint_id); |
2575 | | |
2576 | 7.50k | size_t hlen = hint_buffer.size(); |
2577 | | |
2578 | 7.50k | write("<< "); |
2579 | 7.50k | if (compressed) { |
2580 | 7.50k | write("/Filter /FlateDecode "); |
2581 | 7.50k | } |
2582 | 7.50k | write("/S ").write(S); |
2583 | 7.50k | if (O) { |
2584 | 249 | write(" /O ").write(O); |
2585 | 249 | } |
2586 | 7.50k | adjustAESStreamLength(hlen); |
2587 | 7.50k | write(" /Length ").write(hlen); |
2588 | 7.50k | write(" >>\nstream\n").write_encrypted(hint_buffer); |
2589 | | |
2590 | 7.50k | if (encryption) { |
2591 | 7.50k | QTC::TC("qpdf", "QPDFWriter encrypted hint stream"); |
2592 | 7.50k | } |
2593 | | |
2594 | 7.50k | write(hint_buffer.empty() || hint_buffer.back() != '\n' ? "\nendstream" : "endstream"); |
2595 | 7.50k | closeObject(hint_id); |
2596 | 7.50k | } |
2597 | | |
// Convenience overload: write a cross-reference table for objects first..last with no previous
// xref offset, offsets not suppressed, no hint stream adjustment, and linearization pass 0.
// Returns whatever the full overload returns.
qpdf_offset_t
QPDFWriter::Members::writeXRefTable(trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0);
}
2605 | | |
2606 | | qpdf_offset_t |
2607 | | QPDFWriter::Members::writeXRefTable( |
2608 | | trailer_e which, |
2609 | | int first, |
2610 | | int last, |
2611 | | int size, |
2612 | | qpdf_offset_t prev, |
2613 | | bool suppress_offsets, |
2614 | | int hint_id, |
2615 | | qpdf_offset_t hint_offset, |
2616 | | qpdf_offset_t hint_length, |
2617 | | int linearization_pass) |
2618 | 27.9k | { |
2619 | 27.9k | write("xref\n").write(first).write(" ").write(last - first + 1); |
2620 | 27.9k | qpdf_offset_t space_before_zero = pipeline->getCount(); |
2621 | 27.9k | write("\n"); |
2622 | 27.9k | if (first == 0) { |
2623 | 13.8k | write("0000000000 65535 f \n"); |
2624 | 13.8k | ++first; |
2625 | 13.8k | } |
2626 | 240k | for (int i = first; i <= last; ++i) { |
2627 | 212k | qpdf_offset_t offset = 0; |
2628 | 212k | if (!suppress_offsets) { |
2629 | 139k | offset = new_obj[i].xref.getOffset(); |
2630 | 139k | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
2631 | 37.4k | offset += hint_length; |
2632 | 37.4k | } |
2633 | 139k | } |
2634 | 212k | write(QUtil::int_to_string(offset, 10)).write(" 00000 n \n"); |
2635 | 212k | } |
2636 | 27.9k | writeTrailer(which, size, false, prev, linearization_pass); |
2637 | 27.9k | write("\n"); |
2638 | 27.9k | return space_before_zero; |
2639 | 27.9k | } |
2640 | | |
// Convenience overload: write a cross-reference stream with no previous xref offset, no hint
// stream adjustment, compression not skipped, and linearization pass 0. Returns whatever the full
// overload returns.
qpdf_offset_t
QPDFWriter::Members::writeXRefStream(
    int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size)
{
    // There are too many extra arguments to replace overloaded function with defaults in the header
    // file...too much risk of leaving something off.
    return writeXRefStream(
        objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false, 0);
}
2650 | | |
// Write a cross-reference stream as indirect object `xref_id`, covering entries first..last.
//
// max_id, max_offset: used together with hint_length to choose the width of field 1.
// which, size, prev, linearization_pass: forwarded to writeTrailer; `size` also decides whether an
//   /Index entry is needed.
// hint_id, hint_offset, hint_length: when hint_id is non-zero, the offset of every type 1 entry
//   other than hint_id that lies at or beyond hint_offset is increased by hint_length.
// skip_compression: when true, the data is not deflated even though the dictionary still
//   declares /FlateDecode (see below).
//
// Returns the pipeline count on entry minus one.
// Throws std::logic_error if an xref entry has a type other than 0, 1, or 2.
qpdf_offset_t
QPDFWriter::Members::writeXRefStream(
    int xref_id,
    int max_id,
    qpdf_offset_t max_offset,
    trailer_e which,
    int first,
    int last,
    int size,
    qpdf_offset_t prev,
    int hint_id,
    qpdf_offset_t hint_offset,
    qpdf_offset_t hint_length,
    bool skip_compression,
    int linearization_pass)
{
    qpdf_offset_t xref_offset = pipeline->getCount();
    qpdf_offset_t space_before_zero = xref_offset - 1;

    // field 1 contains offsets and object stream identifiers
    unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));

    // field 2 contains object stream indices
    unsigned int f2_size = bytesNeeded(QIntC::to_longlong(max_ostream_index));

    // Each entry is one type byte followed by the two fields.
    unsigned int esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data rather than waiting for
    // openObject to do it.
    new_obj[xref_id].xref = QPDFXRefEntry(pipeline->getCount());

    std::string xref_data;
    const bool compressed = compress_streams && !qdf_mode;
    {
        // While pp_xref is in scope, the binary entries written below are collected in xref_data.
        auto pp_xref = pipeline_stack.activate(xref_data);

        for (int i = first; i <= last; ++i) {
            QPDFXRefEntry& e = new_obj[i].xref;
            switch (e.getType()) {
            case 0:
                // Type 0: all fields zero.
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
                break;

            case 1:
                // Type 1: field 1 is the (possibly hint-adjusted) file offset, field 2 is zero.
                {
                    qpdf_offset_t offset = e.getOffset();
                    if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) {
                        offset += hint_length;
                    }
                    writeBinary(1, 1);
                    writeBinary(QIntC::to_ulonglong(offset), f1_size);
                    writeBinary(0, f2_size);
                }
                break;

            case 2:
                // Type 2: field 1 is the object stream number, field 2 the index within it.
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size);
                break;

            default:
                throw std::logic_error("invalid type writing xref stream");
                break;
            }
        }
    }

    if (compressed) {
        xref_data = pl::pipe<Pl_PNGFilter>(xref_data, Pl_PNGFilter::a_encode, esize);
        if (!skip_compression) {
            // Deflate for real. When skip_compression is set, the stream dictionary below is still
            // written for compression but the data is not actually compressed. This helps us with
            // computation of padding for pass 1 of linearization.
            xref_data = pl::pipe<Pl_Flate>(xref_data, Pl_Flate::a_deflate);
        }
    }

    openObject(xref_id);
    write("<<").write_qdf("\n ").write(" /Type /XRef").write_qdf("\n ");
    write(" /Length ").write(xref_data.size());
    if (compressed) {
        write_qdf("\n ").write(" /Filter /FlateDecode").write_qdf("\n ");
        write(" /DecodeParms << /Columns ").write(esize).write(" /Predictor 12 >>");
    }
    write_qdf("\n ").write(" /W [ 1 ").write(f1_size).write(" ").write(f2_size).write(" ]");
    // /Index is written only when the stream does not cover the full range 0..size-1.
    if (!(first == 0 && last == (size - 1))) {
        write(" /Index [ ").write(first).write(" ").write(last - first + 1).write(" ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    write("\nstream\n").write(xref_data).write("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
2746 | | |
2747 | | size_t |
2748 | | QPDFWriter::Members::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
2749 | 1.32k | { |
2750 | | // This routine is called right after a linearization first pass xref stream has been written |
2751 | | // without compression. Calculate the amount of padding that would be required in the worst |
2752 | | // case, assuming the number of uncompressed bytes remains the same. The worst case for zlib is |
2753 | | // that the output is larger than the input by 6 bytes plus 5 bytes per 16K, and then we'll add |
2754 | | // 10 extra bytes for number length increases. |
2755 | | |
2756 | 1.32k | return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); |
2757 | 1.32k | } |
2758 | | |
// Write the document in linearized form.
//
// The function works in three stages:
//   1. Optimize the document, obtain linearization parts 4 and 6 through 9, and reserve object
//      numbers for each part as well as for the linearization dictionary, the xref streams (only
//      when object streams are in use), the encryption dictionary (only when encrypting), and the
//      hint stream.
//   2. Enqueue the parts. After each group, the number of object IDs actually consumed is checked
//      against what was reserved; std::runtime_error is thrown on a mismatch.
//   3. Write the file twice. Pass 1 runs with a separate pipeline activated on pp_pass1 (writing
//      to lin_pass1_filename when that is set) and records the offsets needed to build the hint
//      stream. Pass 2 writes the same content again with the hint stream included and with
//      padding that keeps objects at the positions established in pass 1.
//
// std::logic_error is thrown if the padding reserved in pass 1 turns out to be insufficient.
void
QPDFWriter::Members::writeLinearized()
{
    // Optimize file and enqueue objects in order

    // Cache of per-stream results keyed by object ID. A value of 0 (the map's default) means "not
    // yet computed"; otherwise the value is 2 if will_filter_stream() is true for the stream and 1
    // if it is not.
    std::map<int, int> stream_cache;

    auto skip_stream_parameters = [this, &stream_cache](QPDFObjectHandle& stream) {
        if (auto& result = stream_cache[stream.getObjectID()]) {
            return result;
        } else {
            return result = will_filter_stream(stream) ? 2 : 1;
        }
    };

    optimize(obj, skip_stream_parameters);

    std::vector<QPDFObjectHandle> part4;
    std::vector<QPDFObjectHandle> part6;
    std::vector<QPDFObjectHandle> part7;
    std::vector<QPDFObjectHandle> part8;
    std::vector<QPDFObjectHandle> part9;
    getLinearizedParts(obj, part4, part6, part7, part8, part9);

    // Object number sequence:
    //
    //  second half
    //    second half uncompressed objects
    //    second half xref stream, if any
    //    second half compressed objects
    //  first half
    //    linearization dictionary
    //    first half xref stream, if any
    //    part 4 uncompressed objects
    //    encryption dictionary, if any
    //    hint stream
    //    part 6 uncompressed objects
    //    first half compressed objects
    //

    // Second half objects
    int second_half_uncompressed = QIntC::to_int(part7.size() + part8.size() + part9.size());
    int second_half_first_obj = 1;
    int after_second_half = 1 + second_half_uncompressed;
    next_objid = after_second_half;
    int second_half_xref = 0;
    // Xref streams are needed exactly when object streams are in use.
    bool need_xref_stream = !obj.streams_empty;
    if (need_xref_stream) {
        second_half_xref = next_objid++;
    }
    // Assign numbers to all compressed objects in the second half.
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
    for (int i = 0; i < 3; ++i) {
        for (auto const& oh: *vecs2[i]) {
            assignCompressedObjectNumbers(oh.getObjGen());
        }
    }
    int second_half_end = next_objid - 1;
    int second_trailer_size = next_objid;

    // First half objects
    int first_half_start = next_objid;
    int lindict_id = next_objid++;
    int first_half_xref = 0;
    if (need_xref_stream) {
        first_half_xref = next_objid++;
    }
    int part4_first_obj = next_objid;
    next_objid += QIntC::to_int(part4.size());
    int after_part4 = next_objid;
    if (encryption) {
        encryption_dict_objid = next_objid++;
    }
    int hint_id = next_objid++;
    int part6_first_obj = next_objid;
    next_objid += QIntC::to_int(part6.size());
    int after_part6 = next_objid;
    // Assign numbers to all compressed objects in the first half
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
    for (int i = 0; i < 2; ++i) {
        for (auto const& oh: *vecs1[i]) {
            assignCompressedObjectNumbers(oh.getObjGen());
        }
    }
    int first_half_end = next_objid - 1;
    int first_trailer_size = next_objid;

    // The IDs of the last objects of parts 4 and 6 are used below to detect when those parts have
    // just been written.
    // NOTE(review): back() assumes part4 and part6 are non-empty -- confirm that
    // getLinearizedParts guarantees this.
    int part4_end_marker = part4.back().getObjectID();
    int part6_end_marker = part6.back().getObjectID();
    // Values recorded during pass 1 and consumed during pass 2.
    qpdf_offset_t space_before_zero = 0;
    qpdf_offset_t file_size = 0;
    qpdf_offset_t part6_end_offset = 0;
    qpdf_offset_t first_half_max_obj_offset = 0;
    qpdf_offset_t second_xref_offset = 0;
    qpdf_offset_t first_xref_end = 0;
    qpdf_offset_t second_xref_end = 0;

    // Enqueue each group starting from its reserved first object number, and verify that
    // enqueuing consumed exactly the reserved range.
    next_objid = part4_first_obj;
    enqueuePart(part4);
    if (next_objid != after_part4) {
        // This can happen with very botched files as in the fuzzer test. There are likely some
        // faulty assumptions in calculateLinearizationData
        throw std::runtime_error("error encountered after writing part 4 of linearized data");
    }
    next_objid = part6_first_obj;
    enqueuePart(part6);
    if (next_objid != after_part6) {
        throw std::runtime_error("error encountered after writing part 6 of linearized data");
    }
    next_objid = second_half_first_obj;
    enqueuePart(part7);
    enqueuePart(part8);
    enqueuePart(part9);
    if (next_objid != after_second_half) {
        throw std::runtime_error("error encountered after writing part 9 of linearized data");
    }

    // Both stay empty/zero throughout pass 1 and are filled in at the end of it.
    qpdf_offset_t hint_length = 0;
    std::string hint_buffer;

    // Write file in two passes. Part numbers refer to PDF spec 1.4.

    FILE* lin_pass1_file = nullptr;
    auto pp_pass1 = pipeline_stack.popper();
    auto pp_md5 = pipeline_stack.popper();
    for (int pass: {1, 2}) {
        if (pass == 1) {
            if (!lin_pass1_filename.empty()) {
                lin_pass1_file = QUtil::safe_fopen(lin_pass1_filename.c_str(), "wb");
                pipeline_stack.activate(
                    pp_pass1,
                    std::make_unique<Pl_StdioFile>("linearization pass1", lin_pass1_file));
            } else {
                pipeline_stack.activate(pp_pass1, true);
            }
            if (deterministic_id) {
                pipeline_stack.activate_md5(pp_md5);
            }
        }

        // Part 1: header

        writeHeader();

        // Part 2: linearization parameter dictionary. Save enough space to write real dictionary.
        // 200 characters is enough space if all numerical values in the parameter dictionary that
        // contain offsets are 20 digits long plus a few extra characters for safety. The entire
        // linearization parameter dictionary must appear within the first 1024 characters of the
        // file.

        qpdf_offset_t pos = pipeline->getCount();
        openObject(lindict_id);
        write("<<");
        if (pass == 2) {
            std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
            int first_page_object = obj[pages.at(0)].renumber;

            // Values recorded in pass 1 are shifted by hint_length where they are affected by the
            // hint stream, which was not part of the pass 1 output.
            write(" /Linearized 1 /L ").write(file_size + hint_length);
            // Implementation note 121 states that a space is mandatory after this open bracket.
            write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" ");
            write(hint_length);
            write(" ] /O ").write(first_page_object);
            write(" /E ").write(part6_end_offset + hint_length);
            write(" /N ").write(pages.size());
            write(" /T ").write(space_before_zero + hint_length);
        }
        write(" >>");
        closeObject(lindict_id);
        // Pad with spaces so that the dictionary object occupies exactly `pad` bytes in both
        // passes, regardless of how much of it was filled in.
        static int const pad = 200;
        write(QIntC::to_size(pos - pipeline->getCount() + pad), ' ').write("\n");

        // If the user supplied any additional header text, write it here after the linearization
        // parameter dictionary.
        write(extra_header_text);

        // Part 3: first page cross reference table and trailer.

        qpdf_offset_t first_xref_offset = pipeline->getCount();
        qpdf_offset_t hint_offset = 0;
        if (pass == 2) {
            hint_offset = new_obj[hint_id].xref.getOffset();
        }
        if (need_xref_stream) {
            // Must pad here too.
            if (pass == 1) {
                // Set first_half_max_obj_offset to a value large enough to force four bytes to be
                // reserved for each file offset. This would provide adequate space for the xref
                // stream as long as the last object in page 1 starts within the first 4 GB of the
                // file, which is extremely likely. In the second pass, we will know the actual
                // value for this, but it's okay if it's smaller.
                first_half_max_obj_offset = 1 << 25;
            }
            pos = pipeline->getCount();
            writeXRefStream(
                first_half_xref,
                first_half_end,
                first_half_max_obj_offset,
                t_lin_first,
                first_half_start,
                first_half_end,
                first_trailer_size,
                hint_length + second_xref_offset,
                hint_id,
                hint_offset,
                hint_length,
                (pass == 1),
                pass);
            qpdf_offset_t endpos = pipeline->getCount();
            if (pass == 1) {
                // Pad so we have enough room for the real xref stream.
                write(calculateXrefStreamPadding(endpos - pos), ' ');
                first_xref_end = pipeline->getCount();
            } else {
                // Pad so that the next object starts at the same place as in pass 1.
                write(QIntC::to_size(first_xref_end - endpos), ' ');

                if (pipeline->getCount() != first_xref_end) {
                    throw std::logic_error(
                        "insufficient padding for first pass xref stream; first_xref_end=" +
                        std::to_string(first_xref_end) + "; endpos=" + std::to_string(endpos));
                }
            }
            write("\n");
        } else {
            // Offsets are suppressed in pass 1 because they are not yet known.
            writeXRefTable(
                t_lin_first,
                first_half_start,
                first_half_end,
                first_trailer_size,
                hint_length + second_xref_offset,
                (pass == 1),
                hint_id,
                hint_offset,
                hint_length,
                pass);
            write("startxref\n0\n%%EOF\n");
        }

        // Parts 4 through 9

        for (auto const& cur_object: object_queue) {
            if (cur_object.getObjectID() == part6_end_marker) {
                // Offset at which the last object of part 6 starts.
                first_half_max_obj_offset = pipeline->getCount();
            }
            writeObject(cur_object);
            if (cur_object.getObjectID() == part4_end_marker) {
                // The encryption dictionary and the hint stream directly follow part 4.
                if (encryption) {
                    writeEncryptionDictionary();
                }
                if (pass == 1) {
                    // Only record where the hint stream will go; it is generated after pass 1.
                    new_obj[hint_id].xref = QPDFXRefEntry(pipeline->getCount());
                } else {
                    // Part 5: hint stream
                    write(hint_buffer);
                }
            }
            if (cur_object.getObjectID() == part6_end_marker) {
                part6_end_offset = pipeline->getCount();
            }
        }

        // Part 10: overflow hint stream -- not used

        // Part 11: main cross reference table and trailer

        second_xref_offset = pipeline->getCount();
        if (need_xref_stream) {
            pos = pipeline->getCount();
            space_before_zero = writeXRefStream(
                second_half_xref,
                second_half_end,
                second_xref_offset,
                t_lin_second,
                0,
                second_half_end,
                second_trailer_size,
                0,
                0,
                0,
                0,
                (pass == 1),
                pass);
            qpdf_offset_t endpos = pipeline->getCount();

            if (pass == 1) {
                // Pad so we have enough room for the real xref stream. See comments for previous
                // xref stream on how we calculate the padding.
                write(calculateXrefStreamPadding(endpos - pos), ' ').write("\n");
                second_xref_end = pipeline->getCount();
            } else {
                // Make the file size the same.
                auto padding =
                    QIntC::to_size(second_xref_end + hint_length - 1 - pipeline->getCount());
                write(padding, ' ').write("\n");

                // If this assertion fails, maybe we didn't have enough padding above.
                if (pipeline->getCount() != second_xref_end + hint_length) {
                    throw std::logic_error(
                        "count mismatch after xref stream; possible insufficient padding?");
                }
            }
        } else {
            space_before_zero = writeXRefTable(
                t_lin_second, 0, second_half_end, second_trailer_size, 0, false, 0, 0, 0, pass);
        }
        write("startxref\n").write(first_xref_offset).write("\n%%EOF\n");

        if (pass == 1) {
            if (deterministic_id) {
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
                computeDeterministicIDData();
                pp_md5.pop();
            }

            // Close first pass pipeline
            file_size = pipeline->getCount();
            pp_pass1.pop();

            // Save hint offset since it will be set to zero by calling openObject.
            qpdf_offset_t hint_offset1 = new_obj[hint_id].xref.getOffset();

            // Write hint stream to a buffer
            {
                auto pp_hint = pipeline_stack.activate(hint_buffer);
                writeHintStream(hint_id);
            }
            hint_length = QIntC::to_offset(hint_buffer.size());

            // Restore hint offset
            new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
            if (lin_pass1_file) {
                // Write some debugging information
                fprintf(
                    lin_pass1_file, "%% hint_offset=%s\n", std::to_string(hint_offset1).c_str());
                fprintf(lin_pass1_file, "%% hint_length=%s\n", std::to_string(hint_length).c_str());
                fprintf(
                    lin_pass1_file,
                    "%% second_xref_offset=%s\n",
                    std::to_string(second_xref_offset).c_str());
                fprintf(
                    lin_pass1_file,
                    "%% second_xref_end=%s\n",
                    std::to_string(second_xref_end).c_str());
                fclose(lin_pass1_file);
                lin_pass1_file = nullptr;
            }
        }
    }
}
3108 | | |
3109 | | void |
3110 | | QPDFWriter::Members::enqueueObjectsStandard() |
3111 | 0 | { |
3112 | 0 | if (preserve_unreferenced_objects) { |
3113 | 0 | for (auto const& oh: pdf.getAllObjects()) { |
3114 | 0 | enqueueObject(oh); |
3115 | 0 | } |
3116 | 0 | } |
3117 | | |
3118 | | // Put root first on queue. |
3119 | 0 | auto trailer = trimmed_trailer(); |
3120 | 0 | enqueueObject(trailer["/Root"]); |
3121 | | |
3122 | | // Next place any other objects referenced from the trailer dictionary into the queue, handling |
3123 | | // direct objects recursively. Root is already there, so enqueuing it a second time is a no-op. |
3124 | 0 | for (auto& item: trailer) { |
3125 | 0 | if (!item.second.null()) { |
3126 | 0 | enqueueObject(item.second); |
3127 | 0 | } |
3128 | 0 | } |
3129 | 0 | } |
3130 | | |
3131 | | void |
3132 | | QPDFWriter::Members::enqueueObjectsPCLm() |
3133 | 0 | { |
3134 | | // Image transform stream content for page strip images. Each of this new stream has to come |
3135 | | // after every page image strip written in the pclm file. |
3136 | 0 | std::string image_transform_content = "q /image Do Q\n"; |
3137 | | |
3138 | | // enqueue all pages first |
3139 | 0 | std::vector<QPDFObjectHandle> all = pdf.getAllPages(); |
3140 | 0 | for (auto& page: all) { |
3141 | | // enqueue page |
3142 | 0 | enqueueObject(page); |
3143 | | |
3144 | | // enqueue page contents stream |
3145 | 0 | enqueueObject(page.getKey("/Contents")); |
3146 | | |
3147 | | // enqueue all the strips for each page |
3148 | 0 | QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject"); |
3149 | 0 | for (auto& image: strips.as_dictionary()) { |
3150 | 0 | if (!image.second.null()) { |
3151 | 0 | enqueueObject(image.second); |
3152 | 0 | enqueueObject(QPDFObjectHandle::newStream(&pdf, image_transform_content)); |
3153 | 0 | } |
3154 | 0 | } |
3155 | 0 | } |
3156 | |
|
3157 | 0 | enqueueObject(trimmed_trailer()["/Root"]); |
3158 | 0 | } |
3159 | | |
3160 | | void |
3161 | | QPDFWriter::Members::indicateProgress(bool decrement, bool finished) |
3162 | 371k | { |
3163 | 371k | if (decrement) { |
3164 | 69.0k | --events_seen; |
3165 | 69.0k | return; |
3166 | 69.0k | } |
3167 | | |
3168 | 302k | ++events_seen; |
3169 | | |
3170 | 302k | if (!progress_reporter.get()) { |
3171 | 302k | return; |
3172 | 302k | } |
3173 | | |
3174 | 0 | if (finished || events_seen >= next_progress_report) { |
3175 | 0 | int percentage = |
3176 | 0 | (finished ? 100 |
3177 | 0 | : next_progress_report == 0 |
3178 | 0 | ? 0 |
3179 | 0 | : std::min(99, 1 + ((100 * events_seen) / events_expected))); |
3180 | 0 | progress_reporter->reportProgress(percentage); |
3181 | 0 | } |
3182 | 0 | int increment = std::max(1, (events_expected / 100)); |
3183 | 0 | while (events_seen >= next_progress_report) { |
3184 | 0 | next_progress_report += increment; |
3185 | 0 | } |
3186 | 0 | } |
3187 | | |
3188 | | void |
3189 | | QPDFWriter::registerProgressReporter(std::shared_ptr<ProgressReporter> pr) |
3190 | 0 | { |
3191 | 0 | m->progress_reporter = pr; |
3192 | 0 | } |
3193 | | |
3194 | | void |
3195 | | QPDFWriter::Members::writeStandard() |
3196 | 0 | { |
3197 | 0 | auto pp_md5 = pipeline_stack.popper(); |
3198 | 0 | if (deterministic_id) { |
3199 | 0 | pipeline_stack.activate_md5(pp_md5); |
3200 | 0 | } |
3201 | | |
3202 | | // Start writing |
3203 | |
|
3204 | 0 | writeHeader(); |
3205 | 0 | write(extra_header_text); |
3206 | |
|
3207 | 0 | if (pclm) { |
3208 | 0 | enqueueObjectsPCLm(); |
3209 | 0 | } else { |
3210 | 0 | enqueueObjectsStandard(); |
3211 | 0 | } |
3212 | | |
3213 | | // Now start walking queue, outputting each object. |
3214 | 0 | while (object_queue_front < object_queue.size()) { |
3215 | 0 | QPDFObjectHandle cur_object = object_queue.at(object_queue_front); |
3216 | 0 | ++object_queue_front; |
3217 | 0 | writeObject(cur_object); |
3218 | 0 | } |
3219 | | |
3220 | | // Write out the encryption dictionary, if any |
3221 | 0 | if (encryption) { |
3222 | 0 | writeEncryptionDictionary(); |
3223 | 0 | } |
3224 | | |
3225 | | // Now write out xref. next_objid is now the number of objects. |
3226 | 0 | qpdf_offset_t xref_offset = pipeline->getCount(); |
3227 | 0 | if (object_stream_to_objects.empty()) { |
3228 | | // Write regular cross-reference table |
3229 | 0 | writeXRefTable(t_normal, 0, next_objid - 1, next_objid); |
3230 | 0 | } else { |
3231 | | // Write cross-reference stream. |
3232 | 0 | int xref_id = next_objid++; |
3233 | 0 | writeXRefStream(xref_id, xref_id, xref_offset, t_normal, 0, next_objid - 1, next_objid); |
3234 | 0 | } |
3235 | 0 | write("startxref\n").write(xref_offset).write("\n%%EOF\n"); |
3236 | |
|
3237 | 0 | if (deterministic_id) { |
3238 | 0 | QTC::TC( |
3239 | 0 | "qpdf", |
3240 | 0 | "QPDFWriter standard deterministic ID", |
3241 | 0 | object_stream_to_objects.empty() ? 0 : 1); |
3242 | 0 | } |
3243 | 0 | } |