/src/qpdf/libqpdf/QPDF_linearization.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // See doc/linearization. |
2 | | |
3 | | #include <qpdf/QPDF.hh> |
4 | | |
5 | | #include <qpdf/BitStream.hh> |
6 | | #include <qpdf/BitWriter.hh> |
7 | | #include <qpdf/Pl_Buffer.hh> |
8 | | #include <qpdf/Pl_Count.hh> |
9 | | #include <qpdf/Pl_Flate.hh> |
10 | | #include <qpdf/QPDFExc.hh> |
11 | | #include <qpdf/QPDFLogger.hh> |
12 | | #include <qpdf/QPDFWriter_private.hh> |
13 | | #include <qpdf/QTC.hh> |
14 | | #include <qpdf/QUtil.hh> |
15 | | |
16 | | #include <algorithm> |
17 | | #include <cmath> |
18 | | #include <cstring> |
19 | | |
20 | | template <class T, class int_type> |
21 | | static void |
22 | | load_vector_int( |
23 | | BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field) |
24 | 0 | { |
25 | 0 | bool append = vec.empty(); |
26 | | // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector |
27 | | // entry. |
28 | |
|
29 | 0 | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { |
30 | 0 | if (append) { |
31 | 0 | vec.push_back(T()); |
32 | 0 | } |
33 | 0 | vec.at(i).*field = bit_stream.getBitsInt(QIntC::to_size(bits_wanted)); |
34 | 0 | } |
35 | 0 | if (QIntC::to_int(vec.size()) != nitems) { |
36 | 0 | throw std::logic_error("vector has wrong size in load_vector_int"); |
37 | 0 | } |
38 | | // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must |
39 | | // start on a byte boundary. |
40 | 0 | bit_stream.skipToNextByte(); |
41 | 0 | } Unexecuted instantiation: QPDF_linearization.cc:void load_vector_int<QPDF::HPageOffsetEntry, int>(BitStream&, int, std::__1::vector<QPDF::HPageOffsetEntry, std::__1::allocator<QPDF::HPageOffsetEntry> >&, int, int QPDF::HPageOffsetEntry::*) Unexecuted instantiation: QPDF_linearization.cc:void load_vector_int<QPDF::HPageOffsetEntry, long long>(BitStream&, int, std::__1::vector<QPDF::HPageOffsetEntry, std::__1::allocator<QPDF::HPageOffsetEntry> >&, int, long long QPDF::HPageOffsetEntry::*) Unexecuted instantiation: QPDF_linearization.cc:void load_vector_int<QPDF::HSharedObjectEntry, int>(BitStream&, int, std::__1::vector<QPDF::HSharedObjectEntry, std::__1::allocator<QPDF::HSharedObjectEntry> >&, int, int QPDF::HSharedObjectEntry::*) |
42 | | |
43 | | template <class T> |
44 | | static void |
45 | | load_vector_vector( |
46 | | BitStream& bit_stream, |
47 | | int nitems1, |
48 | | std::vector<T>& vec1, |
49 | | int T::*nitems2, |
50 | | int bits_wanted, |
51 | | std::vector<int> T::*vec2) |
52 | 0 | { |
53 | | // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field |
54 | | // of the ith item of vec1. |
55 | 0 | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { |
56 | 0 | for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { |
57 | 0 | (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); |
58 | 0 | } |
59 | 0 | } |
60 | 0 | bit_stream.skipToNextByte(); |
61 | 0 | } |
62 | | |
63 | | void |
64 | | QPDF::linearizationWarning(std::string_view msg) |
65 | 0 | { |
66 | 0 | m->linearization_warnings = true; |
67 | 0 | warn(qpdf_e_linearization, "", 0, std::string(msg)); |
68 | 0 | } |
69 | | |
70 | | bool |
71 | | QPDF::checkLinearization() |
72 | 0 | { |
73 | 0 | bool result = false; |
74 | 0 | try { |
75 | 0 | readLinearizationData(); |
76 | 0 | result = checkLinearizationInternal(); |
77 | 0 | } catch (std::runtime_error& e) { |
78 | 0 | linearizationWarning( |
79 | 0 | "error encountered while checking linearization data: " + std::string(e.what())); |
80 | 0 | } |
81 | 0 | return result; |
82 | 0 | } |
83 | | |
84 | | bool |
85 | | QPDF::isLinearized() |
86 | 0 | { |
87 | | // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L |
88 | | // key that accurately indicates the file size, initialize m->lindict and return true. |
89 | | |
90 | | // A linearized PDF spec's first object will be contained within the first 1024 bytes of the |
91 | | // file and will be a dictionary with a valid /Linearized key. This routine looks for that and |
92 | | // does no additional validation. |
93 | | |
94 | | // The PDF spec says the linearization dictionary must be completely contained within the first |
95 | | // 1024 bytes of the file. Add a byte for a null terminator. |
96 | 0 | static int const tbuf_size = 1025; |
97 | |
|
98 | 0 | auto b = std::make_unique<char[]>(tbuf_size); |
99 | 0 | char* buf = b.get(); |
100 | 0 | m->file->seek(0, SEEK_SET); |
101 | 0 | memset(buf, '\0', tbuf_size); |
102 | 0 | m->file->read(buf, tbuf_size - 1); |
103 | |
|
104 | 0 | int lindict_obj = -1; |
105 | 0 | char* p = buf; |
106 | 0 | while (lindict_obj == -1) { |
107 | | // Find a digit or end of buffer |
108 | 0 | while (((p - buf) < tbuf_size) && (!QUtil::is_digit(*p))) { |
109 | 0 | ++p; |
110 | 0 | } |
111 | 0 | if (p - buf == tbuf_size) { |
112 | 0 | break; |
113 | 0 | } |
114 | | // Seek to the digit. Then skip over digits for a potential |
115 | | // next iteration. |
116 | 0 | m->file->seek(p - buf, SEEK_SET); |
117 | 0 | while (((p - buf) < tbuf_size) && QUtil::is_digit(*p)) { |
118 | 0 | ++p; |
119 | 0 | } |
120 | |
|
121 | 0 | QPDFTokenizer::Token t1 = readToken(m->file); |
122 | 0 | if (t1.isInteger() && readToken(m->file).isInteger() && readToken(m->file).isWord("obj") && |
123 | 0 | (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) { |
124 | 0 | lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); |
125 | 0 | } |
126 | 0 | } |
127 | |
|
128 | 0 | if (lindict_obj <= 0) { |
129 | 0 | return false; |
130 | 0 | } |
131 | | |
132 | 0 | auto candidate = getObjectByID(lindict_obj, 0); |
133 | 0 | if (!candidate.isDictionary()) { |
134 | 0 | return false; |
135 | 0 | } |
136 | | |
137 | 0 | QPDFObjectHandle linkey = candidate.getKey("/Linearized"); |
138 | 0 | if (!(linkey.isNumber() && (toI(floor(linkey.getNumericValue())) == 1))) { |
139 | 0 | return false; |
140 | 0 | } |
141 | | |
142 | 0 | QPDFObjectHandle L = candidate.getKey("/L"); |
143 | 0 | if (L.isInteger()) { |
144 | 0 | qpdf_offset_t Li = L.getIntValue(); |
145 | 0 | m->file->seek(0, SEEK_END); |
146 | 0 | if (Li != m->file->tell()) { |
147 | 0 | QTC::TC("qpdf", "QPDF /L mismatch"); |
148 | 0 | return false; |
149 | 0 | } else { |
150 | 0 | m->linp.file_size = Li; |
151 | 0 | } |
152 | 0 | } |
153 | | |
154 | 0 | m->lindict = candidate; |
155 | |
|
156 | 0 | return true; |
157 | 0 | } |
158 | | |
159 | | void |
160 | | QPDF::readLinearizationData() |
161 | 0 | { |
162 | | // This function throws an exception (which is trapped by checkLinearization()) for any errors |
163 | | // that prevent loading. |
164 | |
|
165 | 0 | if (!isLinearized()) { |
166 | 0 | throw std::logic_error("called readLinearizationData for file" |
167 | 0 | " that is not linearized"); |
168 | 0 | } |
169 | | |
170 | | // /L is read and stored in linp by isLinearized() |
171 | 0 | QPDFObjectHandle H = m->lindict.getKey("/H"); |
172 | 0 | QPDFObjectHandle O = m->lindict.getKey("/O"); |
173 | 0 | QPDFObjectHandle E = m->lindict.getKey("/E"); |
174 | 0 | QPDFObjectHandle N = m->lindict.getKey("/N"); |
175 | 0 | QPDFObjectHandle T = m->lindict.getKey("/T"); |
176 | 0 | QPDFObjectHandle P = m->lindict.getKey("/P"); |
177 | |
|
178 | 0 | if (!(H.isArray() && O.isInteger() && E.isInteger() && N.isInteger() && T.isInteger() && |
179 | 0 | (P.isInteger() || P.isNull()))) { |
180 | 0 | throw damagedPDF( |
181 | 0 | "linearization dictionary", |
182 | 0 | "some keys in linearization dictionary are of the wrong type"); |
183 | 0 | } |
184 | | |
185 | | // Hint table array: offset length [ offset length ] |
186 | 0 | size_t n_H_items = toS(H.getArrayNItems()); |
187 | 0 | if (!((n_H_items == 2) || (n_H_items == 4))) { |
188 | 0 | throw damagedPDF("linearization dictionary", "H has the wrong number of items"); |
189 | 0 | } |
190 | | |
191 | 0 | std::vector<int> H_items; |
192 | 0 | for (size_t i = 0; i < n_H_items; ++i) { |
193 | 0 | QPDFObjectHandle oh(H.getArrayItem(toI(i))); |
194 | 0 | if (oh.isInteger()) { |
195 | 0 | H_items.push_back(oh.getIntValueAsInt()); |
196 | 0 | } else { |
197 | 0 | throw damagedPDF("linearization dictionary", "some H items are of the wrong type"); |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | | // H: hint table offset/length for primary and overflow hint tables |
202 | 0 | int H0_offset = H_items.at(0); |
203 | 0 | int H0_length = H_items.at(1); |
204 | 0 | int H1_offset = 0; |
205 | 0 | int H1_length = 0; |
206 | 0 | if (H_items.size() == 4) { |
207 | | // Acrobat doesn't read or write these (as PDF 1.4), so we don't have a way to generate a |
208 | | // test case. |
209 | | // QTC::TC("qpdf", "QPDF overflow hint table"); |
210 | 0 | H1_offset = H_items.at(2); |
211 | 0 | H1_length = H_items.at(3); |
212 | 0 | } |
213 | | |
214 | | // P: first page number |
215 | 0 | int first_page = 0; |
216 | 0 | if (P.isInteger()) { |
217 | 0 | QTC::TC("qpdf", "QPDF P present in lindict"); |
218 | 0 | first_page = P.getIntValueAsInt(); |
219 | 0 | } else { |
220 | 0 | QTC::TC("qpdf", "QPDF P absent in lindict"); |
221 | 0 | } |
222 | | |
223 | | // Store linearization parameter data |
224 | | |
225 | | // Various places in the code use linp.npages, which is initialized from N, to pre-allocate |
226 | | // memory, so make sure it's accurate and bail right now if it's not. |
227 | 0 | if (N.getIntValue() != static_cast<long long>(getAllPages().size())) { |
228 | 0 | throw damagedPDF("linearization hint table", "/N does not match number of pages"); |
229 | 0 | } |
230 | | |
231 | | // file_size initialized by isLinearized() |
232 | 0 | m->linp.first_page_object = O.getIntValueAsInt(); |
233 | 0 | m->linp.first_page_end = E.getIntValue(); |
234 | 0 | m->linp.npages = N.getIntValueAsInt(); |
235 | 0 | m->linp.xref_zero_offset = T.getIntValue(); |
236 | 0 | m->linp.first_page = first_page; |
237 | 0 | m->linp.H_offset = H0_offset; |
238 | 0 | m->linp.H_length = H0_length; |
239 | | |
240 | | // Read hint streams |
241 | |
|
242 | 0 | Pl_Buffer pb("hint buffer"); |
243 | 0 | QPDFObjectHandle H0 = readHintStream(pb, H0_offset, toS(H0_length)); |
244 | 0 | if (H1_offset) { |
245 | 0 | (void)readHintStream(pb, H1_offset, toS(H1_length)); |
246 | 0 | } |
247 | | |
248 | | // PDF 1.4 hint tables that we ignore: |
249 | | |
250 | | // /T thumbnail |
251 | | // /A thread information |
252 | | // /E named destination |
253 | | // /V interactive form |
254 | | // /I information dictionary |
255 | | // /C logical structure |
256 | | // /L page label |
257 | | |
258 | | // Individual hint table offsets |
259 | 0 | QPDFObjectHandle HS = H0.getKey("/S"); // shared object |
260 | 0 | QPDFObjectHandle HO = H0.getKey("/O"); // outline |
261 | |
|
262 | 0 | auto hbp = pb.getBufferSharedPointer(); |
263 | 0 | Buffer* hb = hbp.get(); |
264 | 0 | unsigned char const* h_buf = hb->getBuffer(); |
265 | 0 | size_t h_size = hb->getSize(); |
266 | |
|
267 | 0 | readHPageOffset(BitStream(h_buf, h_size)); |
268 | |
|
269 | 0 | int HSi = HS.getIntValueAsInt(); |
270 | 0 | if ((HSi < 0) || (toS(HSi) >= h_size)) { |
271 | 0 | throw damagedPDF("linearization hint table", "/S (shared object) offset is out of bounds"); |
272 | 0 | } |
273 | 0 | readHSharedObject(BitStream(h_buf + HSi, h_size - toS(HSi))); |
274 | |
|
275 | 0 | if (HO.isInteger()) { |
276 | 0 | int HOi = HO.getIntValueAsInt(); |
277 | 0 | if ((HOi < 0) || (toS(HOi) >= h_size)) { |
278 | 0 | throw damagedPDF("linearization hint table", "/O (outline) offset is out of bounds"); |
279 | 0 | } |
280 | 0 | readHGeneric(BitStream(h_buf + HOi, h_size - toS(HOi)), m->outline_hints); |
281 | 0 | } |
282 | 0 | } |
283 | | |
284 | | QPDFObjectHandle |
285 | | QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) |
286 | 0 | { |
287 | 0 | QPDFObjGen og; |
288 | 0 | QPDFObjectHandle H = |
289 | 0 | readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); |
290 | 0 | ObjCache& oc = m->obj_cache[og]; |
291 | 0 | qpdf_offset_t min_end_offset = oc.end_before_space; |
292 | 0 | qpdf_offset_t max_end_offset = oc.end_after_space; |
293 | 0 | if (!H.isStream()) { |
294 | 0 | throw damagedPDF("linearization dictionary", "hint table is not a stream"); |
295 | 0 | } |
296 | | |
297 | 0 | QPDFObjectHandle Hdict = H.getDict(); |
298 | | |
299 | | // Some versions of Acrobat make /Length indirect and place it immediately after the stream, |
300 | | // increasing length to cover it, even though the specification says all objects in the |
301 | | // linearization parameter dictionary must be direct. We have to get the file position of the |
302 | | // end of length in this case. |
303 | 0 | QPDFObjectHandle length_obj = Hdict.getKey("/Length"); |
304 | 0 | if (length_obj.isIndirect()) { |
305 | 0 | QTC::TC("qpdf", "QPDF hint table length indirect"); |
306 | | // Force resolution |
307 | 0 | (void)length_obj.getIntValue(); |
308 | 0 | ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()]; |
309 | 0 | min_end_offset = oc2.end_before_space; |
310 | 0 | max_end_offset = oc2.end_after_space; |
311 | 0 | } else { |
312 | 0 | QTC::TC("qpdf", "QPDF hint table length direct"); |
313 | 0 | } |
314 | 0 | qpdf_offset_t computed_end = offset + toO(length); |
315 | 0 | if ((computed_end < min_end_offset) || (computed_end > max_end_offset)) { |
316 | 0 | linearizationWarning( |
317 | 0 | "expected = " + std::to_string(computed_end) + |
318 | 0 | "; actual = " + std::to_string(min_end_offset) + ".." + std::to_string(max_end_offset)); |
319 | 0 | throw damagedPDF("linearization dictionary", "hint table length mismatch"); |
320 | 0 | } |
321 | 0 | H.pipeStreamData(&pl, 0, qpdf_dl_specialized); |
322 | 0 | return Hdict; |
323 | 0 | } |
324 | | |
325 | | void |
326 | | QPDF::readHPageOffset(BitStream h) |
327 | 0 | { |
328 | | // All comments referring to the PDF spec refer to the spec for version 1.4. |
329 | |
|
330 | 0 | HPageOffset& t = m->page_offset_hints; |
331 | |
|
332 | 0 | t.min_nobjects = h.getBitsInt(32); // 1 |
333 | 0 | t.first_page_offset = h.getBitsInt(32); // 2 |
334 | 0 | t.nbits_delta_nobjects = h.getBitsInt(16); // 3 |
335 | 0 | t.min_page_length = h.getBitsInt(32); // 4 |
336 | 0 | t.nbits_delta_page_length = h.getBitsInt(16); // 5 |
337 | 0 | t.min_content_offset = h.getBitsInt(32); // 6 |
338 | 0 | t.nbits_delta_content_offset = h.getBitsInt(16); // 7 |
339 | 0 | t.min_content_length = h.getBitsInt(32); // 8 |
340 | 0 | t.nbits_delta_content_length = h.getBitsInt(16); // 9 |
341 | 0 | t.nbits_nshared_objects = h.getBitsInt(16); // 10 |
342 | 0 | t.nbits_shared_identifier = h.getBitsInt(16); // 11 |
343 | 0 | t.nbits_shared_numerator = h.getBitsInt(16); // 12 |
344 | 0 | t.shared_denominator = h.getBitsInt(16); // 13 |
345 | |
|
346 | 0 | std::vector<HPageOffsetEntry>& entries = t.entries; |
347 | 0 | entries.clear(); |
348 | 0 | int nitems = m->linp.npages; |
349 | 0 | load_vector_int(h, nitems, entries, t.nbits_delta_nobjects, &HPageOffsetEntry::delta_nobjects); |
350 | 0 | load_vector_int( |
351 | 0 | h, nitems, entries, t.nbits_delta_page_length, &HPageOffsetEntry::delta_page_length); |
352 | 0 | load_vector_int( |
353 | 0 | h, nitems, entries, t.nbits_nshared_objects, &HPageOffsetEntry::nshared_objects); |
354 | 0 | load_vector_vector( |
355 | 0 | h, |
356 | 0 | nitems, |
357 | 0 | entries, |
358 | 0 | &HPageOffsetEntry::nshared_objects, |
359 | 0 | t.nbits_shared_identifier, |
360 | 0 | &HPageOffsetEntry::shared_identifiers); |
361 | 0 | load_vector_vector( |
362 | 0 | h, |
363 | 0 | nitems, |
364 | 0 | entries, |
365 | 0 | &HPageOffsetEntry::nshared_objects, |
366 | 0 | t.nbits_shared_numerator, |
367 | 0 | &HPageOffsetEntry::shared_numerators); |
368 | 0 | load_vector_int( |
369 | 0 | h, nitems, entries, t.nbits_delta_content_offset, &HPageOffsetEntry::delta_content_offset); |
370 | 0 | load_vector_int( |
371 | 0 | h, nitems, entries, t.nbits_delta_content_length, &HPageOffsetEntry::delta_content_length); |
372 | 0 | } |
373 | | |
374 | | void |
375 | | QPDF::readHSharedObject(BitStream h) |
376 | 0 | { |
377 | 0 | HSharedObject& t = m->shared_object_hints; |
378 | |
|
379 | 0 | t.first_shared_obj = h.getBitsInt(32); // 1 |
380 | 0 | t.first_shared_offset = h.getBitsInt(32); // 2 |
381 | 0 | t.nshared_first_page = h.getBitsInt(32); // 3 |
382 | 0 | t.nshared_total = h.getBitsInt(32); // 4 |
383 | 0 | t.nbits_nobjects = h.getBitsInt(16); // 5 |
384 | 0 | t.min_group_length = h.getBitsInt(32); // 6 |
385 | 0 | t.nbits_delta_group_length = h.getBitsInt(16); // 7 |
386 | |
|
387 | 0 | QTC::TC( |
388 | 0 | "qpdf", |
389 | 0 | "QPDF lin nshared_total > nshared_first_page", |
390 | 0 | (t.nshared_total > t.nshared_first_page) ? 1 : 0); |
391 | |
|
392 | 0 | std::vector<HSharedObjectEntry>& entries = t.entries; |
393 | 0 | entries.clear(); |
394 | 0 | int nitems = t.nshared_total; |
395 | 0 | load_vector_int( |
396 | 0 | h, nitems, entries, t.nbits_delta_group_length, &HSharedObjectEntry::delta_group_length); |
397 | 0 | load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); |
398 | 0 | for (size_t i = 0; i < toS(nitems); ++i) { |
399 | 0 | if (entries.at(i).signature_present) { |
400 | | // Skip 128-bit MD5 hash. These are not supported by acrobat, so they should probably |
401 | | // never be there. We have no test case for this. |
402 | 0 | for (int j = 0; j < 4; ++j) { |
403 | 0 | (void)h.getBits(32); |
404 | 0 | } |
405 | 0 | } |
406 | 0 | } |
407 | 0 | load_vector_int(h, nitems, entries, t.nbits_nobjects, &HSharedObjectEntry::nobjects_minus_one); |
408 | 0 | } |
409 | | |
410 | | void |
411 | | QPDF::readHGeneric(BitStream h, HGeneric& t) |
412 | 0 | { |
413 | 0 | t.first_object = h.getBitsInt(32); // 1 |
414 | 0 | t.first_object_offset = h.getBitsInt(32); // 2 |
415 | 0 | t.nobjects = h.getBitsInt(32); // 3 |
416 | 0 | t.group_length = h.getBitsInt(32); // 4 |
417 | 0 | } |
418 | | |
419 | | bool |
420 | | QPDF::checkLinearizationInternal() |
421 | 0 | { |
422 | | // All comments referring to the PDF spec refer to the spec for version 1.4. |
423 | | |
424 | | // Check all values in linearization parameter dictionary |
425 | |
|
426 | 0 | LinParameters& p = m->linp; |
427 | | |
428 | | // L: file size in bytes -- checked by isLinearized |
429 | | |
430 | | // O: object number of first page |
431 | 0 | std::vector<QPDFObjectHandle> const& pages = getAllPages(); |
432 | 0 | if (p.first_page_object != pages.at(0).getObjectID()) { |
433 | 0 | QTC::TC("qpdf", "QPDF err /O mismatch"); |
434 | 0 | linearizationWarning("first page object (/O) mismatch"); |
435 | 0 | } |
436 | | |
437 | | // N: number of pages |
438 | 0 | int npages = toI(pages.size()); |
439 | 0 | if (p.npages != npages) { |
440 | | // Not tested in the test suite |
441 | 0 | linearizationWarning("page count (/N) mismatch"); |
442 | 0 | } |
443 | |
|
444 | 0 | for (size_t i = 0; i < toS(npages); ++i) { |
445 | 0 | QPDFObjectHandle const& page = pages.at(i); |
446 | 0 | QPDFObjGen og(page.getObjGen()); |
447 | 0 | if (m->xref_table[og].getType() == 2) { |
448 | 0 | linearizationWarning( |
449 | 0 | "page dictionary for page " + std::to_string(i) + " is compressed"); |
450 | 0 | } |
451 | 0 | } |
452 | | |
453 | | // T: offset of whitespace character preceding xref entry for object 0 |
454 | 0 | m->file->seek(p.xref_zero_offset, SEEK_SET); |
455 | 0 | while (true) { |
456 | 0 | char ch; |
457 | 0 | m->file->read(&ch, 1); |
458 | 0 | if (!((ch == ' ') || (ch == '\r') || (ch == '\n'))) { |
459 | 0 | m->file->seek(-1, SEEK_CUR); |
460 | 0 | break; |
461 | 0 | } |
462 | 0 | } |
463 | 0 | if (m->file->tell() != m->first_xref_item_offset) { |
464 | 0 | QTC::TC("qpdf", "QPDF err /T mismatch"); |
465 | 0 | linearizationWarning( |
466 | 0 | "space before first xref item (/T) mismatch " |
467 | 0 | "(computed = " + |
468 | 0 | std::to_string(m->first_xref_item_offset) + |
469 | 0 | "; file = " + std::to_string(m->file->tell())); |
470 | 0 | } |
471 | | |
472 | | // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will |
473 | | // too. |
474 | | |
475 | | // Check numbering of compressed objects in each xref section. For linearized files, all |
476 | | // compressed objects are supposed to be at the end of the containing xref section if any object |
477 | | // streams are in use. |
478 | |
|
479 | 0 | if (m->uncompressed_after_compressed) { |
480 | 0 | linearizationWarning("linearized file contains an uncompressed object after a compressed " |
481 | 0 | "one in a cross-reference stream"); |
482 | 0 | } |
483 | | |
484 | | // Further checking requires optimization and order calculation. Don't allow optimization to |
485 | | // make changes. If it has to, then the file is not properly linearized. We use the xref table |
486 | | // to figure out which objects are compressed and which are uncompressed. |
487 | 0 | { // local scope |
488 | 0 | std::map<int, int> object_stream_data; |
489 | 0 | for (auto const& iter: m->xref_table) { |
490 | 0 | QPDFObjGen const& og = iter.first; |
491 | 0 | QPDFXRefEntry const& entry = iter.second; |
492 | 0 | if (entry.getType() == 2) { |
493 | 0 | object_stream_data[og.getObj()] = entry.getObjStreamNumber(); |
494 | 0 | } |
495 | 0 | } |
496 | 0 | optimize(object_stream_data, false); |
497 | 0 | calculateLinearizationData(object_stream_data); |
498 | 0 | } |
499 | | |
500 | | // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra |
501 | | // object here by mistake. pdlin fails to place thumbnail images in section 9, so when |
502 | | // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines |
503 | | // here when it should even though it places them in part 6. This code fails to put thread |
504 | | // information dictionaries in part 9, so it actually gets the wrong value for E when threads |
505 | | // are present. In that case, it would probably agree with pdlin. As of this writing, the test |
506 | | // suite doesn't contain any files with threads. |
507 | |
|
508 | 0 | if (m->part6.empty()) { |
509 | 0 | stopOnError("linearization part 6 unexpectedly empty"); |
510 | 0 | } |
511 | 0 | qpdf_offset_t min_E = -1; |
512 | 0 | qpdf_offset_t max_E = -1; |
513 | 0 | for (auto const& oh: m->part6) { |
514 | 0 | QPDFObjGen og(oh.getObjGen()); |
515 | 0 | if (m->obj_cache.count(og) == 0) { |
516 | | // All objects have to have been dereferenced to be classified. |
517 | 0 | throw std::logic_error("linearization part6 object not in cache"); |
518 | 0 | } |
519 | 0 | ObjCache const& oc = m->obj_cache[og]; |
520 | 0 | min_E = std::max(min_E, oc.end_before_space); |
521 | 0 | max_E = std::max(max_E, oc.end_after_space); |
522 | 0 | } |
523 | 0 | if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) { |
524 | 0 | QTC::TC("qpdf", "QPDF warn /E mismatch"); |
525 | 0 | linearizationWarning( |
526 | 0 | "end of first page section (/E) mismatch: /E = " + std::to_string(p.first_page_end) + |
527 | 0 | "; computed = " + std::to_string(min_E) + ".." + std::to_string(max_E)); |
528 | 0 | } |
529 | | |
530 | | // Check hint tables |
531 | |
|
532 | 0 | std::map<int, int> shared_idx_to_obj; |
533 | 0 | checkHSharedObject(pages, shared_idx_to_obj); |
534 | 0 | checkHPageOffset(pages, shared_idx_to_obj); |
535 | 0 | checkHOutlines(); |
536 | |
|
537 | 0 | return !m->linearization_warnings; |
538 | 0 | } |
539 | | |
540 | | qpdf_offset_t |
541 | | QPDF::maxEnd(ObjUser const& ou) |
542 | 0 | { |
543 | 0 | if (m->obj_user_to_objects.count(ou) == 0) { |
544 | 0 | stopOnError("no entry in object user table for requested object user"); |
545 | 0 | } |
546 | 0 | qpdf_offset_t end = 0; |
547 | 0 | for (auto const& og: m->obj_user_to_objects[ou]) { |
548 | 0 | if (m->obj_cache.count(og) == 0) { |
549 | 0 | stopOnError("unknown object referenced in object user table"); |
550 | 0 | } |
551 | 0 | end = std::max(end, m->obj_cache[og].end_after_space); |
552 | 0 | } |
553 | 0 | return end; |
554 | 0 | } |
555 | | |
556 | | qpdf_offset_t |
557 | | QPDF::getLinearizationOffset(QPDFObjGen const& og) |
558 | 0 | { |
559 | 0 | QPDFXRefEntry entry = m->xref_table[og]; |
560 | 0 | qpdf_offset_t result = 0; |
561 | 0 | switch (entry.getType()) { |
562 | 0 | case 1: |
563 | 0 | result = entry.getOffset(); |
564 | 0 | break; |
565 | | |
566 | 0 | case 2: |
567 | | // For compressed objects, return the offset of the object stream that contains them. |
568 | 0 | result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); |
569 | 0 | break; |
570 | | |
571 | 0 | default: |
572 | 0 | stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); |
573 | 0 | break; |
574 | 0 | } |
575 | 0 | return result; |
576 | 0 | } |
577 | | |
578 | | QPDFObjectHandle |
579 | | QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& object_stream_data) |
580 | 0 | { |
581 | 0 | if (obj.isNull() || (object_stream_data.count(obj.getObjectID()) == 0)) { |
582 | 0 | return obj; |
583 | 0 | } else { |
584 | 0 | int repl = (*(object_stream_data.find(obj.getObjectID()))).second; |
585 | 0 | return getObject(repl, 0); |
586 | 0 | } |
587 | 0 | } |
588 | | |
589 | | QPDFObjectHandle |
590 | | QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj) |
591 | 29.5k | { |
592 | 29.5k | if (obj.contains(oh)) { |
593 | 29.3k | if (auto id = obj[oh].object_stream; id > 0) { |
594 | 20 | return oh.isNull() ? oh : getObject(id, 0); |
595 | 20 | } |
596 | 29.3k | } |
597 | 29.5k | return oh; |
598 | 29.5k | } |
599 | | |
600 | | int |
601 | | QPDF::lengthNextN(int first_object, int n) |
602 | 0 | { |
603 | 0 | int length = 0; |
604 | 0 | for (int i = 0; i < n; ++i) { |
605 | 0 | QPDFObjGen og(first_object + i, 0); |
606 | 0 | if (m->xref_table.count(og) == 0) { |
607 | 0 | linearizationWarning( |
608 | 0 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); |
609 | 0 | } else { |
610 | 0 | if (m->obj_cache.count(og) == 0) { |
611 | 0 | stopOnError("found unknown object while calculating length for linearization data"); |
612 | 0 | } |
613 | 0 | length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); |
614 | 0 | } |
615 | 0 | } |
616 | 0 | return length; |
617 | 0 | } |
618 | | |
619 | | void |
620 | | QPDF::checkHPageOffset( |
621 | | std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj) |
622 | 0 | { |
623 | | // Implementation note 126 says Acrobat always sets delta_content_offset and |
624 | | // delta_content_length in the page offset header dictionary to 0. It also states that |
625 | | // min_content_offset in the per-page information is always 0, which is an incorrect value. |
626 | | |
627 | | // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to |
628 | | // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length), |
629 | | // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length) |
630 | | // of that entry. Acrobat ignores these values when reading files. |
631 | | |
632 | | // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources |
633 | | // dictionary in with shared objects even when they are private. |
634 | |
|
635 | 0 | int npages = toI(pages.size()); |
636 | 0 | qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); |
637 | 0 | QPDFObjGen first_page_og(pages.at(0).getObjGen()); |
638 | 0 | if (m->xref_table.count(first_page_og) == 0) { |
639 | 0 | stopOnError("supposed first page object is not known"); |
640 | 0 | } |
641 | 0 | qpdf_offset_t offset = getLinearizationOffset(first_page_og); |
642 | 0 | if (table_offset != offset) { |
643 | 0 | linearizationWarning("first page object offset mismatch"); |
644 | 0 | } |
645 | |
|
646 | 0 | for (int pageno = 0; pageno < npages; ++pageno) { |
647 | 0 | QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); |
648 | 0 | int first_object = page_og.getObj(); |
649 | 0 | if (m->xref_table.count(page_og) == 0) { |
650 | 0 | stopOnError("unknown object in page offset hint table"); |
651 | 0 | } |
652 | 0 | offset = getLinearizationOffset(page_og); |
653 | |
|
654 | 0 | HPageOffsetEntry& he = m->page_offset_hints.entries.at(toS(pageno)); |
655 | 0 | CHPageOffsetEntry& ce = m->c_page_offset_data.entries.at(toS(pageno)); |
656 | 0 | int h_nobjects = he.delta_nobjects + m->page_offset_hints.min_nobjects; |
657 | 0 | if (h_nobjects != ce.nobjects) { |
658 | | // This happens with pdlin when there are thumbnails. |
659 | 0 | linearizationWarning( |
660 | 0 | "object count mismatch for page " + std::to_string(pageno) + ": hint table = " + |
661 | 0 | std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); |
662 | 0 | } |
663 | | |
664 | | // Use value for number of objects in hint table rather than computed value if there is a |
665 | | // discrepancy. |
666 | 0 | int length = lengthNextN(first_object, h_nobjects); |
667 | 0 | int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); |
668 | 0 | if (length != h_length) { |
669 | | // This condition almost certainly indicates a bad hint table or a bug in this code. |
670 | 0 | linearizationWarning( |
671 | 0 | "page length mismatch for page " + std::to_string(pageno) + ": hint table = " + |
672 | 0 | std::to_string(h_length) + "; computed length = " + std::to_string(length) + |
673 | 0 | " (offset = " + std::to_string(offset) + ")"); |
674 | 0 | } |
675 | |
|
676 | 0 | offset += h_length; |
677 | | |
678 | | // Translate shared object indexes to object numbers. |
679 | 0 | std::set<int> hint_shared; |
680 | 0 | std::set<int> computed_shared; |
681 | |
|
682 | 0 | if ((pageno == 0) && (he.nshared_objects > 0)) { |
683 | | // pdlin and Acrobat both do this even though the spec states clearly and unambiguously |
684 | | // that they should not. |
685 | 0 | linearizationWarning("page 0 has shared identifier entries"); |
686 | 0 | } |
687 | |
|
688 | 0 | for (size_t i = 0; i < toS(he.nshared_objects); ++i) { |
689 | 0 | int idx = he.shared_identifiers.at(i); |
690 | 0 | if (shared_idx_to_obj.count(idx) == 0) { |
691 | 0 | stopOnError("unable to get object for item in" |
692 | 0 | " shared objects hint table"); |
693 | 0 | } |
694 | 0 | hint_shared.insert(shared_idx_to_obj[idx]); |
695 | 0 | } |
696 | |
|
697 | 0 | for (size_t i = 0; i < toS(ce.nshared_objects); ++i) { |
698 | 0 | int idx = ce.shared_identifiers.at(i); |
699 | 0 | if (idx >= m->c_shared_object_data.nshared_total) { |
700 | 0 | stopOnError("index out of bounds for shared object hint table"); |
701 | 0 | } |
702 | 0 | int obj = m->c_shared_object_data.entries.at(toS(idx)).object; |
703 | 0 | computed_shared.insert(obj); |
704 | 0 | } |
705 | |
|
706 | 0 | for (int iter: hint_shared) { |
707 | 0 | if (!computed_shared.count(iter)) { |
708 | | // pdlin puts thumbnails here even though it shouldn't |
709 | 0 | linearizationWarning( |
710 | 0 | "page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + |
711 | 0 | ": in hint table but not computed list"); |
712 | 0 | } |
713 | 0 | } |
714 | |
|
715 | 0 | for (int iter: computed_shared) { |
716 | 0 | if (!hint_shared.count(iter)) { |
717 | | // Acrobat does not put some things including at least built-in fonts and procsets |
718 | | // here, at least in some cases. |
719 | 0 | linearizationWarning( |
720 | 0 | ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + |
721 | 0 | ": in computed list but not hint table")); |
722 | 0 | } |
723 | 0 | } |
724 | 0 | } |
725 | 0 | } |
726 | | |
727 | | void |
728 | | QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj) |
729 | 0 | { |
730 | | // Implementation note 125 says shared object groups always contain only one object. |
731 | | // Implementation note 128 says that Acrobat always nbits_nobjects to zero. Implementation note |
732 | | // 130 says that Acrobat does not support more than one shared object per group. These are all |
733 | | // consistent. |
734 | | |
735 | | // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so |
736 | | // signature_present must always be zero. |
737 | | |
738 | | // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless |
739 | | // values for single-page files. |
740 | | |
741 | | // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no |
742 | | // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page). |
743 | |
|
744 | 0 | HSharedObject& so = m->shared_object_hints; |
745 | 0 | if (so.nshared_total < so.nshared_first_page) { |
746 | 0 | linearizationWarning("shared object hint table: ntotal < nfirst_page"); |
747 | 0 | } else { |
748 | | // The first nshared_first_page objects are consecutive objects starting with the first page |
749 | | // object. The rest are consecutive starting from the first_shared_obj object. |
750 | 0 | int cur_object = pages.at(0).getObjectID(); |
751 | 0 | for (int i = 0; i < so.nshared_total; ++i) { |
752 | 0 | if (i == so.nshared_first_page) { |
753 | 0 | QTC::TC("qpdf", "QPDF lin check shared past first page"); |
754 | 0 | if (m->part8.empty()) { |
755 | 0 | linearizationWarning("part 8 is empty but nshared_total > " |
756 | 0 | "nshared_first_page"); |
757 | 0 | } else { |
758 | 0 | int obj = m->part8.at(0).getObjectID(); |
759 | 0 | if (obj != so.first_shared_obj) { |
760 | 0 | linearizationWarning( |
761 | 0 | "first shared object number mismatch: " |
762 | 0 | "hint table = " + |
763 | 0 | std::to_string(so.first_shared_obj) + |
764 | 0 | "; computed = " + std::to_string(obj)); |
765 | 0 | } |
766 | 0 | } |
767 | |
|
768 | 0 | cur_object = so.first_shared_obj; |
769 | |
|
770 | 0 | QPDFObjGen og(cur_object, 0); |
771 | 0 | if (m->xref_table.count(og) == 0) { |
772 | 0 | stopOnError("unknown object in shared object hint table"); |
773 | 0 | } |
774 | 0 | qpdf_offset_t offset = getLinearizationOffset(og); |
775 | 0 | qpdf_offset_t h_offset = adjusted_offset(so.first_shared_offset); |
776 | 0 | if (offset != h_offset) { |
777 | 0 | linearizationWarning( |
778 | 0 | "first shared object offset mismatch: hint table = " + |
779 | 0 | std::to_string(h_offset) + "; computed = " + std::to_string(offset)); |
780 | 0 | } |
781 | 0 | } |
782 | |
|
783 | 0 | idx_to_obj[i] = cur_object; |
784 | 0 | HSharedObjectEntry& se = so.entries.at(toS(i)); |
785 | 0 | int nobjects = se.nobjects_minus_one + 1; |
786 | 0 | int length = lengthNextN(cur_object, nobjects); |
787 | 0 | int h_length = so.min_group_length + se.delta_group_length; |
788 | 0 | if (length != h_length) { |
789 | 0 | linearizationWarning( |
790 | 0 | "shared object " + std::to_string(i) + " length mismatch: hint table = " + |
791 | 0 | std::to_string(h_length) + "; computed = " + std::to_string(length)); |
792 | 0 | } |
793 | 0 | cur_object += nobjects; |
794 | 0 | } |
795 | 0 | } |
796 | 0 | } |
797 | | |
798 | | void |
799 | | QPDF::checkHOutlines() |
800 | 0 | { |
801 | | // Empirically, Acrobat generates the correct value for the object number but incorrectly stores |
802 | | // the next object number's offset as the offset, at least when outlines appear in part 6. It |
803 | | // also generates an incorrect value for length (specifically, the length that would cover the |
804 | | // correct number of objects from the wrong starting place). pdlin appears to generate correct |
805 | | // values in those cases. |
806 | |
|
807 | 0 | if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { |
808 | 0 | if (m->c_outline_data.nobjects == 0) { |
809 | 0 | return; |
810 | 0 | } |
811 | | |
812 | 0 | if (m->c_outline_data.first_object == m->outline_hints.first_object) { |
813 | | // Check length and offset. Acrobat gets these wrong. |
814 | 0 | QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); |
815 | 0 | if (!outlines.isIndirect()) { |
816 | | // This case is not exercised in test suite since not permitted by the spec, but if |
817 | | // this does occur, the code below would fail. |
818 | 0 | linearizationWarning("/Outlines key of root dictionary is not indirect"); |
819 | 0 | return; |
820 | 0 | } |
821 | 0 | QPDFObjGen og(outlines.getObjGen()); |
822 | 0 | if (m->xref_table.count(og) == 0) { |
823 | 0 | stopOnError("unknown object in outlines hint table"); |
824 | 0 | } |
825 | 0 | qpdf_offset_t offset = getLinearizationOffset(og); |
826 | 0 | ObjUser ou(ObjUser::ou_root_key, "/Outlines"); |
827 | 0 | int length = toI(maxEnd(ou) - offset); |
828 | 0 | qpdf_offset_t table_offset = adjusted_offset(m->outline_hints.first_object_offset); |
829 | 0 | if (offset != table_offset) { |
830 | 0 | linearizationWarning( |
831 | 0 | "incorrect offset in outlines table: hint table = " + |
832 | 0 | std::to_string(table_offset) + "; computed = " + std::to_string(offset)); |
833 | 0 | } |
834 | 0 | int table_length = m->outline_hints.group_length; |
835 | 0 | if (length != table_length) { |
836 | 0 | linearizationWarning( |
837 | 0 | "incorrect length in outlines table: hint table = " + |
838 | 0 | std::to_string(table_length) + "; computed = " + std::to_string(length)); |
839 | 0 | } |
840 | 0 | } else { |
841 | 0 | linearizationWarning("incorrect first object number in outline " |
842 | 0 | "hints table."); |
843 | 0 | } |
844 | 0 | } else { |
845 | 0 | linearizationWarning("incorrect object count in outline hint table"); |
846 | 0 | } |
847 | 0 | } |
848 | | |
849 | | void |
850 | | QPDF::showLinearizationData() |
851 | 0 | { |
852 | 0 | try { |
853 | 0 | readLinearizationData(); |
854 | 0 | checkLinearizationInternal(); |
855 | 0 | dumpLinearizationDataInternal(); |
856 | 0 | } catch (QPDFExc& e) { |
857 | 0 | linearizationWarning(e.what()); |
858 | 0 | } |
859 | 0 | } |
860 | | |
861 | | void |
862 | | QPDF::dumpLinearizationDataInternal() |
863 | 0 | { |
864 | 0 | *m->log->getInfo() << m->file->getName() << ": linearization data:\n\n"; |
865 | |
|
866 | 0 | *m->log->getInfo() << "file_size: " << m->linp.file_size << "\n" |
867 | 0 | << "first_page_object: " << m->linp.first_page_object << "\n" |
868 | 0 | << "first_page_end: " << m->linp.first_page_end << "\n" |
869 | 0 | << "npages: " << m->linp.npages << "\n" |
870 | 0 | << "xref_zero_offset: " << m->linp.xref_zero_offset << "\n" |
871 | 0 | << "first_page: " << m->linp.first_page << "\n" |
872 | 0 | << "H_offset: " << m->linp.H_offset << "\n" |
873 | 0 | << "H_length: " << m->linp.H_length << "\n" |
874 | 0 | << "\n"; |
875 | |
|
876 | 0 | *m->log->getInfo() << "Page Offsets Hint Table\n\n"; |
877 | 0 | dumpHPageOffset(); |
878 | 0 | *m->log->getInfo() << "\nShared Objects Hint Table\n\n"; |
879 | 0 | dumpHSharedObject(); |
880 | |
|
881 | 0 | if (m->outline_hints.nobjects > 0) { |
882 | 0 | *m->log->getInfo() << "\nOutlines Hint Table\n\n"; |
883 | 0 | dumpHGeneric(m->outline_hints); |
884 | 0 | } |
885 | 0 | } |
886 | | |
887 | | qpdf_offset_t |
888 | | QPDF::adjusted_offset(qpdf_offset_t offset) |
889 | 0 | { |
890 | | // All offsets >= H_offset have to be increased by H_length since all hint table location values |
891 | | // disregard the hint table itself. |
892 | 0 | if (offset >= m->linp.H_offset) { |
893 | 0 | return offset + m->linp.H_length; |
894 | 0 | } |
895 | 0 | return offset; |
896 | 0 | } |
897 | | |
898 | | void |
899 | | QPDF::dumpHPageOffset() |
900 | 0 | { |
901 | 0 | HPageOffset& t = m->page_offset_hints; |
902 | 0 | *m->log->getInfo() << "min_nobjects: " << t.min_nobjects << "\n" |
903 | 0 | << "first_page_offset: " << adjusted_offset(t.first_page_offset) << "\n" |
904 | 0 | << "nbits_delta_nobjects: " << t.nbits_delta_nobjects << "\n" |
905 | 0 | << "min_page_length: " << t.min_page_length << "\n" |
906 | 0 | << "nbits_delta_page_length: " << t.nbits_delta_page_length << "\n" |
907 | 0 | << "min_content_offset: " << t.min_content_offset << "\n" |
908 | 0 | << "nbits_delta_content_offset: " << t.nbits_delta_content_offset << "\n" |
909 | 0 | << "min_content_length: " << t.min_content_length << "\n" |
910 | 0 | << "nbits_delta_content_length: " << t.nbits_delta_content_length << "\n" |
911 | 0 | << "nbits_nshared_objects: " << t.nbits_nshared_objects << "\n" |
912 | 0 | << "nbits_shared_identifier: " << t.nbits_shared_identifier << "\n" |
913 | 0 | << "nbits_shared_numerator: " << t.nbits_shared_numerator << "\n" |
914 | 0 | << "shared_denominator: " << t.shared_denominator << "\n"; |
915 | |
|
916 | 0 | for (size_t i1 = 0; i1 < toS(m->linp.npages); ++i1) { |
917 | 0 | HPageOffsetEntry& pe = t.entries.at(i1); |
918 | 0 | *m->log->getInfo() << "Page " << i1 << ":\n" |
919 | 0 | << " nobjects: " << pe.delta_nobjects + t.min_nobjects << "\n" |
920 | 0 | << " length: " << pe.delta_page_length + t.min_page_length |
921 | 0 | << "\n" |
922 | | // content offset is relative to page, not file |
923 | 0 | << " content_offset: " << pe.delta_content_offset + t.min_content_offset |
924 | 0 | << "\n" |
925 | 0 | << " content_length: " << pe.delta_content_length + t.min_content_length |
926 | 0 | << "\n" |
927 | 0 | << " nshared_objects: " << pe.nshared_objects << "\n"; |
928 | 0 | for (size_t i2 = 0; i2 < toS(pe.nshared_objects); ++i2) { |
929 | 0 | *m->log->getInfo() << " identifier " << i2 << ": " << pe.shared_identifiers.at(i2) |
930 | 0 | << "\n"; |
931 | 0 | *m->log->getInfo() << " numerator " << i2 << ": " << pe.shared_numerators.at(i2) |
932 | 0 | << "\n"; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | } |
936 | | |
937 | | void |
938 | | QPDF::dumpHSharedObject() |
939 | 0 | { |
940 | 0 | HSharedObject& t = m->shared_object_hints; |
941 | 0 | *m->log->getInfo() << "first_shared_obj: " << t.first_shared_obj << "\n" |
942 | 0 | << "first_shared_offset: " << adjusted_offset(t.first_shared_offset) << "\n" |
943 | 0 | << "nshared_first_page: " << t.nshared_first_page << "\n" |
944 | 0 | << "nshared_total: " << t.nshared_total << "\n" |
945 | 0 | << "nbits_nobjects: " << t.nbits_nobjects << "\n" |
946 | 0 | << "min_group_length: " << t.min_group_length << "\n" |
947 | 0 | << "nbits_delta_group_length: " << t.nbits_delta_group_length << "\n"; |
948 | |
|
949 | 0 | for (size_t i = 0; i < toS(t.nshared_total); ++i) { |
950 | 0 | HSharedObjectEntry& se = t.entries.at(i); |
951 | 0 | *m->log->getInfo() << "Shared Object " << i << ":\n" |
952 | 0 | << " group length: " << se.delta_group_length + t.min_group_length |
953 | 0 | << "\n"; |
954 | | // PDF spec says signature present nobjects_minus_one are always 0, so print them only if |
955 | | // they have a non-zero value. |
956 | 0 | if (se.signature_present) { |
957 | 0 | *m->log->getInfo() << " signature present\n"; |
958 | 0 | } |
959 | 0 | if (se.nobjects_minus_one != 0) { |
960 | 0 | *m->log->getInfo() << " nobjects: " << se.nobjects_minus_one + 1 << "\n"; |
961 | 0 | } |
962 | 0 | } |
963 | 0 | } |
964 | | |
965 | | void |
966 | | QPDF::dumpHGeneric(HGeneric& t) |
967 | 0 | { |
968 | 0 | *m->log->getInfo() << "first_object: " << t.first_object << "\n" |
969 | 0 | << "first_object_offset: " << adjusted_offset(t.first_object_offset) << "\n" |
970 | 0 | << "nobjects: " << t.nobjects << "\n" |
971 | 0 | << "group_length: " << t.group_length << "\n"; |
972 | 0 | } |
973 | | |
974 | | template <typename T> |
975 | | void |
976 | | QPDF::calculateLinearizationData(T const& object_stream_data) |
977 | 3.62k | { |
978 | | // This function calculates the ordering of objects, divides them into the appropriate parts, |
979 | | // and computes some values for the linearization parameter dictionary and hint tables. The |
980 | | // file must be optimized (via calling optimize()) prior to calling this function. Note that |
981 | | // actual offsets and lengths are not computed here, but anything related to object ordering is. |
982 | | |
983 | 3.62k | if (m->object_to_obj_users.empty()) { |
984 | | // Note that we can't call optimize here because we don't know whether it should be called |
985 | | // with or without allow changes. |
986 | 0 | throw std::logic_error( |
987 | 0 | "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()"); |
988 | 0 | } |
989 | | |
990 | | // Separate objects into the categories sufficient for us to determine which part of the |
991 | | // linearized file should contain the object. This categorization is useful for other purposes |
992 | | // as well. Part numbers refer to version 1.4 of the PDF spec. |
993 | | |
994 | | // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the |
995 | | // trailer dictionary in part 11). |
996 | | |
997 | | // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences, |
998 | | // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information |
999 | | // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation |
1000 | | // for now. |
1001 | | |
1002 | | // Part 6 is the first page section. It includes all remaining objects referenced by the first |
1003 | | // page including shared objects but not including thumbnails. Additionally, if /PageMode is |
1004 | | // /Outlines, then information from /Outlines also appears here. |
1005 | | |
1006 | | // Part 7 contains remaining objects private to pages other than the first page. |
1007 | | |
1008 | | // Part 8 contains all remaining shared objects except those that are shared only within |
1009 | | // thumbnails. |
1010 | | |
1011 | | // Part 9 contains all remaining objects. |
1012 | | |
1013 | | // We sort objects into the following categories: |
1014 | | |
1015 | | // * open_document: part 4 |
1016 | | |
1017 | | // * first_page_private: part 6 |
1018 | | |
1019 | | // * first_page_shared: part 6 |
1020 | | |
1021 | | // * other_page_private: part 7 |
1022 | | |
1023 | | // * other_page_shared: part 8 |
1024 | | |
1025 | | // * thumbnail_private: part 9 |
1026 | | |
1027 | | // * thumbnail_shared: part 9 |
1028 | | |
1029 | | // * other: part 9 |
1030 | | |
1031 | | // * outlines: part 6 or 9 |
1032 | | |
1033 | 3.62k | m->part4.clear(); |
1034 | 3.62k | m->part6.clear(); |
1035 | 3.62k | m->part7.clear(); |
1036 | 3.62k | m->part8.clear(); |
1037 | 3.62k | m->part9.clear(); |
1038 | 3.62k | m->c_linp = LinParameters(); |
1039 | 3.62k | m->c_page_offset_data = CHPageOffset(); |
1040 | 3.62k | m->c_shared_object_data = CHSharedObject(); |
1041 | 3.62k | m->c_outline_data = HGeneric(); |
1042 | | |
1043 | 3.62k | QPDFObjectHandle root = getRoot(); |
1044 | 3.62k | bool outlines_in_first_page = false; |
1045 | 3.62k | QPDFObjectHandle pagemode = root.getKey("/PageMode"); |
1046 | 3.62k | QTC::TC("qpdf", "QPDF categorize pagemode present", pagemode.isName() ? 1 : 0); |
1047 | 3.62k | if (pagemode.isName()) { |
1048 | 682 | if (pagemode.getName() == "/UseOutlines") { |
1049 | 518 | if (root.hasKey("/Outlines")) { |
1050 | 375 | outlines_in_first_page = true; |
1051 | 375 | } else { |
1052 | 143 | QTC::TC("qpdf", "QPDF UseOutlines but no Outlines"); |
1053 | 143 | } |
1054 | 518 | } |
1055 | 682 | QTC::TC("qpdf", "QPDF categorize pagemode outlines", outlines_in_first_page ? 1 : 0); |
1056 | 682 | } |
1057 | | |
1058 | 3.62k | std::set<std::string> open_document_keys; |
1059 | 3.62k | open_document_keys.insert("/ViewerPreferences"); |
1060 | 3.62k | open_document_keys.insert("/PageMode"); |
1061 | 3.62k | open_document_keys.insert("/Threads"); |
1062 | 3.62k | open_document_keys.insert("/OpenAction"); |
1063 | 3.62k | open_document_keys.insert("/AcroForm"); |
1064 | | |
1065 | 3.62k | std::set<QPDFObjGen> lc_open_document; |
1066 | 3.62k | std::set<QPDFObjGen> lc_first_page_private; |
1067 | 3.62k | std::set<QPDFObjGen> lc_first_page_shared; |
1068 | 3.62k | std::set<QPDFObjGen> lc_other_page_private; |
1069 | 3.62k | std::set<QPDFObjGen> lc_other_page_shared; |
1070 | 3.62k | std::set<QPDFObjGen> lc_thumbnail_private; |
1071 | 3.62k | std::set<QPDFObjGen> lc_thumbnail_shared; |
1072 | 3.62k | std::set<QPDFObjGen> lc_other; |
1073 | 3.62k | std::set<QPDFObjGen> lc_outlines; |
1074 | 3.62k | std::set<QPDFObjGen> lc_root; |
1075 | | |
1076 | 91.5k | for (auto& oiter: m->object_to_obj_users) { |
1077 | 91.5k | QPDFObjGen const& og = oiter.first; |
1078 | 91.5k | std::set<ObjUser>& ous = oiter.second; |
1079 | | |
1080 | 91.5k | bool in_open_document = false; |
1081 | 91.5k | bool in_first_page = false; |
1082 | 91.5k | int other_pages = 0; |
1083 | 91.5k | int thumbs = 0; |
1084 | 91.5k | int others = 0; |
1085 | 91.5k | bool in_outlines = false; |
1086 | 91.5k | bool is_root = false; |
1087 | | |
1088 | 374k | for (auto const& ou: ous) { |
1089 | 374k | switch (ou.ou_type) { |
1090 | 180k | case ObjUser::ou_trailer_key: |
1091 | 180k | if (ou.key == "/Encrypt") { |
1092 | 170 | in_open_document = true; |
1093 | 180k | } else { |
1094 | 180k | ++others; |
1095 | 180k | } |
1096 | 180k | break; |
1097 | | |
1098 | 9.30k | case ObjUser::ou_thumb: |
1099 | 9.30k | ++thumbs; |
1100 | 9.30k | break; |
1101 | | |
1102 | 46.4k | case ObjUser::ou_root_key: |
1103 | 46.4k | if (open_document_keys.count(ou.key) > 0) { |
1104 | 15.2k | in_open_document = true; |
1105 | 31.1k | } else if (ou.key == "/Outlines") { |
1106 | 4.54k | in_outlines = true; |
1107 | 26.6k | } else { |
1108 | 26.6k | ++others; |
1109 | 26.6k | } |
1110 | 46.4k | break; |
1111 | | |
1112 | 134k | case ObjUser::ou_page: |
1113 | 134k | if (ou.pageno == 0) { |
1114 | 41.0k | in_first_page = true; |
1115 | 93.2k | } else { |
1116 | 93.2k | ++other_pages; |
1117 | 93.2k | } |
1118 | 134k | break; |
1119 | | |
1120 | 3.62k | case ObjUser::ou_root: |
1121 | 3.62k | is_root = true; |
1122 | 3.62k | break; |
1123 | | |
1124 | 0 | case ObjUser::ou_bad: |
1125 | 0 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: " |
1126 | 0 | "invalid user type"); |
1127 | 0 | break; |
1128 | 374k | } |
1129 | 374k | } |
1130 | | |
1131 | 91.5k | if (is_root) { |
1132 | 3.62k | lc_root.insert(og); |
1133 | 87.9k | } else if (in_outlines) { |
1134 | 4.51k | lc_outlines.insert(og); |
1135 | 83.4k | } else if (in_open_document) { |
1136 | 15.3k | lc_open_document.insert(og); |
1137 | 68.0k | } else if ((in_first_page) && (others == 0) && (other_pages == 0) && (thumbs == 0)) { |
1138 | 15.2k | lc_first_page_private.insert(og); |
1139 | 52.8k | } else if (in_first_page) { |
1140 | 9.90k | lc_first_page_shared.insert(og); |
1141 | 42.9k | } else if ((other_pages == 1) && (others == 0) && (thumbs == 0)) { |
1142 | 23.6k | lc_other_page_private.insert(og); |
1143 | 23.6k | } else if (other_pages > 1) { |
1144 | 5.83k | lc_other_page_shared.insert(og); |
1145 | 13.5k | } else if ((thumbs == 1) && (others == 0)) { |
1146 | 2.06k | lc_thumbnail_private.insert(og); |
1147 | 11.4k | } else if (thumbs > 1) { |
1148 | 830 | lc_thumbnail_shared.insert(og); |
1149 | 10.6k | } else { |
1150 | 10.6k | lc_other.insert(og); |
1151 | 10.6k | } |
1152 | 91.5k | } |
1153 | | |
1154 | | // Generate ordering for objects in the output file. Sometimes we just dump right from a set |
1155 | | // into a vector. Rather than optimizing this by going straight into the vector, we'll leave |
1156 | | // these phases separate for now. That way, this section can be concerned only with ordering, |
1157 | | // and the above section can be considered only with categorization. Note that sets of |
1158 | | // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with |
1159 | | // the possible exception of hints tables which we won't see here anyway. That means that |
1160 | | // running calculateLinearizationData() on a linearized file should give results identical to |
1161 | | // the original file ordering. |
1162 | | |
1163 | | // We seem to traverse the page tree a lot in this code, but we can address this for a future |
1164 | | // code optimization if necessary. Premature optimization is the root of all evil. |
1165 | 3.62k | std::vector<QPDFObjectHandle> pages; |
1166 | 3.62k | { // local scope |
1167 | | // Map all page objects to the containing object stream. This should be a no-op in a |
1168 | | // properly linearized file. |
1169 | 15.2k | for (auto oh: getAllPages()) { |
1170 | 15.2k | pages.push_back(getUncompressedObject(oh, object_stream_data)); |
1171 | 15.2k | } |
1172 | 3.62k | } |
1173 | 3.62k | int npages = toI(pages.size()); |
1174 | | |
1175 | | // We will be initializing some values of the computed hint tables. Specifically, we can |
1176 | | // initialize any items that deal with object numbers or counts but not any items that deal with |
1177 | | // lengths or offsets. The code that writes linearized files will have to fill in these values |
1178 | | // during the first pass. The validation code can compute them relatively easily given the rest |
1179 | | // of the information. |
1180 | | |
1181 | | // npages is the size of the existing pages vector, which has been created by traversing the |
1182 | | // pages tree, and as such is a reasonable size. |
1183 | 3.62k | m->c_linp.npages = npages; |
1184 | 3.62k | m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); |
1185 | | |
1186 | | // Part 4: open document objects. We don't care about the order. |
1187 | | |
1188 | 3.62k | if (lc_root.size() != 1) { |
1189 | 0 | stopOnError("found other than one root while" |
1190 | 0 | " calculating linearization data"); |
1191 | 0 | } |
1192 | 3.62k | m->part4.push_back(getObject(*(lc_root.begin()))); |
1193 | 15.3k | for (auto const& og: lc_open_document) { |
1194 | 15.3k | m->part4.push_back(getObject(og)); |
1195 | 15.3k | } |
1196 | | |
1197 | | // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats |
1198 | | // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide |
1199 | | // any option to set this and also disregards /OpenAction. We will do the same. |
1200 | | |
1201 | | // First, place the actual first page object itself. |
1202 | 3.62k | if (pages.empty()) { |
1203 | 20 | stopOnError("no pages found while calculating linearization data"); |
1204 | 20 | } |
1205 | 3.62k | QPDFObjGen first_page_og(pages.at(0).getObjGen()); |
1206 | 3.62k | if (!lc_first_page_private.count(first_page_og)) { |
1207 | 221 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: first page " |
1208 | 221 | "object not in lc_first_page_private"); |
1209 | 221 | } |
1210 | 3.62k | lc_first_page_private.erase(first_page_og); |
1211 | 3.62k | m->c_linp.first_page_object = pages.at(0).getObjectID(); |
1212 | 3.62k | m->part6.push_back(pages.at(0)); |
1213 | | |
1214 | | // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard |
1215 | | // it except to the extent that it groups private and shared objects contiguously for the sake |
1216 | | // of hint tables. |
1217 | | |
1218 | 11.8k | for (auto const& og: lc_first_page_private) { |
1219 | 11.8k | m->part6.push_back(getObject(og)); |
1220 | 11.8k | } |
1221 | | |
1222 | 9.15k | for (auto const& og: lc_first_page_shared) { |
1223 | 9.15k | m->part6.push_back(getObject(og)); |
1224 | 9.15k | } |
1225 | | |
1226 | | // Place the outline dictionary if it goes in the first page section. |
1227 | 3.62k | if (outlines_in_first_page) { |
1228 | 370 | pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); |
1229 | 370 | } |
1230 | | |
1231 | | // Fill in page offset hint table information for the first page. The PDF spec says that |
1232 | | // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but |
1233 | | // it fills in garbage values for all the shared object identifiers on the first page. |
1234 | | |
1235 | 3.62k | m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); |
1236 | | |
1237 | | // Part 7: other pages' private objects |
1238 | | |
1239 | | // For each page in order: |
1240 | 14.3k | for (size_t i = 1; i < toS(npages); ++i) { |
1241 | | // Place this page's page object |
1242 | | |
1243 | 10.7k | QPDFObjGen page_og(pages.at(i).getObjGen()); |
1244 | 10.7k | if (!lc_other_page_private.count(page_og)) { |
1245 | 111 | stopOnError( |
1246 | 111 | "INTERNAL ERROR: " |
1247 | 111 | "QPDF::calculateLinearizationData: page object for page " + |
1248 | 111 | std::to_string(i) + " not in lc_other_page_private"); |
1249 | 111 | } |
1250 | 10.7k | lc_other_page_private.erase(page_og); |
1251 | 10.7k | m->part7.push_back(pages.at(i)); |
1252 | | |
1253 | | // Place all non-shared objects referenced by this page, updating the page object count for |
1254 | | // the hint table. |
1255 | | |
1256 | 10.7k | m->c_page_offset_data.entries.at(i).nobjects = 1; |
1257 | | |
1258 | 10.7k | ObjUser ou(ObjUser::ou_page, toI(i)); |
1259 | 10.7k | if (m->obj_user_to_objects.count(ou) == 0) { |
1260 | 0 | stopOnError("found unreferenced page while" |
1261 | 0 | " calculating linearization data"); |
1262 | 0 | } |
1263 | 88.1k | for (auto const& og: m->obj_user_to_objects[ou]) { |
1264 | 88.1k | if (lc_other_page_private.count(og)) { |
1265 | 11.8k | lc_other_page_private.erase(og); |
1266 | 11.8k | m->part7.push_back(getObject(og)); |
1267 | 11.8k | ++m->c_page_offset_data.entries.at(i).nobjects; |
1268 | 11.8k | } |
1269 | 88.1k | } |
1270 | 10.7k | } |
1271 | | // That should have covered all part7 objects. |
1272 | 3.62k | if (!lc_other_page_private.empty()) { |
1273 | 0 | stopOnError("INTERNAL ERROR:" |
1274 | 0 | " QPDF::calculateLinearizationData: lc_other_page_private is " |
1275 | 0 | "not empty after generation of part7"); |
1276 | 0 | } |
1277 | | |
1278 | | // Part 8: other pages' shared objects |
1279 | | |
1280 | | // Order is unimportant. |
1281 | 5.35k | for (auto const& og: lc_other_page_shared) { |
1282 | 5.35k | m->part8.push_back(getObject(og)); |
1283 | 5.35k | } |
1284 | | |
1285 | | // Part 9: other objects |
1286 | | |
1287 | | // The PDF specification makes recommendations on ordering here. We follow them only to a |
1288 | | // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in |
1289 | | // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that, |
1290 | | // we throw all remaining objects in arbitrary order. |
1291 | | |
1292 | | // Place the pages tree. |
1293 | 3.62k | std::set<QPDFObjGen> pages_ogs = |
1294 | 3.62k | m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; |
1295 | 3.62k | if (pages_ogs.empty()) { |
1296 | 12 | stopOnError("found empty pages tree while" |
1297 | 12 | " calculating linearization data"); |
1298 | 12 | } |
1299 | 5.45k | for (auto const& og: pages_ogs) { |
1300 | 5.45k | if (lc_other.count(og)) { |
1301 | 2.51k | lc_other.erase(og); |
1302 | 2.51k | m->part9.push_back(getObject(og)); |
1303 | 2.51k | } |
1304 | 5.45k | } |
1305 | | |
1306 | | // Place private thumbnail images in page order. Slightly more information would be required if |
1307 | | // we were going to bother with thumbnail hint tables. |
1308 | 17.3k | for (size_t i = 0; i < toS(npages); ++i) { |
1309 | 13.7k | QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); |
1310 | 13.7k | thumb = getUncompressedObject(thumb, object_stream_data); |
1311 | 13.7k | if (!thumb.isNull()) { |
1312 | | // Output the thumbnail itself |
1313 | 1.66k | QPDFObjGen thumb_og(thumb.getObjGen()); |
1314 | 1.66k | if (lc_thumbnail_private.count(thumb_og)) { |
1315 | 1.47k | lc_thumbnail_private.erase(thumb_og); |
1316 | 1.47k | m->part9.push_back(thumb); |
1317 | 1.47k | } else { |
1318 | | // No internal error this time...there's nothing to stop this object from having |
1319 | | // been referred to somewhere else outside of a page's /Thumb, and if it had been, |
1320 | | // there's nothing to prevent it from having been in some set other than |
1321 | | // lc_thumbnail_private. |
1322 | 190 | } |
1323 | 1.66k | std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; |
1324 | 8.62k | for (auto const& og: ogs) { |
1325 | 8.62k | if (lc_thumbnail_private.count(og)) { |
1326 | 437 | lc_thumbnail_private.erase(og); |
1327 | 437 | m->part9.push_back(getObject(og)); |
1328 | 437 | } |
1329 | 8.62k | } |
1330 | 1.66k | } |
1331 | 13.7k | } |
1332 | 3.62k | if (!lc_thumbnail_private.empty()) { |
1333 | 7 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not " |
1334 | 7 | "empty after placing thumbnails"); |
1335 | 7 | } |
1336 | | |
1337 | | // Place shared thumbnail objects |
1338 | 3.62k | for (auto const& og: lc_thumbnail_shared) { |
1339 | 794 | m->part9.push_back(getObject(og)); |
1340 | 794 | } |
1341 | | |
1342 | | // Place outlines unless in first page |
1343 | 3.62k | if (!outlines_in_first_page) { |
1344 | 2.89k | pushOutlinesToPart(m->part9, lc_outlines, object_stream_data); |
1345 | 2.89k | } |
1346 | | |
1347 | | // Place all remaining objects |
1348 | 7.22k | for (auto const& og: lc_other) { |
1349 | 7.22k | m->part9.push_back(getObject(og)); |
1350 | 7.22k | } |
1351 | | |
1352 | | // Make sure we got everything exactly once. |
1353 | | |
1354 | 3.62k | size_t num_placed = |
1355 | 3.62k | m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); |
1356 | 3.62k | size_t num_wanted = m->object_to_obj_users.size(); |
1357 | 3.62k | if (num_placed != num_wanted) { |
1358 | 47 | stopOnError( |
1359 | 47 | "INTERNAL ERROR: QPDF::calculateLinearizationData: wrong " |
1360 | 47 | "number of objects placed (num_placed = " + |
1361 | 47 | std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); |
1362 | 47 | } |
1363 | | |
1364 | | // Calculate shared object hint table information including references to shared objects from |
1365 | | // page offset hint data. |
1366 | | |
1367 | | // The shared object hint table consists of all part 6 (whether shared or not) in order followed |
1368 | | // by all part 8 objects in order. Add the objects to shared object data keeping a map of |
1369 | | // object number to index. Then populate the shared object information for the pages. |
1370 | | |
1371 | | // Note that two objects never have the same object number, so we can map from object number |
1372 | | // only without regards to generation. |
1373 | 3.62k | std::map<int, int> obj_to_index; |
1374 | | |
1375 | 3.62k | m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); |
1376 | 3.62k | m->c_shared_object_data.nshared_total = |
1377 | 3.62k | m->c_shared_object_data.nshared_first_page + toI(m->part8.size()); |
1378 | | |
1379 | 3.62k | std::vector<CHSharedObjectEntry>& shared = m->c_shared_object_data.entries; |
1380 | 26.6k | for (auto& oh: m->part6) { |
1381 | 26.6k | int obj = oh.getObjectID(); |
1382 | 26.6k | obj_to_index[obj] = toI(shared.size()); |
1383 | 26.6k | shared.emplace_back(obj); |
1384 | 26.6k | } |
1385 | 3.62k | QTC::TC("qpdf", "QPDF lin part 8 empty", m->part8.empty() ? 1 : 0); |
1386 | 3.62k | if (!m->part8.empty()) { |
1387 | 313 | m->c_shared_object_data.first_shared_obj = m->part8.at(0).getObjectID(); |
1388 | 5.34k | for (auto& oh: m->part8) { |
1389 | 5.34k | int obj = oh.getObjectID(); |
1390 | 5.34k | obj_to_index[obj] = toI(shared.size()); |
1391 | 5.34k | shared.emplace_back(obj); |
1392 | 5.34k | } |
1393 | 313 | } |
1394 | 3.62k | if (static_cast<size_t>(m->c_shared_object_data.nshared_total) != |
1395 | 3.62k | m->c_shared_object_data.entries.size()) { |
1396 | 0 | stopOnError("shared object hint table has wrong number of entries"); |
1397 | 0 | } |
1398 | | |
1399 | | // Now compute the list of shared objects for each page after the first page. |
1400 | | |
1401 | 14.0k | for (size_t i = 1; i < toS(npages); ++i) { |
1402 | 10.4k | CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); |
1403 | 10.4k | ObjUser ou(ObjUser::ou_page, toI(i)); |
1404 | 10.4k | if (m->obj_user_to_objects.count(ou) == 0) { |
1405 | 0 | stopOnError("found unreferenced page while" |
1406 | 0 | " calculating linearization data"); |
1407 | 0 | } |
1408 | 87.6k | for (auto const& og: m->obj_user_to_objects[ou]) { |
1409 | 87.6k | if ((m->object_to_obj_users[og].size() > 1) && (obj_to_index.count(og.getObj()) > 0)) { |
1410 | 50.7k | int idx = obj_to_index[og.getObj()]; |
1411 | 50.7k | ++pe.nshared_objects; |
1412 | 50.7k | pe.shared_identifiers.push_back(idx); |
1413 | 50.7k | } |
1414 | 87.6k | } |
1415 | 10.4k | } |
1416 | 3.62k | } Unexecuted instantiation: void QPDF::calculateLinearizationData<std::__1::map<int, int, std::__1::less<int>, std::__1::allocator<std::__1::pair<int const, int> > > >(std::__1::map<int, int, std::__1::less<int>, std::__1::allocator<std::__1::pair<int const, int> > > const&) void QPDF::calculateLinearizationData<QPDFWriter::ObjTable>(QPDFWriter::ObjTable const&) Line | Count | Source | 977 | 3.62k | { | 978 | | // This function calculates the ordering of objects, divides them into the appropriate parts, | 979 | | // and computes some values for the linearization parameter dictionary and hint tables. The | 980 | | // file must be optimized (via calling optimize()) prior to calling this function. Note that | 981 | | // actual offsets and lengths are not computed here, but anything related to object ordering is. | 982 | | | 983 | 3.62k | if (m->object_to_obj_users.empty()) { | 984 | | // Note that we can't call optimize here because we don't know whether it should be called | 985 | | // with or without allow changes. | 986 | 0 | throw std::logic_error( | 987 | 0 | "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()"); | 988 | 0 | } | 989 | | | 990 | | // Separate objects into the categories sufficient for us to determine which part of the | 991 | | // linearized file should contain the object. This categorization is useful for other purposes | 992 | | // as well. Part numbers refer to version 1.4 of the PDF spec. | 993 | | | 994 | | // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the | 995 | | // trailer dictionary in part 11). | 996 | | | 997 | | // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences, | 998 | | // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information | 999 | | // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation | 1000 | | // for now. 
| 1001 | | | 1002 | | // Part 6 is the first page section. It includes all remaining objects referenced by the first | 1003 | | // page including shared objects but not including thumbnails. Additionally, if /PageMode is | 1004 | | // /Outlines, then information from /Outlines also appears here. | 1005 | | | 1006 | | // Part 7 contains remaining objects private to pages other than the first page. | 1007 | | | 1008 | | // Part 8 contains all remaining shared objects except those that are shared only within | 1009 | | // thumbnails. | 1010 | | | 1011 | | // Part 9 contains all remaining objects. | 1012 | | | 1013 | | // We sort objects into the following categories: | 1014 | | | 1015 | | // * open_document: part 4 | 1016 | | | 1017 | | // * first_page_private: part 6 | 1018 | | | 1019 | | // * first_page_shared: part 6 | 1020 | | | 1021 | | // * other_page_private: part 7 | 1022 | | | 1023 | | // * other_page_shared: part 8 | 1024 | | | 1025 | | // * thumbnail_private: part 9 | 1026 | | | 1027 | | // * thumbnail_shared: part 9 | 1028 | | | 1029 | | // * other: part 9 | 1030 | | | 1031 | | // * outlines: part 6 or 9 | 1032 | | | 1033 | 3.62k | m->part4.clear(); | 1034 | 3.62k | m->part6.clear(); | 1035 | 3.62k | m->part7.clear(); | 1036 | 3.62k | m->part8.clear(); | 1037 | 3.62k | m->part9.clear(); | 1038 | 3.62k | m->c_linp = LinParameters(); | 1039 | 3.62k | m->c_page_offset_data = CHPageOffset(); | 1040 | 3.62k | m->c_shared_object_data = CHSharedObject(); | 1041 | 3.62k | m->c_outline_data = HGeneric(); | 1042 | | | 1043 | 3.62k | QPDFObjectHandle root = getRoot(); | 1044 | 3.62k | bool outlines_in_first_page = false; | 1045 | 3.62k | QPDFObjectHandle pagemode = root.getKey("/PageMode"); | 1046 | 3.62k | QTC::TC("qpdf", "QPDF categorize pagemode present", pagemode.isName() ? 
1 : 0); | 1047 | 3.62k | if (pagemode.isName()) { | 1048 | 682 | if (pagemode.getName() == "/UseOutlines") { | 1049 | 518 | if (root.hasKey("/Outlines")) { | 1050 | 375 | outlines_in_first_page = true; | 1051 | 375 | } else { | 1052 | 143 | QTC::TC("qpdf", "QPDF UseOutlines but no Outlines"); | 1053 | 143 | } | 1054 | 518 | } | 1055 | 682 | QTC::TC("qpdf", "QPDF categorize pagemode outlines", outlines_in_first_page ? 1 : 0); | 1056 | 682 | } | 1057 | | | 1058 | 3.62k | std::set<std::string> open_document_keys; | 1059 | 3.62k | open_document_keys.insert("/ViewerPreferences"); | 1060 | 3.62k | open_document_keys.insert("/PageMode"); | 1061 | 3.62k | open_document_keys.insert("/Threads"); | 1062 | 3.62k | open_document_keys.insert("/OpenAction"); | 1063 | 3.62k | open_document_keys.insert("/AcroForm"); | 1064 | | | 1065 | 3.62k | std::set<QPDFObjGen> lc_open_document; | 1066 | 3.62k | std::set<QPDFObjGen> lc_first_page_private; | 1067 | 3.62k | std::set<QPDFObjGen> lc_first_page_shared; | 1068 | 3.62k | std::set<QPDFObjGen> lc_other_page_private; | 1069 | 3.62k | std::set<QPDFObjGen> lc_other_page_shared; | 1070 | 3.62k | std::set<QPDFObjGen> lc_thumbnail_private; | 1071 | 3.62k | std::set<QPDFObjGen> lc_thumbnail_shared; | 1072 | 3.62k | std::set<QPDFObjGen> lc_other; | 1073 | 3.62k | std::set<QPDFObjGen> lc_outlines; | 1074 | 3.62k | std::set<QPDFObjGen> lc_root; | 1075 | | | 1076 | 91.5k | for (auto& oiter: m->object_to_obj_users) { | 1077 | 91.5k | QPDFObjGen const& og = oiter.first; | 1078 | 91.5k | std::set<ObjUser>& ous = oiter.second; | 1079 | | | 1080 | 91.5k | bool in_open_document = false; | 1081 | 91.5k | bool in_first_page = false; | 1082 | 91.5k | int other_pages = 0; | 1083 | 91.5k | int thumbs = 0; | 1084 | 91.5k | int others = 0; | 1085 | 91.5k | bool in_outlines = false; | 1086 | 91.5k | bool is_root = false; | 1087 | | | 1088 | 374k | for (auto const& ou: ous) { | 1089 | 374k | switch (ou.ou_type) { | 1090 | 180k | case ObjUser::ou_trailer_key: | 
1091 | 180k | if (ou.key == "/Encrypt") { | 1092 | 170 | in_open_document = true; | 1093 | 180k | } else { | 1094 | 180k | ++others; | 1095 | 180k | } | 1096 | 180k | break; | 1097 | | | 1098 | 9.30k | case ObjUser::ou_thumb: | 1099 | 9.30k | ++thumbs; | 1100 | 9.30k | break; | 1101 | | | 1102 | 46.4k | case ObjUser::ou_root_key: | 1103 | 46.4k | if (open_document_keys.count(ou.key) > 0) { | 1104 | 15.2k | in_open_document = true; | 1105 | 31.1k | } else if (ou.key == "/Outlines") { | 1106 | 4.54k | in_outlines = true; | 1107 | 26.6k | } else { | 1108 | 26.6k | ++others; | 1109 | 26.6k | } | 1110 | 46.4k | break; | 1111 | | | 1112 | 134k | case ObjUser::ou_page: | 1113 | 134k | if (ou.pageno == 0) { | 1114 | 41.0k | in_first_page = true; | 1115 | 93.2k | } else { | 1116 | 93.2k | ++other_pages; | 1117 | 93.2k | } | 1118 | 134k | break; | 1119 | | | 1120 | 3.62k | case ObjUser::ou_root: | 1121 | 3.62k | is_root = true; | 1122 | 3.62k | break; | 1123 | | | 1124 | 0 | case ObjUser::ou_bad: | 1125 | 0 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: " | 1126 | 0 | "invalid user type"); | 1127 | 0 | break; | 1128 | 374k | } | 1129 | 374k | } | 1130 | | | 1131 | 91.5k | if (is_root) { | 1132 | 3.62k | lc_root.insert(og); | 1133 | 87.9k | } else if (in_outlines) { | 1134 | 4.51k | lc_outlines.insert(og); | 1135 | 83.4k | } else if (in_open_document) { | 1136 | 15.3k | lc_open_document.insert(og); | 1137 | 68.0k | } else if ((in_first_page) && (others == 0) && (other_pages == 0) && (thumbs == 0)) { | 1138 | 15.2k | lc_first_page_private.insert(og); | 1139 | 52.8k | } else if (in_first_page) { | 1140 | 9.90k | lc_first_page_shared.insert(og); | 1141 | 42.9k | } else if ((other_pages == 1) && (others == 0) && (thumbs == 0)) { | 1142 | 23.6k | lc_other_page_private.insert(og); | 1143 | 23.6k | } else if (other_pages > 1) { | 1144 | 5.83k | lc_other_page_shared.insert(og); | 1145 | 13.5k | } else if ((thumbs == 1) && (others == 0)) { | 1146 | 2.06k | 
lc_thumbnail_private.insert(og); | 1147 | 11.4k | } else if (thumbs > 1) { | 1148 | 830 | lc_thumbnail_shared.insert(og); | 1149 | 10.6k | } else { | 1150 | 10.6k | lc_other.insert(og); | 1151 | 10.6k | } | 1152 | 91.5k | } | 1153 | | | 1154 | | // Generate ordering for objects in the output file. Sometimes we just dump right from a set | 1155 | | // into a vector. Rather than optimizing this by going straight into the vector, we'll leave | 1156 | | // these phases separate for now. That way, this section can be concerned only with ordering, | 1157 | | // and the above section can be considered only with categorization. Note that sets of | 1158 | | // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with | 1159 | | // the possible exception of hints tables which we won't see here anyway. That means that | 1160 | | // running calculateLinearizationData() on a linearized file should give results identical to | 1161 | | // the original file ordering. | 1162 | | | 1163 | | // We seem to traverse the page tree a lot in this code, but we can address this for a future | 1164 | | // code optimization if necessary. Premature optimization is the root of all evil. | 1165 | 3.62k | std::vector<QPDFObjectHandle> pages; | 1166 | 3.62k | { // local scope | 1167 | | // Map all page objects to the containing object stream. This should be a no-op in a | 1168 | | // properly linearized file. | 1169 | 15.2k | for (auto oh: getAllPages()) { | 1170 | 15.2k | pages.push_back(getUncompressedObject(oh, object_stream_data)); | 1171 | 15.2k | } | 1172 | 3.62k | } | 1173 | 3.62k | int npages = toI(pages.size()); | 1174 | | | 1175 | | // We will be initializing some values of the computed hint tables. Specifically, we can | 1176 | | // initialize any items that deal with object numbers or counts but not any items that deal with | 1177 | | // lengths or offsets. 
The code that writes linearized files will have to fill in these values | 1178 | | // during the first pass. The validation code can compute them relatively easily given the rest | 1179 | | // of the information. | 1180 | | | 1181 | | // npages is the size of the existing pages vector, which has been created by traversing the | 1182 | | // pages tree, and as such is a reasonable size. | 1183 | 3.62k | m->c_linp.npages = npages; | 1184 | 3.62k | m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); | 1185 | | | 1186 | | // Part 4: open document objects. We don't care about the order. | 1187 | | | 1188 | 3.62k | if (lc_root.size() != 1) { | 1189 | 0 | stopOnError("found other than one root while" | 1190 | 0 | " calculating linearization data"); | 1191 | 0 | } | 1192 | 3.62k | m->part4.push_back(getObject(*(lc_root.begin()))); | 1193 | 15.3k | for (auto const& og: lc_open_document) { | 1194 | 15.3k | m->part4.push_back(getObject(og)); | 1195 | 15.3k | } | 1196 | | | 1197 | | // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats | 1198 | | // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide | 1199 | | // any option to set this and also disregards /OpenAction. We will do the same. | 1200 | | | 1201 | | // First, place the actual first page object itself. 
| 1202 | 3.62k | if (pages.empty()) { | 1203 | 20 | stopOnError("no pages found while calculating linearization data"); | 1204 | 20 | } | 1205 | 3.62k | QPDFObjGen first_page_og(pages.at(0).getObjGen()); | 1206 | 3.62k | if (!lc_first_page_private.count(first_page_og)) { | 1207 | 221 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: first page " | 1208 | 221 | "object not in lc_first_page_private"); | 1209 | 221 | } | 1210 | 3.62k | lc_first_page_private.erase(first_page_og); | 1211 | 3.62k | m->c_linp.first_page_object = pages.at(0).getObjectID(); | 1212 | 3.62k | m->part6.push_back(pages.at(0)); | 1213 | | | 1214 | | // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard | 1215 | | // it except to the extent that it groups private and shared objects contiguously for the sake | 1216 | | // of hint tables. | 1217 | | | 1218 | 11.8k | for (auto const& og: lc_first_page_private) { | 1219 | 11.8k | m->part6.push_back(getObject(og)); | 1220 | 11.8k | } | 1221 | | | 1222 | 9.15k | for (auto const& og: lc_first_page_shared) { | 1223 | 9.15k | m->part6.push_back(getObject(og)); | 1224 | 9.15k | } | 1225 | | | 1226 | | // Place the outline dictionary if it goes in the first page section. | 1227 | 3.62k | if (outlines_in_first_page) { | 1228 | 370 | pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); | 1229 | 370 | } | 1230 | | | 1231 | | // Fill in page offset hint table information for the first page. The PDF spec says that | 1232 | | // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but | 1233 | | // it fills in garbage values for all the shared object identifiers on the first page. 
| 1234 | | | 1235 | 3.62k | m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); | 1236 | | | 1237 | | // Part 7: other pages' private objects | 1238 | | | 1239 | | // For each page in order: | 1240 | 14.3k | for (size_t i = 1; i < toS(npages); ++i) { | 1241 | | // Place this page's page object | 1242 | | | 1243 | 10.7k | QPDFObjGen page_og(pages.at(i).getObjGen()); | 1244 | 10.7k | if (!lc_other_page_private.count(page_og)) { | 1245 | 111 | stopOnError( | 1246 | 111 | "INTERNAL ERROR: " | 1247 | 111 | "QPDF::calculateLinearizationData: page object for page " + | 1248 | 111 | std::to_string(i) + " not in lc_other_page_private"); | 1249 | 111 | } | 1250 | 10.7k | lc_other_page_private.erase(page_og); | 1251 | 10.7k | m->part7.push_back(pages.at(i)); | 1252 | | | 1253 | | // Place all non-shared objects referenced by this page, updating the page object count for | 1254 | | // the hint table. | 1255 | | | 1256 | 10.7k | m->c_page_offset_data.entries.at(i).nobjects = 1; | 1257 | | | 1258 | 10.7k | ObjUser ou(ObjUser::ou_page, toI(i)); | 1259 | 10.7k | if (m->obj_user_to_objects.count(ou) == 0) { | 1260 | 0 | stopOnError("found unreferenced page while" | 1261 | 0 | " calculating linearization data"); | 1262 | 0 | } | 1263 | 88.1k | for (auto const& og: m->obj_user_to_objects[ou]) { | 1264 | 88.1k | if (lc_other_page_private.count(og)) { | 1265 | 11.8k | lc_other_page_private.erase(og); | 1266 | 11.8k | m->part7.push_back(getObject(og)); | 1267 | 11.8k | ++m->c_page_offset_data.entries.at(i).nobjects; | 1268 | 11.8k | } | 1269 | 88.1k | } | 1270 | 10.7k | } | 1271 | | // That should have covered all part7 objects. 
| 1272 | 3.62k | if (!lc_other_page_private.empty()) { | 1273 | 0 | stopOnError("INTERNAL ERROR:" | 1274 | 0 | " QPDF::calculateLinearizationData: lc_other_page_private is " | 1275 | 0 | "not empty after generation of part7"); | 1276 | 0 | } | 1277 | | | 1278 | | // Part 8: other pages' shared objects | 1279 | | | 1280 | | // Order is unimportant. | 1281 | 5.35k | for (auto const& og: lc_other_page_shared) { | 1282 | 5.35k | m->part8.push_back(getObject(og)); | 1283 | 5.35k | } | 1284 | | | 1285 | | // Part 9: other objects | 1286 | | | 1287 | | // The PDF specification makes recommendations on ordering here. We follow them only to a | 1288 | | // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in | 1289 | | // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that, | 1290 | | // we throw all remaining objects in arbitrary order. | 1291 | | | 1292 | | // Place the pages tree. | 1293 | 3.62k | std::set<QPDFObjGen> pages_ogs = | 1294 | 3.62k | m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; | 1295 | 3.62k | if (pages_ogs.empty()) { | 1296 | 12 | stopOnError("found empty pages tree while" | 1297 | 12 | " calculating linearization data"); | 1298 | 12 | } | 1299 | 5.45k | for (auto const& og: pages_ogs) { | 1300 | 5.45k | if (lc_other.count(og)) { | 1301 | 2.51k | lc_other.erase(og); | 1302 | 2.51k | m->part9.push_back(getObject(og)); | 1303 | 2.51k | } | 1304 | 5.45k | } | 1305 | | | 1306 | | // Place private thumbnail images in page order. Slightly more information would be required if | 1307 | | // we were going to bother with thumbnail hint tables. 
| 1308 | 17.3k | for (size_t i = 0; i < toS(npages); ++i) { | 1309 | 13.7k | QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); | 1310 | 13.7k | thumb = getUncompressedObject(thumb, object_stream_data); | 1311 | 13.7k | if (!thumb.isNull()) { | 1312 | | // Output the thumbnail itself | 1313 | 1.66k | QPDFObjGen thumb_og(thumb.getObjGen()); | 1314 | 1.66k | if (lc_thumbnail_private.count(thumb_og)) { | 1315 | 1.47k | lc_thumbnail_private.erase(thumb_og); | 1316 | 1.47k | m->part9.push_back(thumb); | 1317 | 1.47k | } else { | 1318 | | // No internal error this time...there's nothing to stop this object from having | 1319 | | // been referred to somewhere else outside of a page's /Thumb, and if it had been, | 1320 | | // there's nothing to prevent it from having been in some set other than | 1321 | | // lc_thumbnail_private. | 1322 | 190 | } | 1323 | 1.66k | std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; | 1324 | 8.62k | for (auto const& og: ogs) { | 1325 | 8.62k | if (lc_thumbnail_private.count(og)) { | 1326 | 437 | lc_thumbnail_private.erase(og); | 1327 | 437 | m->part9.push_back(getObject(og)); | 1328 | 437 | } | 1329 | 8.62k | } | 1330 | 1.66k | } | 1331 | 13.7k | } | 1332 | 3.62k | if (!lc_thumbnail_private.empty()) { | 1333 | 7 | stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not " | 1334 | 7 | "empty after placing thumbnails"); | 1335 | 7 | } | 1336 | | | 1337 | | // Place shared thumbnail objects | 1338 | 3.62k | for (auto const& og: lc_thumbnail_shared) { | 1339 | 794 | m->part9.push_back(getObject(og)); | 1340 | 794 | } | 1341 | | | 1342 | | // Place outlines unless in first page | 1343 | 3.62k | if (!outlines_in_first_page) { | 1344 | 2.89k | pushOutlinesToPart(m->part9, lc_outlines, object_stream_data); | 1345 | 2.89k | } | 1346 | | | 1347 | | // Place all remaining objects | 1348 | 7.22k | for (auto const& og: lc_other) { | 1349 | 7.22k | m->part9.push_back(getObject(og)); | 
1350 | 7.22k | } | 1351 | | | 1352 | | // Make sure we got everything exactly once. | 1353 | | | 1354 | 3.62k | size_t num_placed = | 1355 | 3.62k | m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); | 1356 | 3.62k | size_t num_wanted = m->object_to_obj_users.size(); | 1357 | 3.62k | if (num_placed != num_wanted) { | 1358 | 47 | stopOnError( | 1359 | 47 | "INTERNAL ERROR: QPDF::calculateLinearizationData: wrong " | 1360 | 47 | "number of objects placed (num_placed = " + | 1361 | 47 | std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); | 1362 | 47 | } | 1363 | | | 1364 | | // Calculate shared object hint table information including references to shared objects from | 1365 | | // page offset hint data. | 1366 | | | 1367 | | // The shared object hint table consists of all part 6 (whether shared or not) in order followed | 1368 | | // by all part 8 objects in order. Add the objects to shared object data keeping a map of | 1369 | | // object number to index. Then populate the shared object information for the pages. | 1370 | | | 1371 | | // Note that two objects never have the same object number, so we can map from object number | 1372 | | // only without regards to generation. | 1373 | 3.62k | std::map<int, int> obj_to_index; | 1374 | | | 1375 | 3.62k | m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); | 1376 | 3.62k | m->c_shared_object_data.nshared_total = | 1377 | 3.62k | m->c_shared_object_data.nshared_first_page + toI(m->part8.size()); | 1378 | | | 1379 | 3.62k | std::vector<CHSharedObjectEntry>& shared = m->c_shared_object_data.entries; | 1380 | 26.6k | for (auto& oh: m->part6) { | 1381 | 26.6k | int obj = oh.getObjectID(); | 1382 | 26.6k | obj_to_index[obj] = toI(shared.size()); | 1383 | 26.6k | shared.emplace_back(obj); | 1384 | 26.6k | } | 1385 | 3.62k | QTC::TC("qpdf", "QPDF lin part 8 empty", m->part8.empty() ? 
1 : 0); | 1386 | 3.62k | if (!m->part8.empty()) { | 1387 | 313 | m->c_shared_object_data.first_shared_obj = m->part8.at(0).getObjectID(); | 1388 | 5.34k | for (auto& oh: m->part8) { | 1389 | 5.34k | int obj = oh.getObjectID(); | 1390 | 5.34k | obj_to_index[obj] = toI(shared.size()); | 1391 | 5.34k | shared.emplace_back(obj); | 1392 | 5.34k | } | 1393 | 313 | } | 1394 | 3.62k | if (static_cast<size_t>(m->c_shared_object_data.nshared_total) != | 1395 | 3.62k | m->c_shared_object_data.entries.size()) { | 1396 | 0 | stopOnError("shared object hint table has wrong number of entries"); | 1397 | 0 | } | 1398 | | | 1399 | | // Now compute the list of shared objects for each page after the first page. | 1400 | | | 1401 | 14.0k | for (size_t i = 1; i < toS(npages); ++i) { | 1402 | 10.4k | CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); | 1403 | 10.4k | ObjUser ou(ObjUser::ou_page, toI(i)); | 1404 | 10.4k | if (m->obj_user_to_objects.count(ou) == 0) { | 1405 | 0 | stopOnError("found unreferenced page while" | 1406 | 0 | " calculating linearization data"); | 1407 | 0 | } | 1408 | 87.6k | for (auto const& og: m->obj_user_to_objects[ou]) { | 1409 | 87.6k | if ((m->object_to_obj_users[og].size() > 1) && (obj_to_index.count(og.getObj()) > 0)) { | 1410 | 50.7k | int idx = obj_to_index[og.getObj()]; | 1411 | 50.7k | ++pe.nshared_objects; | 1412 | 50.7k | pe.shared_identifiers.push_back(idx); | 1413 | 50.7k | } | 1414 | 87.6k | } | 1415 | 10.4k | } | 1416 | 3.62k | } |
|
1417 | | |
1418 | | template <typename T> |
1419 | | void |
1420 | | QPDF::pushOutlinesToPart( |
1421 | | std::vector<QPDFObjectHandle>& part, |
1422 | | std::set<QPDFObjGen>& lc_outlines, |
1423 | | T const& object_stream_data) |
1424 | 3.26k | { |
1425 | 3.26k | QPDFObjectHandle root = getRoot(); |
1426 | 3.26k | QPDFObjectHandle outlines = root.getKey("/Outlines"); |
1427 | 3.26k | if (outlines.isNull()) { |
1428 | 2.72k | return; |
1429 | 2.72k | } |
1430 | 536 | outlines = getUncompressedObject(outlines, object_stream_data); |
1431 | 536 | QPDFObjGen outlines_og(outlines.getObjGen()); |
1432 | 536 | QTC::TC( |
1433 | 536 | "qpdf", |
1434 | 536 | "QPDF lin outlines in part", |
1435 | 536 | ((&part == (&m->part6)) ? 0 |
1436 | 536 | : (&part == (&m->part9)) ? 1 |
1437 | 166 | : 9999)); // can't happen |
1438 | 536 | m->c_outline_data.first_object = outlines_og.getObj(); |
1439 | 536 | m->c_outline_data.nobjects = 1; |
1440 | 536 | lc_outlines.erase(outlines_og); |
1441 | 536 | part.push_back(outlines); |
1442 | 3.86k | for (auto const& og: lc_outlines) { |
1443 | 3.86k | part.push_back(getObject(og)); |
1444 | 3.86k | ++m->c_outline_data.nobjects; |
1445 | 3.86k | } |
1446 | 536 | } Unexecuted instantiation: void QPDF::pushOutlinesToPart<std::__1::map<int, int, std::__1::less<int>, std::__1::allocator<std::__1::pair<int const, int> > > >(std::__1::vector<QPDFObjectHandle, std::__1::allocator<QPDFObjectHandle> >&, std::__1::set<QPDFObjGen, std::__1::less<QPDFObjGen>, std::__1::allocator<QPDFObjGen> >&, std::__1::map<int, int, std::__1::less<int>, std::__1::allocator<std::__1::pair<int const, int> > > const&) void QPDF::pushOutlinesToPart<QPDFWriter::ObjTable>(std::__1::vector<QPDFObjectHandle, std::__1::allocator<QPDFObjectHandle> >&, std::__1::set<QPDFObjGen, std::__1::less<QPDFObjGen>, std::__1::allocator<QPDFObjGen> >&, QPDFWriter::ObjTable const&) Line | Count | Source | 1424 | 3.26k | { | 1425 | 3.26k | QPDFObjectHandle root = getRoot(); | 1426 | 3.26k | QPDFObjectHandle outlines = root.getKey("/Outlines"); | 1427 | 3.26k | if (outlines.isNull()) { | 1428 | 2.72k | return; | 1429 | 2.72k | } | 1430 | 536 | outlines = getUncompressedObject(outlines, object_stream_data); | 1431 | 536 | QPDFObjGen outlines_og(outlines.getObjGen()); | 1432 | 536 | QTC::TC( | 1433 | 536 | "qpdf", | 1434 | 536 | "QPDF lin outlines in part", | 1435 | 536 | ((&part == (&m->part6)) ? 0 | 1436 | 536 | : (&part == (&m->part9)) ? 1 | 1437 | 166 | : 9999)); // can't happen | 1438 | 536 | m->c_outline_data.first_object = outlines_og.getObj(); | 1439 | 536 | m->c_outline_data.nobjects = 1; | 1440 | 536 | lc_outlines.erase(outlines_og); | 1441 | 536 | part.push_back(outlines); | 1442 | 3.86k | for (auto const& og: lc_outlines) { | 1443 | 3.86k | part.push_back(getObject(og)); | 1444 | 3.86k | ++m->c_outline_data.nobjects; | 1445 | 3.86k | } | 1446 | 536 | } |
|
1447 | | |
1448 | | void |
1449 | | QPDF::getLinearizedParts( |
1450 | | QPDFWriter::ObjTable const& obj, |
1451 | | std::vector<QPDFObjectHandle>& part4, |
1452 | | std::vector<QPDFObjectHandle>& part6, |
1453 | | std::vector<QPDFObjectHandle>& part7, |
1454 | | std::vector<QPDFObjectHandle>& part8, |
1455 | | std::vector<QPDFObjectHandle>& part9) |
1456 | 3.62k | { |
1457 | 3.62k | calculateLinearizationData(obj); |
1458 | 3.62k | part4 = m->part4; |
1459 | 3.62k | part6 = m->part6; |
1460 | 3.62k | part7 = m->part7; |
1461 | 3.62k | part8 = m->part8; |
1462 | 3.62k | part9 = m->part9; |
1463 | 3.62k | } |
1464 | | |
// Return the number of bits needed to hold val, i.e. the position of its highest set bit
// (0 for 0, 1 for 1, 2 for 2..3, 3 for 4..7, ...).
//
// The value is examined through its unsigned bit pattern so that the loop terminates for every
// input. The previous recursive form shifted the signed value, which never reaches zero for a
// negative argument when the shift is arithmetic. A negative argument now yields the full width
// of unsigned int.
static inline int
nbits(int val)
{
    int count = 0;
    for (auto remaining = static_cast<unsigned int>(val); remaining != 0; remaining >>= 1) {
        ++count;
    }
    return count;
}
1470 | | |
1471 | | int |
1472 | | QPDF::outputLengthNextN( |
1473 | | int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
1474 | 49.8k | { |
1475 | | // Figure out the length of a series of n consecutive objects in the output file starting with |
1476 | | // whatever object in_object from the input file mapped to. |
1477 | | |
1478 | 49.8k | int first = obj[in_object].renumber; |
1479 | 49.8k | int last = first + n; |
1480 | 49.8k | if (first <= 0) { |
1481 | 0 | stopOnError("found object that is not renumbered while writing linearization data"); |
1482 | 0 | } |
1483 | 49.8k | qpdf_offset_t length = 0; |
1484 | 159k | for (int i = first; i < last; ++i) { |
1485 | 110k | auto l = new_obj[i].length; |
1486 | 110k | if (l == 0) { |
1487 | 0 | stopOnError("found item with unknown length while writing linearization data"); |
1488 | 0 | } |
1489 | 110k | length += l; |
1490 | 110k | } |
1491 | 49.8k | return toI(length); |
1492 | 49.8k | } |
1493 | | |
1494 | | void |
1495 | | QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
1496 | 2.92k | { |
1497 | | // Page Offset Hint Table |
1498 | | |
1499 | | // We are purposely leaving some values set to their initial zero values. |
1500 | | |
1501 | 2.92k | std::vector<QPDFObjectHandle> const& pages = getAllPages(); |
1502 | 2.92k | size_t npages = pages.size(); |
1503 | 2.92k | CHPageOffset& cph = m->c_page_offset_data; |
1504 | 2.92k | std::vector<CHPageOffsetEntry>& cphe = cph.entries; |
1505 | | |
1506 | | // Calculate minimum and maximum values for number of objects per page and page length. |
1507 | | |
1508 | 2.92k | int min_nobjects = cphe.at(0).nobjects; |
1509 | 2.92k | int max_nobjects = min_nobjects; |
1510 | 2.92k | int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj); |
1511 | 2.92k | int max_length = min_length; |
1512 | 2.92k | int max_shared = cphe.at(0).nshared_objects; |
1513 | | |
1514 | 2.92k | HPageOffset& ph = m->page_offset_hints; |
1515 | 2.92k | std::vector<HPageOffsetEntry>& phe = ph.entries; |
1516 | | // npages is the size of the existing pages array. |
1517 | 2.92k | phe = std::vector<HPageOffsetEntry>(npages); |
1518 | | |
1519 | 15.8k | for (unsigned int i = 0; i < npages; ++i) { |
1520 | | // Calculate values for each page, assigning full values to the delta items. They will be |
1521 | | // adjusted later. |
1522 | | |
1523 | | // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those |
1524 | | // assignments. |
1525 | | |
1526 | 12.9k | int nobjects = cphe.at(i).nobjects; |
1527 | 12.9k | int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj); |
1528 | 12.9k | int nshared = cphe.at(i).nshared_objects; |
1529 | | |
1530 | 12.9k | min_nobjects = std::min(min_nobjects, nobjects); |
1531 | 12.9k | max_nobjects = std::max(max_nobjects, nobjects); |
1532 | 12.9k | min_length = std::min(min_length, length); |
1533 | 12.9k | max_length = std::max(max_length, length); |
1534 | 12.9k | max_shared = std::max(max_shared, nshared); |
1535 | | |
1536 | 12.9k | phe.at(i).delta_nobjects = nobjects; |
1537 | 12.9k | phe.at(i).delta_page_length = length; |
1538 | 12.9k | phe.at(i).nshared_objects = nshared; |
1539 | 12.9k | } |
1540 | | |
1541 | 2.92k | ph.min_nobjects = min_nobjects; |
1542 | 2.92k | ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset(); |
1543 | 2.92k | ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); |
1544 | 2.92k | ph.min_page_length = min_length; |
1545 | 2.92k | ph.nbits_delta_page_length = nbits(max_length - min_length); |
1546 | 2.92k | ph.nbits_nshared_objects = nbits(max_shared); |
1547 | 2.92k | ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); |
1548 | 2.92k | ph.shared_denominator = 4; // doesn't matter |
1549 | | |
1550 | | // It isn't clear how to compute content offset and content length. Since we are not |
1551 | | // interleaving page objects with the content stream, we'll use the same values for content |
1552 | | // length as page length. We will use 0 as content offset because this is what Adobe does |
1553 | | // (implementation note 127) and pdlin as well. |
1554 | 2.92k | ph.nbits_delta_content_length = ph.nbits_delta_page_length; |
1555 | 2.92k | ph.min_content_length = ph.min_page_length; |
1556 | | |
1557 | 15.8k | for (size_t i = 0; i < npages; ++i) { |
1558 | | // Adjust delta entries |
1559 | 12.9k | if ((phe.at(i).delta_nobjects < min_nobjects) || |
1560 | 12.9k | (phe.at(i).delta_page_length < min_length)) { |
1561 | 0 | stopOnError("found too small delta nobjects or delta page length while writing " |
1562 | 0 | "linearization data"); |
1563 | 0 | } |
1564 | 12.9k | phe.at(i).delta_nobjects -= min_nobjects; |
1565 | 12.9k | phe.at(i).delta_page_length -= min_length; |
1566 | 12.9k | phe.at(i).delta_content_length = phe.at(i).delta_page_length; |
1567 | | |
1568 | 62.9k | for (size_t j = 0; j < toS(cphe.at(i).nshared_objects); ++j) { |
1569 | 50.0k | phe.at(i).shared_identifiers.push_back(cphe.at(i).shared_identifiers.at(j)); |
1570 | 50.0k | phe.at(i).shared_numerators.push_back(0); |
1571 | 50.0k | } |
1572 | 12.9k | } |
1573 | 2.92k | } |
1574 | | |
1575 | | void |
1576 | | QPDF::calculateHSharedObject( |
1577 | | QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
1578 | 2.92k | { |
1579 | 2.92k | CHSharedObject& cso = m->c_shared_object_data; |
1580 | 2.92k | std::vector<CHSharedObjectEntry>& csoe = cso.entries; |
1581 | 2.92k | HSharedObject& so = m->shared_object_hints; |
1582 | 2.92k | std::vector<HSharedObjectEntry>& soe = so.entries; |
1583 | 2.92k | soe.clear(); |
1584 | | |
1585 | 2.92k | int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj); |
1586 | 2.92k | int max_length = min_length; |
1587 | | |
1588 | 33.5k | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { |
1589 | | // Assign absolute numbers to deltas; adjust later |
1590 | 30.6k | int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj); |
1591 | 30.6k | min_length = std::min(min_length, length); |
1592 | 30.6k | max_length = std::max(max_length, length); |
1593 | 30.6k | soe.emplace_back(); |
1594 | 30.6k | soe.at(i).delta_group_length = length; |
1595 | 30.6k | } |
1596 | 2.92k | if (soe.size() != toS(cso.nshared_total)) { |
1597 | 0 | stopOnError("soe has wrong size after initialization"); |
1598 | 0 | } |
1599 | | |
1600 | 2.92k | so.nshared_total = cso.nshared_total; |
1601 | 2.92k | so.nshared_first_page = cso.nshared_first_page; |
1602 | 2.92k | if (so.nshared_total > so.nshared_first_page) { |
1603 | 292 | so.first_shared_obj = obj[cso.first_shared_obj].renumber; |
1604 | 292 | so.min_group_length = min_length; |
1605 | 292 | so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset(); |
1606 | 292 | } |
1607 | 2.92k | so.min_group_length = min_length; |
1608 | 2.92k | so.nbits_delta_group_length = nbits(max_length - min_length); |
1609 | | |
1610 | 33.5k | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { |
1611 | | // Adjust deltas |
1612 | 30.6k | if (soe.at(i).delta_group_length < min_length) { |
1613 | 0 | stopOnError("found too small group length while writing linearization data"); |
1614 | 0 | } |
1615 | 30.6k | soe.at(i).delta_group_length -= min_length; |
1616 | 30.6k | } |
1617 | 2.92k | } |
1618 | | |
1619 | | void |
1620 | | QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
1621 | 2.92k | { |
1622 | 2.92k | HGeneric& cho = m->c_outline_data; |
1623 | | |
1624 | 2.92k | if (cho.nobjects == 0) { |
1625 | 2.45k | return; |
1626 | 2.45k | } |
1627 | | |
1628 | 471 | HGeneric& ho = m->outline_hints; |
1629 | | |
1630 | 471 | ho.first_object = obj[cho.first_object].renumber; |
1631 | 471 | ho.first_object_offset = new_obj[ho.first_object].xref.getOffset(); |
1632 | 471 | ho.nobjects = cho.nobjects; |
1633 | 471 | ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj); |
1634 | 471 | } |
1635 | | |
1636 | | template <class T, class int_type> |
1637 | | static void |
1638 | | write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field) |
1639 | 23.3k | { |
1640 | | // nitems times, write bits bits from the given field of the ith vector to the given bit writer. |
1641 | | |
1642 | 179k | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { |
1643 | 156k | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); |
1644 | 156k | } |
1645 | | // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must |
1646 | | // start on a byte boundary. |
1647 | 23.3k | w.flush(); |
1648 | 23.3k | } QPDF_linearization.cc:void write_vector_int<QPDF::HPageOffsetEntry, int>(BitWriter&, int, std::__1::vector<QPDF::HPageOffsetEntry, std::__1::allocator<QPDF::HPageOffsetEntry> >&, int, int QPDF::HPageOffsetEntry::*) Line | Count | Source | 1639 | 5.84k | { | 1640 | | // nitems times, write bits bits from the given field of the ith vector to the given bit writer. | 1641 | | | 1642 | 31.6k | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { | 1643 | 25.8k | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); | 1644 | 25.8k | } | 1645 | | // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must | 1646 | | // start on a byte boundary. | 1647 | 5.84k | w.flush(); | 1648 | 5.84k | } |
QPDF_linearization.cc:void write_vector_int<QPDF::HPageOffsetEntry, long long>(BitWriter&, int, std::__1::vector<QPDF::HPageOffsetEntry, std::__1::allocator<QPDF::HPageOffsetEntry> >&, int, long long QPDF::HPageOffsetEntry::*) Line | Count | Source | 1639 | 8.76k | { | 1640 | | // nitems times, write bits bits from the given field of the ith vector to the given bit writer. | 1641 | | | 1642 | 47.4k | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { | 1643 | 38.7k | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); | 1644 | 38.7k | } | 1645 | | // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must | 1646 | | // start on a byte boundary. | 1647 | 8.76k | w.flush(); | 1648 | 8.76k | } |
QPDF_linearization.cc:void write_vector_int<QPDF::HSharedObjectEntry, int>(BitWriter&, int, std::__1::vector<QPDF::HSharedObjectEntry, std::__1::allocator<QPDF::HSharedObjectEntry> >&, int, int QPDF::HSharedObjectEntry::*) Line | Count | Source | 1639 | 8.76k | { | 1640 | | // nitems times, write bits bits from the given field of the ith vector to the given bit writer. | 1641 | | | 1642 | 100k | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { | 1643 | 92.0k | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); | 1644 | 92.0k | } | 1645 | | // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must | 1646 | | // start on a byte boundary. | 1647 | 8.76k | w.flush(); | 1648 | 8.76k | } |
|
1649 | | |
1650 | | template <class T> |
1651 | | static void |
1652 | | write_vector_vector( |
1653 | | BitWriter& w, |
1654 | | int nitems1, |
1655 | | std::vector<T>& vec1, |
1656 | | int T::*nitems2, |
1657 | | int bits, |
1658 | | std::vector<int> T::*vec2) |
1659 | 5.84k | { |
1660 | | // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field |
1661 | | // of the ith item of vec1. |
1662 | 31.6k | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { |
1663 | 125k | for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { |
1664 | 100k | w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); |
1665 | 100k | } |
1666 | 25.8k | } |
1667 | 5.84k | w.flush(); |
1668 | 5.84k | } |
1669 | | |
1670 | | void |
1671 | | QPDF::writeHPageOffset(BitWriter& w) |
1672 | 2.92k | { |
1673 | 2.92k | HPageOffset& t = m->page_offset_hints; |
1674 | | |
1675 | 2.92k | w.writeBitsInt(t.min_nobjects, 32); // 1 |
1676 | 2.92k | w.writeBits(toULL(t.first_page_offset), 32); // 2 |
1677 | 2.92k | w.writeBitsInt(t.nbits_delta_nobjects, 16); // 3 |
1678 | 2.92k | w.writeBitsInt(t.min_page_length, 32); // 4 |
1679 | 2.92k | w.writeBitsInt(t.nbits_delta_page_length, 16); // 5 |
1680 | 2.92k | w.writeBits(toULL(t.min_content_offset), 32); // 6 |
1681 | 2.92k | w.writeBitsInt(t.nbits_delta_content_offset, 16); // 7 |
1682 | 2.92k | w.writeBitsInt(t.min_content_length, 32); // 8 |
1683 | 2.92k | w.writeBitsInt(t.nbits_delta_content_length, 16); // 9 |
1684 | 2.92k | w.writeBitsInt(t.nbits_nshared_objects, 16); // 10 |
1685 | 2.92k | w.writeBitsInt(t.nbits_shared_identifier, 16); // 11 |
1686 | 2.92k | w.writeBitsInt(t.nbits_shared_numerator, 16); // 12 |
1687 | 2.92k | w.writeBitsInt(t.shared_denominator, 16); // 13 |
1688 | | |
1689 | 2.92k | int nitems = toI(getAllPages().size()); |
1690 | 2.92k | std::vector<HPageOffsetEntry>& entries = t.entries; |
1691 | | |
1692 | 2.92k | write_vector_int(w, nitems, entries, t.nbits_delta_nobjects, &HPageOffsetEntry::delta_nobjects); |
1693 | 2.92k | write_vector_int( |
1694 | 2.92k | w, nitems, entries, t.nbits_delta_page_length, &HPageOffsetEntry::delta_page_length); |
1695 | 2.92k | write_vector_int( |
1696 | 2.92k | w, nitems, entries, t.nbits_nshared_objects, &HPageOffsetEntry::nshared_objects); |
1697 | 2.92k | write_vector_vector( |
1698 | 2.92k | w, |
1699 | 2.92k | nitems, |
1700 | 2.92k | entries, |
1701 | 2.92k | &HPageOffsetEntry::nshared_objects, |
1702 | 2.92k | t.nbits_shared_identifier, |
1703 | 2.92k | &HPageOffsetEntry::shared_identifiers); |
1704 | 2.92k | write_vector_vector( |
1705 | 2.92k | w, |
1706 | 2.92k | nitems, |
1707 | 2.92k | entries, |
1708 | 2.92k | &HPageOffsetEntry::nshared_objects, |
1709 | 2.92k | t.nbits_shared_numerator, |
1710 | 2.92k | &HPageOffsetEntry::shared_numerators); |
1711 | 2.92k | write_vector_int( |
1712 | 2.92k | w, nitems, entries, t.nbits_delta_content_offset, &HPageOffsetEntry::delta_content_offset); |
1713 | 2.92k | write_vector_int( |
1714 | 2.92k | w, nitems, entries, t.nbits_delta_content_length, &HPageOffsetEntry::delta_content_length); |
1715 | 2.92k | } |
1716 | | |
1717 | | void |
1718 | | QPDF::writeHSharedObject(BitWriter& w) |
1719 | 2.92k | { |
1720 | 2.92k | HSharedObject& t = m->shared_object_hints; |
1721 | | |
1722 | 2.92k | w.writeBitsInt(t.first_shared_obj, 32); // 1 |
1723 | 2.92k | w.writeBits(toULL(t.first_shared_offset), 32); // 2 |
1724 | 2.92k | w.writeBitsInt(t.nshared_first_page, 32); // 3 |
1725 | 2.92k | w.writeBitsInt(t.nshared_total, 32); // 4 |
1726 | 2.92k | w.writeBitsInt(t.nbits_nobjects, 16); // 5 |
1727 | 2.92k | w.writeBitsInt(t.min_group_length, 32); // 6 |
1728 | 2.92k | w.writeBitsInt(t.nbits_delta_group_length, 16); // 7 |
1729 | | |
1730 | 2.92k | QTC::TC( |
1731 | 2.92k | "qpdf", |
1732 | 2.92k | "QPDF lin write nshared_total > nshared_first_page", |
1733 | 2.92k | (t.nshared_total > t.nshared_first_page) ? 1 : 0); |
1734 | | |
1735 | 2.92k | int nitems = t.nshared_total; |
1736 | 2.92k | std::vector<HSharedObjectEntry>& entries = t.entries; |
1737 | | |
1738 | 2.92k | write_vector_int( |
1739 | 2.92k | w, nitems, entries, t.nbits_delta_group_length, &HSharedObjectEntry::delta_group_length); |
1740 | 2.92k | write_vector_int(w, nitems, entries, 1, &HSharedObjectEntry::signature_present); |
1741 | 33.5k | for (size_t i = 0; i < toS(nitems); ++i) { |
1742 | | // If signature were present, we'd have to write a 128-bit hash. |
1743 | 30.6k | if (entries.at(i).signature_present != 0) { |
1744 | 0 | stopOnError("found unexpected signature present" |
1745 | 0 | " while writing linearization data"); |
1746 | 0 | } |
1747 | 30.6k | } |
1748 | 2.92k | write_vector_int(w, nitems, entries, t.nbits_nobjects, &HSharedObjectEntry::nobjects_minus_one); |
1749 | 2.92k | } |
1750 | | |
1751 | | void |
1752 | | QPDF::writeHGeneric(BitWriter& w, HGeneric& t) |
1753 | 471 | { |
1754 | 471 | w.writeBitsInt(t.first_object, 32); // 1 |
1755 | 471 | w.writeBits(toULL(t.first_object_offset), 32); // 2 |
1756 | 471 | w.writeBitsInt(t.nobjects, 32); // 3 |
1757 | 471 | w.writeBitsInt(t.group_length, 32); // 4 |
1758 | 471 | } |
1759 | | |
1760 | | void |
1761 | | QPDF::generateHintStream( |
1762 | | QPDFWriter::NewObjTable const& new_obj, |
1763 | | QPDFWriter::ObjTable const& obj, |
1764 | | std::shared_ptr<Buffer>& hint_buffer, |
1765 | | int& S, |
1766 | | int& O, |
1767 | | bool compressed) |
1768 | 2.92k | { |
1769 | | // Populate actual hint table values |
1770 | 2.92k | calculateHPageOffset(new_obj, obj); |
1771 | 2.92k | calculateHSharedObject(new_obj, obj); |
1772 | 2.92k | calculateHOutline(new_obj, obj); |
1773 | | |
1774 | | // Write the hint stream itself into a compressed memory buffer. Write through a counter so we |
1775 | | // can get offsets. |
1776 | 2.92k | Pl_Buffer hint_stream("hint stream"); |
1777 | 2.92k | Pipeline* next = &hint_stream; |
1778 | 2.92k | std::shared_ptr<Pipeline> flate; |
1779 | 2.92k | if (compressed) { |
1780 | 2.92k | flate = |
1781 | 2.92k | std::make_shared<Pl_Flate>("compress hint stream", &hint_stream, Pl_Flate::a_deflate); |
1782 | 2.92k | next = flate.get(); |
1783 | 2.92k | } |
1784 | 2.92k | Pl_Count c("count", next); |
1785 | 2.92k | BitWriter w(&c); |
1786 | | |
1787 | 2.92k | writeHPageOffset(w); |
1788 | 2.92k | S = toI(c.getCount()); |
1789 | 2.92k | writeHSharedObject(w); |
1790 | 2.92k | O = 0; |
1791 | 2.92k | if (m->outline_hints.nobjects > 0) { |
1792 | 471 | O = toI(c.getCount()); |
1793 | 471 | writeHGeneric(w, m->outline_hints); |
1794 | 471 | } |
1795 | 2.92k | c.finish(); |
1796 | | |
1797 | 2.92k | hint_buffer = hint_stream.getBufferSharedPointer(); |
1798 | 2.92k | } |