/src/qpdf/libqpdf/QPDFParser.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/QPDFParser.hh> |
2 | | |
3 | | #include <qpdf/BufferInputSource.hh> |
4 | | #include <qpdf/QPDF.hh> |
5 | | #include <qpdf/QPDFObjGen.hh> |
6 | | #include <qpdf/QPDFObjectHandle.hh> |
7 | | #include <qpdf/QPDFObject_private.hh> |
8 | | #include <qpdf/QPDFTokenizer_private.hh> |
9 | | #include <qpdf/QTC.hh> |
10 | | #include <qpdf/QUtil.hh> |
11 | | |
12 | | #include <memory> |
13 | | |
14 | | using namespace std::literals; |
15 | | using namespace qpdf; |
16 | | |
17 | | using ObjectPtr = std::shared_ptr<QPDFObject>; |
18 | | |
19 | | QPDFObjectHandle |
20 | | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
21 | 36.2k | { |
22 | 36.2k | qpdf::Tokenizer tokenizer; |
23 | 36.2k | bool empty = false; |
24 | 36.2k | return QPDFParser( |
25 | 36.2k | input, |
26 | 36.2k | make_description(input.getName(), object_description), |
27 | 36.2k | object_description, |
28 | 36.2k | tokenizer, |
29 | 36.2k | nullptr, |
30 | 36.2k | context, |
31 | 36.2k | false) |
32 | 36.2k | .parse(empty, false); |
33 | 36.2k | } |
34 | | |
35 | | QPDFObjectHandle |
36 | | QPDFParser::parse_content( |
37 | | InputSource& input, |
38 | | std::shared_ptr<QPDFObject::Description> sp_description, |
39 | | qpdf::Tokenizer& tokenizer, |
40 | | QPDF* context) |
41 | 2.91M | { |
42 | 2.91M | bool empty = false; |
43 | 2.91M | return QPDFParser( |
44 | 2.91M | input, std::move(sp_description), "content", tokenizer, nullptr, context, true) |
45 | 2.91M | .parse(empty, true); |
46 | 2.91M | } |
47 | | |
48 | | QPDFObjectHandle |
49 | | QPDFParser::parse( |
50 | | InputSource& input, |
51 | | std::string const& object_description, |
52 | | QPDFTokenizer& tokenizer, |
53 | | bool& empty, |
54 | | QPDFObjectHandle::StringDecrypter* decrypter, |
55 | | QPDF* context) |
56 | 0 | { |
57 | 0 | return QPDFParser( |
58 | 0 | input, |
59 | 0 | make_description(input.getName(), object_description), |
60 | 0 | object_description, |
61 | 0 | *tokenizer.m, |
62 | 0 | decrypter, |
63 | 0 | context, |
64 | 0 | false) |
65 | 0 | .parse(empty, false); |
66 | 0 | } |
67 | | |
68 | | std::pair<QPDFObjectHandle, bool> |
69 | | QPDFParser::parse( |
70 | | InputSource& input, |
71 | | std::string const& object_description, |
72 | | qpdf::Tokenizer& tokenizer, |
73 | | QPDFObjectHandle::StringDecrypter* decrypter, |
74 | | QPDF& context, |
75 | | bool sanity_checks) |
76 | 156k | { |
77 | 156k | bool empty{false}; |
78 | 156k | auto result = QPDFParser( |
79 | 156k | input, |
80 | 156k | make_description(input.getName(), object_description), |
81 | 156k | object_description, |
82 | 156k | tokenizer, |
83 | 156k | decrypter, |
84 | 156k | &context, |
85 | 156k | true, |
86 | 156k | 0, |
87 | 156k | 0, |
88 | 156k | sanity_checks) |
89 | 156k | .parse(empty, false); |
90 | 156k | return {result, empty}; |
91 | 156k | } |
92 | | |
93 | | std::pair<QPDFObjectHandle, bool> |
94 | | QPDFParser::parse( |
95 | | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
96 | 12.1k | { |
97 | 12.1k | bool empty{false}; |
98 | 12.1k | auto result = QPDFParser( |
99 | 12.1k | input, |
100 | 12.1k | std::make_shared<QPDFObject::Description>( |
101 | 12.1k | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
102 | 12.1k | "", |
103 | 12.1k | tokenizer, |
104 | 12.1k | nullptr, |
105 | 12.1k | &context, |
106 | 12.1k | true, |
107 | 12.1k | stream_id, |
108 | 12.1k | obj_id) |
109 | 12.1k | .parse(empty, false); |
110 | 12.1k | return {result, empty}; |
111 | 12.1k | } |
112 | | |
113 | | QPDFObjectHandle |
114 | | QPDFParser::parse(bool& empty, bool content_stream) |
115 | 3.12M | { |
116 | | // This method must take care not to resolve any objects. Don't check the type of any object |
117 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
118 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
119 | | // logic error to be thrown from QPDF::inParse(). |
120 | | |
121 | 3.12M | QPDF::ParseGuard pg(context); |
122 | 3.12M | empty = false; |
123 | 3.12M | start = input.tell(); |
124 | | |
125 | 3.12M | if (!tokenizer.nextToken(input, object_description)) { |
126 | 11.9k | warn(tokenizer.getErrorMessage()); |
127 | 11.9k | } |
128 | | |
129 | 3.12M | switch (tokenizer.getType()) { |
130 | 3.92k | case QPDFTokenizer::tt_eof: |
131 | 3.92k | if (content_stream) { |
132 | | // In content stream mode, leave object uninitialized to indicate EOF |
133 | 3.37k | return {}; |
134 | 3.37k | } |
135 | 546 | QTC::TC("qpdf", "QPDFParser eof in parse"); |
136 | 546 | warn("unexpected EOF"); |
137 | 546 | return {QPDFObject::create<QPDF_Null>()}; |
138 | | |
139 | 11.8k | case QPDFTokenizer::tt_bad: |
140 | 11.8k | QTC::TC("qpdf", "QPDFParser bad token in parse"); |
141 | 11.8k | return {QPDFObject::create<QPDF_Null>()}; |
142 | | |
143 | 595 | case QPDFTokenizer::tt_brace_open: |
144 | 3.05k | case QPDFTokenizer::tt_brace_close: |
145 | 3.05k | QTC::TC("qpdf", "QPDFParser bad brace"); |
146 | 3.05k | warn("treating unexpected brace token as null"); |
147 | 3.05k | return {QPDFObject::create<QPDF_Null>()}; |
148 | | |
149 | 4.40k | case QPDFTokenizer::tt_array_close: |
150 | 4.40k | QTC::TC("qpdf", "QPDFParser bad array close"); |
151 | 4.40k | warn("treating unexpected array close token as null"); |
152 | 4.40k | return {QPDFObject::create<QPDF_Null>()}; |
153 | | |
154 | 975 | case QPDFTokenizer::tt_dict_close: |
155 | 975 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
156 | 975 | warn("unexpected dictionary close token"); |
157 | 975 | return {QPDFObject::create<QPDF_Null>()}; |
158 | | |
159 | 10.9k | case QPDFTokenizer::tt_array_open: |
160 | 146k | case QPDFTokenizer::tt_dict_open: |
161 | 146k | stack.clear(); |
162 | 146k | stack.emplace_back( |
163 | 146k | input, |
164 | 146k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); |
165 | 146k | frame = &stack.back(); |
166 | 146k | return parseRemainder(content_stream); |
167 | | |
168 | 3.31k | case QPDFTokenizer::tt_bool: |
169 | 3.31k | return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); |
170 | | |
171 | 216 | case QPDFTokenizer::tt_null: |
172 | 216 | return {QPDFObject::create<QPDF_Null>()}; |
173 | | |
174 | 188k | case QPDFTokenizer::tt_integer: |
175 | 188k | return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
176 | | |
177 | 71.9k | case QPDFTokenizer::tt_real: |
178 | 71.9k | return withDescription<QPDF_Real>(tokenizer.getValue()); |
179 | | |
180 | 79.8k | case QPDFTokenizer::tt_name: |
181 | 79.8k | return withDescription<QPDF_Name>(tokenizer.getValue()); |
182 | | |
183 | 2.57M | case QPDFTokenizer::tt_word: |
184 | 2.57M | { |
185 | 2.57M | auto const& value = tokenizer.getValue(); |
186 | 2.57M | if (content_stream) { |
187 | 2.56M | return withDescription<QPDF_Operator>(value); |
188 | 2.56M | } else if (value == "endobj") { |
189 | | // We just saw endobj without having read anything. Treat this as a null and do |
190 | | // not move the input source's offset. |
191 | 449 | input.seek(input.getLastOffset(), SEEK_SET); |
192 | 449 | empty = true; |
193 | 449 | return {QPDFObject::create<QPDF_Null>()}; |
194 | 7.70k | } else { |
195 | 7.70k | QTC::TC("qpdf", "QPDFParser treat word as string"); |
196 | 7.70k | warn("unknown token while reading object; treating as string"); |
197 | 7.70k | return withDescription<QPDF_String>(value); |
198 | 7.70k | } |
199 | 2.57M | } |
200 | | |
201 | 33.1k | case QPDFTokenizer::tt_string: |
202 | 33.1k | if (decrypter) { |
203 | 339 | std::string s{tokenizer.getValue()}; |
204 | 339 | decrypter->decryptString(s); |
205 | 339 | return withDescription<QPDF_String>(s); |
206 | 32.8k | } else { |
207 | 32.8k | return withDescription<QPDF_String>(tokenizer.getValue()); |
208 | 32.8k | } |
209 | | |
210 | 0 | default: |
211 | 0 | warn("treating unknown token type as null while reading object"); |
212 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
213 | 3.12M | } |
214 | 3.12M | } |
215 | | |
216 | | QPDFObjectHandle |
217 | | QPDFParser::parseRemainder(bool content_stream) |
218 | 146k | { |
219 | | // Consumes tokens until the container whose frame the caller pushed onto `stack` is closed, building nested arrays and dictionaries as it goes. Returns that outermost container on success, a null object whenever parsing is abandoned (EOF, too many bad tokens, excessive nesting, failed sanity checks), or an uninitialized handle on EOF in content stream mode. |
220 | | // `b_contents` is raised when the key "/Contents" is read while a decrypter is present; the next string token is then also kept undecrypted in the frame so that it can replace the decrypted value if the finished dictionary turns out to be a /Sig dictionary with a /ByteRange. |
221 | | // This method must take care not to resolve any objects. Don't check the type of any object without first ensuring that it is a direct object. Otherwise, doing so may have the side effect of reading the object and changing the file pointer. |
222 | | // If you do this, it will cause a logic error to be thrown from QPDF::inParse(). |
223 | | |
224 | 146k | bad_count = 0; |
225 | 146k | bool b_contents = false; |
226 | | |
227 | 11.1M | while (true) { |
228 | 11.1M | if (!tokenizer.nextToken(input, object_description)) { |
229 | 55.2k | warn(tokenizer.getErrorMessage()); |
230 | 55.2k | } |
231 | 11.1M | ++good_count; // optimistically |
232 | | |
233 | 11.1M | if (int_count != 0) { |
234 | | // Special handling of indirect references ("<id> <gen> R"). Treat integer tokens as part of an indirect reference until proven otherwise: |
235 | | // up to two integers are held back in int_buffer / last_offset_buffer, in the slot selected by int_count % 2. |
236 | 1.29M | if (tokenizer.getType() == QPDFTokenizer::tt_integer) { |
237 | 651k | if (++int_count > 2) { |
238 | | // A third consecutive integer: the oldest buffered integer cannot be part of a reference, so process it now. Its slot is reused for the current token below. |
239 | 158k | addInt(int_count); |
240 | 158k | } |
241 | 651k | last_offset_buffer[int_count % 2] = input.getLastOffset(); |
242 | 651k | int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
243 | 651k | continue; |
244 | | |
245 | 651k | } else if ( |
246 | 640k | int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && |
247 | 640k | tokenizer.getValue() == "R") { |
248 | 419k | if (context == nullptr) { |
249 | 0 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
250 | 0 | throw std::logic_error( |
251 | 0 | "QPDFParser::parse called without context on an object " |
252 | 0 | "with indirect references"); |
253 | 0 | } |
254 | 419k | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); |
255 | 419k | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); |
256 | 419k | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
257 | 414k | add(QPDF::ParseGuard::getObject(context, id, gen, parse_pdf)); |
258 | 414k | } else { |
259 | 4.89k | QTC::TC("qpdf", "QPDFParser invalid objgen"); |
260 | 4.89k | addNull(); |
261 | 4.89k | } |
262 | 419k | int_count = 0; |
263 | 419k | continue; |
264 | | |
265 | 419k | } else if (int_count > 0) { |
266 | | // The current token ends the run of integers without completing a reference. Process the buffered integers (older one first) before processing the current token. |
267 | 221k | if (int_count > 1) { |
268 | 74.2k | addInt(int_count - 1); |
269 | 74.2k | } |
270 | 221k | addInt(int_count); |
271 | 221k | int_count = 0; |
272 | 221k | } |
273 | 1.29M | } |
274 | | |
275 | 10.0M | switch (tokenizer.getType()) { |
276 | 8.12k | case QPDFTokenizer::tt_eof: |
277 | 8.12k | warn("parse error while reading object"); |
278 | 8.12k | if (content_stream) { |
279 | | // In content stream mode, leave object uninitialized to indicate EOF |
280 | 350 | return {}; |
281 | 350 | } |
282 | 7.77k | QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); |
283 | 7.77k | warn("unexpected EOF"); |
284 | 7.77k | return {QPDFObject::create<QPDF_Null>()}; |
285 | | |
286 | 52.3k | case QPDFTokenizer::tt_bad: |
287 | 52.3k | QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); |
288 | 52.3k | if (tooManyBadTokens()) { |
289 | 1.98k | return {QPDFObject::create<QPDF_Null>()}; |
290 | 1.98k | } |
291 | 50.3k | addNull(); |
292 | 50.3k | continue; |
293 | | |
294 | 2.85k | case QPDFTokenizer::tt_brace_open: |
295 | 4.64k | case QPDFTokenizer::tt_brace_close: |
296 | 4.64k | QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); |
297 | 4.64k | warn("treating unexpected brace token as null"); |
298 | 4.64k | if (tooManyBadTokens()) { |
299 | 356 | return {QPDFObject::create<QPDF_Null>()}; |
300 | 356 | } |
301 | 4.29k | addNull(); |
302 | 4.29k | continue; |
303 | | |
304 | 117k | case QPDFTokenizer::tt_array_close: |
305 | 117k | if ((bad_count || sanity_checks) && !max_bad_count) { |
306 | | // Trigger warning. max_bad_count is zero here (e.g. cleared by addScalar once a container has grown past its size limit), so the object is abandoned. |
307 | 115 | (void)tooManyBadTokens(); |
308 | 115 | return {QPDFObject::create<QPDF_Null>()}; |
309 | 115 | } |
310 | 117k | if (frame->state == st_array) { |
311 | 112k | auto object = frame->null_count > 100 |
312 | 112k | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
313 | 112k | : QPDFObject::create<QPDF_Array>(std::move(frame->olist)); |
314 | 112k | setDescription(object, frame->offset - 1); |
315 | | // Explains the `frame->offset - 1` used above: `offset` points just past "[". Set the rewind offset to point to the |
316 | | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
317 | | // array start delimiter. getLastOffset points to the array end token and therefore |
318 | | // can't be used here. |
319 | 112k | if (stack.size() <= 1) { |
320 | 5.19k | return object; |
321 | 5.19k | } |
322 | 107k | stack.pop_back(); |
323 | 107k | frame = &stack.back(); |
324 | 107k | add(std::move(object)); |
325 | 107k | } else { |
326 | 5.06k | QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); |
327 | 5.06k | if (sanity_checks) { |
328 | | // During sanity checks, assume nesting of containers is corrupt and object is |
329 | | // unusable. |
330 | 2.41k | warn("unexpected array close token; giving up on reading object"); |
331 | 2.41k | return {QPDFObject::create<QPDF_Null>()}; |
332 | 2.41k | } |
333 | 2.65k | warn("treating unexpected array close token as null"); |
334 | 2.65k | if (tooManyBadTokens()) { |
335 | 212 | return {QPDFObject::create<QPDF_Null>()}; |
336 | 212 | } |
337 | 2.44k | addNull(); |
338 | 2.44k | } |
339 | 109k | continue; |
340 | | |
341 | 224k | case QPDFTokenizer::tt_dict_close: |
342 | 224k | if ((bad_count || sanity_checks) && !max_bad_count) { |
343 | | // Trigger warning. max_bad_count is zero here (e.g. cleared by addScalar once a container has grown past its size limit), so the object is abandoned. |
344 | 129 | (void)tooManyBadTokens(); |
345 | 129 | return {QPDFObject::create<QPDF_Null>()}; |
346 | 129 | } |
347 | 224k | if (frame->state <= st_dictionary_value) { |
348 | | // Attempt to recover more or less gracefully from invalid dictionaries: a key without a value gets null, and objects collected in olist (values that had no key) are either ignored with a warning or handed to fixMissingKeys(). |
349 | 221k | auto& dict = frame->dict; |
350 | | |
351 | 221k | if (frame->state == st_dictionary_value) { |
352 | 6.10k | QTC::TC("qpdf", "QPDFParser no val for last key"); |
353 | 6.10k | warn( |
354 | 6.10k | frame->offset, |
355 | 6.10k | "dictionary ended prematurely; using null as value for last key"); |
356 | 6.10k | dict[frame->key] = QPDFObject::create<QPDF_Null>(); |
357 | 6.10k | } |
358 | 221k | if (!frame->olist.empty()) { |
359 | 50.0k | if (sanity_checks) { |
360 | 44.7k | warn( |
361 | 44.7k | frame->offset, |
362 | 44.7k | "expected dictionary keys but found non-name objects; ignoring"); |
363 | 44.7k | } else { |
364 | 5.22k | fixMissingKeys(); |
365 | 5.22k | } |
366 | 50.0k | } |
367 | | |
368 | 221k | if (!frame->contents_string.empty() && dict.contains("/Type") && |
369 | 221k | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && |
370 | 221k | dict.contains("/Contents") && dict["/Contents"].isString()) { |
371 | 0 | dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); |
372 | 0 | dict["/Contents"].setParsedOffset(frame->contents_offset); |
373 | 0 | } |
374 | 221k | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); |
375 | 221k | setDescription(object, frame->offset - 2); |
376 | | // Explains the `frame->offset - 2` used above: `offset` points just past "<<". Set the rewind offset to point to the |
377 | | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
378 | | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
379 | | // and therefore can't be used here. |
380 | 221k | if (stack.size() <= 1) { |
381 | 114k | return object; |
382 | 114k | } |
383 | 106k | stack.pop_back(); |
384 | 106k | frame = &stack.back(); |
385 | 106k | add(std::move(object)); |
386 | 106k | } else { |
387 | 2.82k | QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); |
388 | 2.82k | if (sanity_checks) { |
389 | | // During sanity checks, assume nesting of containers is corrupt and object is |
390 | | // unusable. |
391 | 1.76k | warn("unexpected dictionary close token; giving up on reading object"); |
392 | 1.76k | return {QPDFObject::create<QPDF_Null>()}; |
393 | 1.76k | } |
394 | 1.05k | warn("unexpected dictionary close token"); |
395 | 1.05k | if (tooManyBadTokens()) { |
396 | 37 | return {QPDFObject::create<QPDF_Null>()}; |
397 | 37 | } |
398 | 1.01k | addNull(); |
399 | 1.01k | } |
400 | 107k | continue; |
401 | | |
402 | 265k | case QPDFTokenizer::tt_array_open: |
403 | 403k | case QPDFTokenizer::tt_dict_open: |
404 | 403k | if (stack.size() > 499) { |
405 | 261 | QTC::TC("qpdf", "QPDFParser too deep"); |
406 | 261 | warn("ignoring excessively deeply nested data structure"); |
407 | 261 | return {QPDFObject::create<QPDF_Null>()}; |
408 | 403k | } else { |
409 | 403k | b_contents = false; |
410 | 403k | stack.emplace_back( |
411 | 403k | input, |
412 | 403k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
413 | 403k | : st_dictionary_key); |
414 | 403k | frame = &stack.back(); |
415 | 403k | continue; |
416 | 403k | } |
417 | | |
418 | 15.4k | case QPDFTokenizer::tt_bool: |
419 | 15.4k | addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); |
420 | 15.4k | continue; |
421 | | |
422 | 120k | case QPDFTokenizer::tt_null: |
423 | 120k | addNull(); |
424 | 120k | continue; |
425 | | |
426 | 862k | case QPDFTokenizer::tt_integer: |
427 | 862k | if (!content_stream) { |
428 | | // Buffer token in case it is part of an indirect reference. It is emitted or combined into a reference by the int_count handling at the top of the loop; in content stream mode integers are added directly instead. |
429 | 641k | last_offset_buffer[1] = input.getLastOffset(); |
430 | 641k | int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
431 | 641k | int_count = 1; |
432 | 641k | } else { |
433 | 221k | addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
434 | 221k | } |
435 | 862k | continue; |
436 | | |
437 | 94.5k | case QPDFTokenizer::tt_real: |
438 | 94.5k | addScalar<QPDF_Real>(tokenizer.getValue()); |
439 | 94.5k | continue; |
440 | | |
441 | 7.36M | case QPDFTokenizer::tt_name: |
442 | 7.36M | if (frame->state == st_dictionary_key) { |
443 | 770k | frame->key = tokenizer.getValue(); |
444 | 770k | frame->state = st_dictionary_value; |
445 | 770k | b_contents = decrypter && frame->key == "/Contents"; |
446 | 770k | continue; |
447 | 6.59M | } else { |
448 | 6.59M | addScalar<QPDF_Name>(tokenizer.getValue()); |
449 | 6.59M | } |
450 | 6.59M | continue; |
451 | | |
452 | 6.59M | case QPDFTokenizer::tt_word: |
453 | 648k | if (content_stream) { |
454 | 429k | addScalar<QPDF_Operator>(tokenizer.getValue()); |
455 | 429k | continue; |
456 | 429k | } |
457 | | |
458 | 218k | if (sanity_checks) { |
459 | 193k | if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") { |
460 | | // During sanity checks, assume an unexpected endobj or endstream indicates that |
461 | | // we are parsing past the end of the object. |
462 | 2.82k | warn( |
463 | 2.82k | "unexpected 'endobj' or 'endstream' while reading object; giving up on " |
464 | 2.82k | "reading object"); |
465 | 2.82k | return {QPDFObject::create<QPDF_Null>()}; |
466 | 2.82k | } |
467 | | |
468 | 190k | warn("unknown token while reading object; treating as null"); |
469 | 190k | if (tooManyBadTokens()) { |
470 | 4.41k | return {QPDFObject::create<QPDF_Null>()}; |
471 | 4.41k | } |
472 | 185k | addNull(); |
473 | 185k | continue; |
474 | 190k | } |
475 | | |
476 | 25.7k | QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); |
477 | 25.7k | warn("unknown token while reading object; treating as string"); |
478 | 25.7k | if (tooManyBadTokens()) { |
479 | 1.33k | return {QPDFObject::create<QPDF_Null>()}; |
480 | 1.33k | } |
481 | 24.4k | addScalar<QPDF_String>(tokenizer.getValue()); |
482 | | |
483 | 24.4k | continue; |
484 | | |
485 | 148k | case QPDFTokenizer::tt_string: |
486 | 148k | { |
487 | 148k | auto const& val = tokenizer.getValue(); |
488 | 148k | if (decrypter) { |
489 | 48.5k | if (b_contents) { |
490 | 172 | frame->contents_string = val; |
491 | 172 | frame->contents_offset = input.getLastOffset(); |
492 | 172 | b_contents = false; |
493 | 172 | } |
494 | 48.5k | std::string s{val}; |
495 | 48.5k | decrypter->decryptString(s); |
496 | 48.5k | addScalar<QPDF_String>(s); |
497 | 100k | } else { |
498 | 100k | addScalar<QPDF_String>(val); |
499 | 100k | } |
500 | 148k | } |
501 | 148k | continue; |
502 | | |
503 | 0 | default: |
504 | 0 | warn("treating unknown token type as null while reading object"); |
505 | 0 | if (tooManyBadTokens()) { |
506 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
507 | 0 | } |
508 | 0 | addNull(); |
509 | 10.0M | } |
510 | 10.0M | } |
511 | 146k | } |
512 | | |
513 | | void |
514 | | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) |
515 | 7.93M | { |
516 | 7.93M | if (frame->state != st_dictionary_value) { |
517 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
518 | | // processing once the tt_dict_close token has been found. |
519 | 7.21M | frame->olist.emplace_back(std::move(obj)); |
520 | 7.21M | } else { |
521 | 717k | if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { |
522 | 64.1k | warnDuplicateKey(); |
523 | 64.1k | } |
524 | 717k | frame->state = st_dictionary_key; |
525 | 717k | } |
526 | 7.93M | } |
527 | | |
528 | | void |
529 | | QPDFParser::addNull() |
530 | 368k | { |
531 | 368k | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
532 | | |
533 | 368k | if (frame->state != st_dictionary_value) { |
534 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
535 | | // processing once the tt_dict_close token has been found. |
536 | 335k | frame->olist.emplace_back(null_obj); |
537 | 335k | } else { |
538 | 32.3k | if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { |
539 | 2.71k | warnDuplicateKey(); |
540 | 2.71k | } |
541 | 32.3k | frame->state = st_dictionary_key; |
542 | 32.3k | } |
543 | 368k | ++frame->null_count; |
544 | 368k | } |
545 | | |
546 | | void |
547 | | QPDFParser::addInt(int count) |
548 | 453k | { |
549 | 453k | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
550 | 453k | obj->setDescription(context, description, last_offset_buffer[count % 2]); |
551 | 453k | add(std::move(obj)); |
552 | 453k | } |
553 | | |
554 | | template <typename T, typename... Args> |
555 | | void |
556 | | QPDFParser::addScalar(Args&&... args) |
557 | 7.52M | { |
558 | 7.52M | if ((bad_count || sanity_checks) && |
559 | 7.52M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { |
560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is |
561 | | // encountered. |
562 | 674k | max_bad_count = 0; |
563 | 674k | return; |
564 | 674k | } |
565 | 6.85M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
566 | 6.85M | obj->setDescription(context, description, input.getLastOffset()); |
567 | 6.85M | add(std::move(obj)); |
568 | 6.85M | } void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&) Line | Count | Source | 557 | 15.4k | { | 558 | 15.4k | if ((bad_count || sanity_checks) && | 559 | 15.4k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 1.78k | max_bad_count = 0; | 563 | 1.78k | return; | 564 | 1.78k | } | 565 | 13.6k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 13.6k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 13.6k | add(std::move(obj)); | 568 | 13.6k | } |
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&) Line | Count | Source | 557 | 221k | { | 558 | 221k | if ((bad_count || sanity_checks) && | 559 | 221k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 3.31k | max_bad_count = 0; | 563 | 3.31k | return; | 564 | 3.31k | } | 565 | 217k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 217k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 217k | add(std::move(obj)); | 568 | 217k | } |
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 557 | 94.5k | { | 558 | 94.5k | if ((bad_count || sanity_checks) && | 559 | 94.5k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 910 | max_bad_count = 0; | 563 | 910 | return; | 564 | 910 | } | 565 | 93.6k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 93.6k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 93.6k | add(std::move(obj)); | 568 | 93.6k | } |
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 557 | 6.59M | { | 558 | 6.59M | if ((bad_count || sanity_checks) && | 559 | 6.59M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 641k | max_bad_count = 0; | 563 | 641k | return; | 564 | 641k | } | 565 | 5.95M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 5.95M | obj->setDescription(context, description, input.getLastOffset()); | 567 | 5.95M | add(std::move(obj)); | 568 | 5.95M | } |
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 557 | 429k | { | 558 | 429k | if ((bad_count || sanity_checks) && | 559 | 429k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 26.4k | max_bad_count = 0; | 563 | 26.4k | return; | 564 | 26.4k | } | 565 | 402k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 402k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 402k | add(std::move(obj)); | 568 | 402k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 557 | 124k | { | 558 | 124k | if ((bad_count || sanity_checks) && | 559 | 124k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 284 | max_bad_count = 0; | 563 | 284 | return; | 564 | 284 | } | 565 | 124k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 124k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 124k | add(std::move(obj)); | 568 | 124k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 557 | 48.4k | { | 558 | 48.4k | if ((bad_count || sanity_checks) && | 559 | 48.4k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 560 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 561 | | // encountered. | 562 | 241 | max_bad_count = 0; | 563 | 241 | return; | 564 | 241 | } | 565 | 48.2k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 566 | 48.2k | obj->setDescription(context, description, input.getLastOffset()); | 567 | 48.2k | add(std::move(obj)); | 568 | 48.2k | } |
|
569 | | |
// Build a new object of type T from the forwarded constructor arguments, tag it with the
// parser's context, description and the `start` offset, and hand it back wrapped in a
// QPDFObjectHandle.
template <typename T, typename... Args>
QPDFObjectHandle
QPDFParser::withDescription(Args&&... args)
{
    auto created = QPDFObject::create<T>(std::forward<Args>(args)...);
    // The description is attached before the handle is produced so that the returned object
    // already carries it.
    created->setDescription(context, description, start);
    return {created};
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&) Line | Count | Source | 573 | 188k | { | 574 | 188k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 188k | obj->setDescription(context, description, start); | 576 | 188k | return {obj}; | 577 | 188k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 573 | 71.9k | { | 574 | 71.9k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 71.9k | obj->setDescription(context, description, start); | 576 | 71.9k | return {obj}; | 577 | 71.9k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 573 | 79.8k | { | 574 | 79.8k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 79.8k | obj->setDescription(context, description, start); | 576 | 79.8k | return {obj}; | 577 | 79.8k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 573 | 2.56M | { | 574 | 2.56M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 2.56M | obj->setDescription(context, description, start); | 576 | 2.56M | return {obj}; | 577 | 2.56M | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 573 | 40.3k | { | 574 | 40.3k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 40.3k | obj->setDescription(context, description, start); | 576 | 40.3k | return {obj}; | 577 | 40.3k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 573 | 260 | { | 574 | 260 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 575 | 260 | obj->setDescription(context, description, start); | 576 | 260 | return {obj}; | 577 | 260 | } |
|
578 | | |
579 | | void |
580 | | QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) |
581 | 333k | { |
582 | 333k | if (obj) { |
583 | 333k | obj->setDescription(context, description, parsed_offset); |
584 | 333k | } |
585 | 333k | } |
586 | | |
587 | | void |
588 | | QPDFParser::fixMissingKeys() |
589 | 5.22k | { |
590 | 5.22k | std::set<std::string> names; |
591 | 29.0k | for (auto& obj: frame->olist) { |
592 | 29.0k | if (obj.getObj()->getTypeCode() == ::ot_name) { |
593 | 164 | names.insert(obj.getObj()->getStringValue()); |
594 | 164 | } |
595 | 29.0k | } |
596 | 5.22k | int next_fake_key = 1; |
597 | 24.8k | for (auto const& item: frame->olist) { |
598 | 24.8k | while (true) { |
599 | 24.8k | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); |
600 | 24.8k | const bool found_fake = !frame->dict.contains(key) && !names.contains(key); |
601 | 24.8k | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
602 | 24.8k | if (found_fake) { |
603 | 24.8k | warn( |
604 | 24.8k | frame->offset, |
605 | 24.8k | "expected dictionary key but found non-name object; inserting key " + key); |
606 | 24.8k | frame->dict[key] = item; |
607 | 24.8k | break; |
608 | 24.8k | } |
609 | 24.8k | } |
610 | 24.8k | } |
611 | 5.22k | } |
612 | | |
613 | | bool |
614 | | QPDFParser::tooManyBadTokens() |
615 | 276k | { |
616 | 276k | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { |
617 | 234 | if (bad_count) { |
618 | 175 | warn( |
619 | 175 | "encountered errors while parsing an array or dictionary with more than 5000 " |
620 | 175 | "elements; giving up on reading object"); |
621 | 175 | return true; |
622 | 175 | } |
623 | 59 | warn( |
624 | 59 | "encountered an array or dictionary with more than 5000 elements during xref recovery; " |
625 | 59 | "giving up on reading object"); |
626 | 59 | } |
627 | 275k | if (--max_bad_count > 0 && good_count > 4) { |
628 | 111k | good_count = 0; |
629 | 111k | bad_count = 1; |
630 | 111k | return false; |
631 | 111k | } |
632 | 164k | if (++bad_count > 5 || |
633 | 164k | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { |
634 | | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
635 | | // exceeds the remaining number of allowable total errors. |
636 | 8.26k | warn("too many errors; giving up on reading object"); |
637 | 8.26k | return true; |
638 | 8.26k | } |
639 | 155k | good_count = 0; |
640 | 155k | return false; |
641 | 164k | } |
642 | | |
643 | | void |
644 | | QPDFParser::warn(QPDFExc const& e) const |
645 | 482k | { |
646 | | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
647 | | // object. If parsing for some other reason, such as an explicit creation of an object from a |
648 | | // string, then just throw the exception. |
649 | 482k | if (context) { |
650 | 482k | context->warn(e); |
651 | 482k | } else { |
652 | 0 | throw e; |
653 | 0 | } |
654 | 482k | } |
655 | | |
656 | | void |
657 | | QPDFParser::warnDuplicateKey() |
658 | 66.8k | { |
659 | 66.8k | QTC::TC("qpdf", "QPDFParser duplicate dict key"); |
660 | 66.8k | warn( |
661 | 66.8k | frame->offset, |
662 | 66.8k | "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); |
663 | 66.8k | } |
664 | | |
665 | | void |
666 | | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
667 | 482k | { |
668 | 482k | if (stream_id) { |
669 | 8.25k | std::string descr = "object "s + std::to_string(obj_id) + " 0"; |
670 | 8.25k | std::string name = context->getFilename() + " object stream " + std::to_string(stream_id); |
671 | 8.25k | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); |
672 | 474k | } else { |
673 | 474k | warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); |
674 | 474k | } |
675 | 482k | } |
676 | | |
677 | | void |
678 | | QPDFParser::warn(std::string const& msg) const |
679 | 339k | { |
680 | 339k | warn(input.getLastOffset(), msg); |
681 | 339k | } |