/src/qpdf/libqpdf/QPDFParser.cc
Line | Count | Source |
1 | | #include <qpdf/QPDFParser.hh> |
2 | | |
3 | | #include <qpdf/QPDF.hh> |
4 | | #include <qpdf/QPDFObjGen.hh> |
5 | | #include <qpdf/QPDFObjectHandle.hh> |
6 | | #include <qpdf/QPDFObject_private.hh> |
7 | | #include <qpdf/QPDFTokenizer_private.hh> |
8 | | #include <qpdf/QTC.hh> |
9 | | #include <qpdf/QUtil.hh> |
10 | | |
11 | | #include <memory> |
12 | | |
13 | | using namespace std::literals; |
14 | | using namespace qpdf; |
15 | | |
16 | | using ObjectPtr = std::shared_ptr<QPDFObject>; |
17 | | |
18 | | static uint32_t const& max_nesting{global::Limits::objects_max_nesting()}; |
19 | | |
20 | | // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides |
21 | | // special access to allow the parser to create unresolved objects and dangling references. |
22 | | class QPDF::Doc::ParseGuard |
23 | | { |
24 | | public: |
25 | | ParseGuard(QPDF* qpdf) : |
26 | 4.45M | objects(qpdf ? &qpdf->m->objects : nullptr) |
27 | 4.45M | { |
28 | 4.45M | if (objects) { |
29 | 4.40M | objects->inParse(true); |
30 | 4.40M | } |
31 | 4.45M | } |
32 | | |
33 | | static std::shared_ptr<QPDFObject> |
34 | | getObject(QPDF* qpdf, int id, int gen, bool parse_pdf) |
35 | 4.11M | { |
36 | 4.11M | return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf); |
37 | 4.11M | } |
38 | | |
39 | | ~ParseGuard() |
40 | 4.45M | { |
41 | 4.45M | if (objects) { |
42 | 4.40M | objects->inParse(false); |
43 | 4.40M | } |
44 | 4.45M | } |
45 | | QPDF::Doc::Objects* objects; |
46 | | }; |
47 | | |
48 | | using ParseGuard = QPDF::Doc::ParseGuard; |
49 | | |
50 | | QPDFObjectHandle |
51 | | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
52 | 51.8k | { |
53 | 51.8k | qpdf::Tokenizer tokenizer; |
54 | 51.8k | if (auto result = QPDFParser( |
55 | 51.8k | input, |
56 | 51.8k | make_description(input.getName(), object_description), |
57 | 51.8k | object_description, |
58 | 51.8k | tokenizer, |
59 | 51.8k | nullptr, |
60 | 51.8k | context, |
61 | 51.8k | false) |
62 | 51.8k | .parse()) { |
63 | 51.2k | return result; |
64 | 51.2k | } |
65 | 621 | return {QPDFObject::create<QPDF_Null>()}; |
66 | 51.8k | } |
67 | | |
68 | | QPDFObjectHandle |
69 | | QPDFParser::parse_content( |
70 | | InputSource& input, |
71 | | std::shared_ptr<QPDFObject::Description> sp_description, |
72 | | qpdf::Tokenizer& tokenizer, |
73 | | QPDF* context) |
74 | 2.48M | { |
75 | 2.48M | static const std::string content("content"); // GCC12 - make constexpr |
76 | 2.48M | auto p = QPDFParser( |
77 | 2.48M | input, |
78 | 2.48M | std::move(sp_description), |
79 | 2.48M | content, |
80 | 2.48M | tokenizer, |
81 | 2.48M | nullptr, |
82 | 2.48M | context, |
83 | 2.48M | true, |
84 | 2.48M | 0, |
85 | 2.48M | 0, |
86 | 2.48M | context && context->doc().reconstructed_xref()); |
87 | 2.48M | auto result = p.parse(true); |
88 | 2.48M | if (result || p.empty_) { |
89 | | // In content stream mode, leave object uninitialized to indicate EOF |
90 | 2.44M | return result; |
91 | 2.44M | } |
92 | 41.6k | return {QPDFObject::create<QPDF_Null>()}; |
93 | 2.48M | } |
94 | | |
95 | | QPDFObjectHandle |
96 | | QPDFParser::parse( |
97 | | InputSource& input, |
98 | | std::string const& object_description, |
99 | | QPDFTokenizer& tokenizer, |
100 | | bool& empty, |
101 | | QPDFObjectHandle::StringDecrypter* decrypter, |
102 | | QPDF* context) |
103 | 0 | { |
104 | | // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the |
105 | | // only user of the 'empty' member. When removing this overload also remove 'empty'. |
106 | 0 | auto p = QPDFParser( |
107 | 0 | input, |
108 | 0 | make_description(input.getName(), object_description), |
109 | 0 | object_description, |
110 | 0 | *tokenizer.m, |
111 | 0 | decrypter, |
112 | 0 | context, |
113 | 0 | false); |
114 | 0 | auto result = p.parse(); |
115 | 0 | empty = p.empty_; |
116 | 0 | if (result) { |
117 | 0 | return result; |
118 | 0 | } |
119 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
120 | 0 | } |
121 | | |
122 | | QPDFObjectHandle |
123 | | QPDFParser::parse( |
124 | | InputSource& input, |
125 | | std::string const& object_description, |
126 | | qpdf::Tokenizer& tokenizer, |
127 | | QPDFObjectHandle::StringDecrypter* decrypter, |
128 | | QPDF& context, |
129 | | bool sanity_checks) |
130 | 1.53M | { |
131 | 1.53M | return QPDFParser( |
132 | 1.53M | input, |
133 | 1.53M | make_description(input.getName(), object_description), |
134 | 1.53M | object_description, |
135 | 1.53M | tokenizer, |
136 | 1.53M | decrypter, |
137 | 1.53M | &context, |
138 | 1.53M | true, |
139 | 1.53M | 0, |
140 | 1.53M | 0, |
141 | 1.53M | sanity_checks) |
142 | 1.53M | .parse(); |
143 | 1.53M | } |
144 | | |
145 | | QPDFObjectHandle |
146 | | QPDFParser::parse( |
147 | | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
148 | 383k | { |
149 | 383k | return QPDFParser( |
150 | 383k | input, |
151 | 383k | std::make_shared<QPDFObject::Description>( |
152 | 383k | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
153 | 383k | "", |
154 | 383k | tokenizer, |
155 | 383k | nullptr, |
156 | 383k | &context, |
157 | 383k | true, |
158 | 383k | stream_id, |
159 | 383k | obj_id) |
160 | 383k | .parse(); |
161 | 383k | } |
162 | | |
163 | | QPDFObjectHandle |
164 | | QPDFParser::parse(bool content_stream) |
165 | 4.45M | { |
166 | 4.45M | try { |
167 | 4.45M | return parse_first(content_stream); |
168 | 4.45M | } catch (Error&) { |
169 | 73.6k | return {}; |
170 | 73.6k | } catch (QPDFExc& e) { |
171 | 33.0k | throw e; |
172 | 33.0k | } catch (std::logic_error& e) { |
173 | 7 | throw e; |
174 | 9.14k | } catch (std::exception& e) { |
175 | 9.14k | warn("treating object as null because of error during parsing : "s + e.what()); |
176 | 9.14k | return {}; |
177 | 9.14k | } |
178 | 4.45M | } |
179 | | |
180 | | QPDFObjectHandle |
181 | | QPDFParser::parse_first(bool content_stream) |
182 | 4.45M | { |
183 | | // This method must take care not to resolve any objects. Don't check the type of any object |
184 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
185 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
186 | | // logic error to be thrown from QPDF::inParse(). |
187 | | |
188 | 4.45M | QPDF::Doc::ParseGuard pg(context); |
189 | 4.45M | start = input.tell(); |
190 | 4.45M | if (!tokenizer.nextToken(input, object_description)) { |
191 | 46.1k | warn(tokenizer.getErrorMessage()); |
192 | 46.1k | } |
193 | | |
194 | 4.45M | switch (tokenizer.getType()) { |
195 | 14.6k | case QPDFTokenizer::tt_eof: |
196 | 14.6k | if (content_stream) { |
197 | | // In content stream mode, leave object uninitialized to indicate EOF |
198 | 7.31k | empty_ = true; |
199 | 7.31k | return {}; |
200 | 7.31k | } |
201 | 7.28k | warn("unexpected EOF"); |
202 | 7.28k | return {}; |
203 | | |
204 | 44.7k | case QPDFTokenizer::tt_bad: |
205 | 44.7k | return {}; |
206 | | |
207 | 3.47k | case QPDFTokenizer::tt_brace_open: |
208 | 7.21k | case QPDFTokenizer::tt_brace_close: |
209 | 7.21k | warn("treating unexpected brace token as null"); |
210 | 7.21k | return {}; |
211 | | |
212 | 13.0k | case QPDFTokenizer::tt_array_close: |
213 | 13.0k | warn("treating unexpected array close token as null"); |
214 | 13.0k | return {}; |
215 | | |
216 | 5.73k | case QPDFTokenizer::tt_dict_close: |
217 | 5.73k | warn("unexpected dictionary close token"); |
218 | 5.73k | return {}; |
219 | | |
220 | 119k | case QPDFTokenizer::tt_array_open: |
221 | 1.67M | case QPDFTokenizer::tt_dict_open: |
222 | 1.67M | stack.clear(); |
223 | 1.67M | stack.emplace_back( |
224 | 1.67M | input, |
225 | 1.67M | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); |
226 | 1.67M | frame = &stack.back(); |
227 | 1.67M | return parseRemainder(content_stream); |
228 | | |
229 | 6.38k | case QPDFTokenizer::tt_bool: |
230 | 6.38k | return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); |
231 | | |
232 | 387k | case QPDFTokenizer::tt_null: |
233 | 387k | return {QPDFObject::create<QPDF_Null>()}; |
234 | | |
235 | 490k | case QPDFTokenizer::tt_integer: |
236 | 490k | return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
237 | | |
238 | 203k | case QPDFTokenizer::tt_real: |
239 | 203k | return withDescription<QPDF_Real>(tokenizer.getValue()); |
240 | | |
241 | 206k | case QPDFTokenizer::tt_name: |
242 | 206k | return withDescription<QPDF_Name>(tokenizer.getValue()); |
243 | | |
244 | 1.37M | case QPDFTokenizer::tt_word: |
245 | 1.37M | { |
246 | 1.37M | auto const& value = tokenizer.getValue(); |
247 | 1.37M | if (content_stream) { |
248 | 1.28M | return withDescription<QPDF_Operator>(value); |
249 | 1.28M | } else if (value == "endobj") { |
250 | | // We just saw endobj without having read anything. Nothing in the PDF spec appears |
251 | | // to allow empty objects, but they have been encountered in actual PDF files and |
252 | | // Adobe Reader appears to ignore them. Treat this as a null and do not move the |
253 | | // input source's offset. |
254 | 2.86k | empty_ = true; |
255 | 2.86k | input.seek(input.getLastOffset(), SEEK_SET); |
256 | 2.86k | if (!content_stream) { |
257 | 2.86k | warn("empty object treated as null"); |
258 | 2.86k | } |
259 | 2.86k | return {}; |
260 | 89.8k | } else { |
261 | 89.8k | warn("unknown token while reading object; treating as string"); |
262 | 89.8k | return withDescription<QPDF_String>(value); |
263 | 89.8k | } |
264 | 1.37M | } |
265 | | |
266 | 24.3k | case QPDFTokenizer::tt_string: |
267 | 24.3k | if (decrypter) { |
268 | 1.58k | std::string s{tokenizer.getValue()}; |
269 | 1.58k | decrypter->decryptString(s); |
270 | 1.58k | return withDescription<QPDF_String>(s); |
271 | 22.7k | } else { |
272 | 22.7k | return withDescription<QPDF_String>(tokenizer.getValue()); |
273 | 22.7k | } |
274 | | |
275 | 0 | default: |
276 | 0 | warn("treating unknown token type as null while reading object"); |
277 | 0 | return {}; |
278 | 4.45M | } |
279 | 4.45M | } |
280 | | |
// Parse everything that follows an already-consumed array-open or dictionary-open token. The
// caller (parse_first) has pushed the frame for the outermost container. Tokens are consumed
// until that outermost container is closed, at which point the finished array or dictionary is
// returned. An uninitialized handle is returned when parsing is abandoned: on EOF, on excessive
// nesting, and - only when sanity_checks is set - on a mismatched close token or a stray
// 'endobj' / 'endstream'.
281 | | QPDFObjectHandle |
282 | | QPDFParser::parseRemainder(bool content_stream) |
283 | 1.67M | { |
284 | | // This method must take care not to resolve any objects. Don't check the type of any object |
285 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
286 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
287 | | // logic error to be thrown from QPDF::inParse(). |
288 | | |
289 | 1.67M | bad_count = 0; |
// b_contents is true only between reading a "/Contents" dictionary key (with a decrypter
// present) and the next string token or container opener.
290 | 1.67M | bool b_contents = false; |
291 | | |
292 | 80.9M | while (true) { |
293 | 80.8M | if (!tokenizer.nextToken(input, object_description)) { |
294 | 428k | warn(tokenizer.getErrorMessage()); |
295 | 428k | } |
296 | 80.8M | ++good_count; // optimistically |
297 | | |
// int_count is the number of consecutive integer tokens seen so far. int_buffer and
// last_offset_buffer are used as two-slot buffers indexed by int_count % 2, so they always hold
// the two most recent integers and their offsets.
298 | 80.8M | if (int_count != 0) { |
299 | | // Special handling of indirect references. Treat integer tokens as part of an indirect |
300 | | // reference until proven otherwise. |
301 | 24.1M | if (tokenizer.getType() == QPDFTokenizer::tt_integer) { |
302 | 17.1M | if (++int_count > 2) { |
303 | | // Process the oldest buffered integer. |
304 | 12.2M | addInt(int_count); |
305 | 12.2M | } |
306 | 17.1M | last_offset_buffer[int_count % 2] = input.getLastOffset(); |
307 | 17.1M | int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
308 | 17.1M | continue; |
309 | | |
310 | 17.1M | } else if ( |
311 | 6.90M | int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && |
312 | 4.37M | tokenizer.getValue() == "R") { |
313 | 4.14M | if (!context) { |
314 | 0 | throw std::logic_error( |
315 | 0 | "QPDFParser::parse called without context on an object with indirect " |
316 | 0 | "references"); |
317 | 0 | } |
// The older of the two buffered integers is the object id, the newer one the generation.
318 | 4.14M | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); |
319 | 4.14M | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); |
320 | 4.14M | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
321 | 4.11M | add(ParseGuard::getObject(context, id, gen, parse_pdf)); |
322 | 4.11M | } else { |
323 | 27.5k | add_bad_null( |
324 | 27.5k | "treating bad indirect reference (" + std::to_string(id) + " " + |
325 | 27.5k | std::to_string(gen) + " R) as null"); |
326 | 27.5k | } |
327 | 4.14M | int_count = 0; |
328 | 4.14M | continue; |
329 | | |
330 | 4.14M | } else if (int_count > 0) { |
331 | | // Process the buffered integers before processing the current token. |
332 | 2.76M | if (int_count > 1) { |
333 | 747k | addInt(int_count - 1); |
334 | 747k | } |
335 | 2.76M | addInt(int_count); |
336 | 2.76M | int_count = 0; |
337 | 2.76M | } |
338 | 24.1M | } |
339 | | |
340 | 59.5M | switch (tokenizer.getType()) { |
341 | 93.6k | case QPDFTokenizer::tt_eof: |
342 | 93.6k | warn("parse error while reading object"); |
343 | 93.6k | if (content_stream) { |
344 | | // In content stream mode, leave object uninitialized to indicate EOF |
345 | 494 | return {}; |
346 | 494 | } |
347 | 93.1k | warn("unexpected EOF"); |
348 | 93.1k | return {}; |
349 | | |
350 | 360k | case QPDFTokenizer::tt_bad: |
351 | 360k | check_too_many_bad_tokens(); |
352 | 360k | addNull(); |
353 | 360k | continue; |
354 | | |
355 | 28.7k | case QPDFTokenizer::tt_brace_open: |
356 | 53.7k | case QPDFTokenizer::tt_brace_close: |
357 | 53.7k | add_bad_null("treating unexpected brace token as null"); |
358 | 53.7k | continue; |
359 | | |
360 | 1.21M | case QPDFTokenizer::tt_array_close: |
361 | 1.21M | if (frame->state == st_array) { |
// NOTE(review): the extra `true` argument is passed only when the frame recorded more than 100
// nulls; presumably it selects a different array representation - confirm in QPDF_Array.
362 | 1.19M | auto object = frame->null_count > 100 |
363 | 1.19M | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
364 | 1.19M | : QPDFObject::create<QPDF_Array>(std::move(frame->olist)); |
365 | 1.19M | setDescription(object, frame->offset - 1); |
366 | | // The `offset` points to the next of "[". Set the rewind offset to point to the |
367 | | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
368 | | // array start delimiter. getLastOffset points to the array end token and therefore |
369 | | // can't be used here. |
// A single remaining frame means the outermost container was just closed: parsing is done.
370 | 1.19M | if (stack.size() <= 1) { |
371 | 35.5k | return object; |
372 | 35.5k | } |
// Otherwise pop back to the enclosing container and add the finished array to it.
373 | 1.15M | stack.pop_back(); |
374 | 1.15M | frame = &stack.back(); |
375 | 1.15M | add(std::move(object)); |
376 | 1.15M | } else { |
377 | 22.7k | if (sanity_checks) { |
378 | | // During sanity checks, assume nesting of containers is corrupt and object is |
379 | | // unusable. |
380 | 15.8k | warn("unexpected array close token; giving up on reading object"); |
381 | 15.8k | return {}; |
382 | 15.8k | } |
383 | 6.89k | add_bad_null("treating unexpected array close token as null"); |
384 | 6.89k | } |
385 | 1.16M | continue; |
386 | | |
387 | 2.13M | case QPDFTokenizer::tt_dict_close: |
// NOTE(review): this comparison relies on the declaration order of the parser state enum, which
// is not visible here; it is evidently meant to select the two dictionary states.
388 | 2.13M | if (frame->state <= st_dictionary_value) { |
389 | | // Attempt to recover more or less gracefully from invalid dictionaries. |
390 | 2.11M | auto& dict = frame->dict; |
391 | | |
392 | 2.11M | if (frame->state == st_dictionary_value) { |
393 | 103k | warn( |
394 | 103k | frame->offset, |
395 | 103k | "dictionary ended prematurely; using null as value for last key"); |
396 | 103k | dict[frame->key] = QPDFObject::create<QPDF_Null>(); |
397 | 103k | } |
// Objects that arrived while a key was expected were collected in olist (see add()/addNull()).
398 | 2.11M | if (!frame->olist.empty()) { |
399 | 518k | if (sanity_checks) { |
400 | 485k | warn( |
401 | 485k | frame->offset, |
402 | 485k | "expected dictionary keys but found non-name objects; ignoring"); |
403 | 485k | } else { |
404 | 33.2k | fixMissingKeys(); |
405 | 33.2k | } |
406 | 518k | } |
407 | | |
// For a /Type /Sig dictionary that has /ByteRange and a string /Contents, replace /Contents with
// the string value that was saved before decryption (see the tt_string case below).
408 | 2.11M | if (!frame->contents_string.empty() && dict.contains("/Type") && |
409 | 1.00k | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && |
410 | 315 | dict.contains("/Contents") && dict["/Contents"].isString()) { |
411 | 308 | dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); |
412 | 308 | dict["/Contents"].setParsedOffset(frame->contents_offset); |
413 | 308 | } |
414 | 2.11M | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); |
415 | 2.11M | setDescription(object, frame->offset - 2); |
416 | | // The `offset` points to the next of "<<". Set the rewind offset to point to the |
417 | | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
418 | | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
419 | | // and therefore can't be used here. |
420 | 2.11M | if (stack.size() <= 1) { |
421 | 1.38M | return object; |
422 | 1.38M | } |
423 | 726k | stack.pop_back(); |
424 | 726k | frame = &stack.back(); |
425 | 726k | add(std::move(object)); |
426 | 726k | } else { |
427 | 17.7k | if (sanity_checks) { |
428 | | // During sanity checks, assume nesting of containers is corrupt and object is |
429 | | // unusable. |
430 | 12.3k | warn("unexpected dictionary close token; giving up on reading object"); |
431 | 12.3k | return {}; |
432 | 12.3k | } |
433 | 5.39k | add_bad_null("unexpected dictionary close token"); |
434 | 5.39k | } |
435 | 731k | continue; |
436 | | |
437 | 2.07M | case QPDFTokenizer::tt_array_open: |
438 | 3.31M | case QPDFTokenizer::tt_dict_open: |
439 | 3.31M | if (stack.size() > max_nesting) { |
440 | 1.38k | warn("ignoring excessively deeply nested data structure"); |
441 | 1.38k | return {}; |
442 | 3.30M | } else { |
// Entering a nested container: push a new frame and make it the current one.
443 | 3.30M | b_contents = false; |
444 | 3.30M | stack.emplace_back( |
445 | 3.30M | input, |
446 | 3.30M | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
447 | 3.30M | : st_dictionary_key); |
448 | 3.30M | frame = &stack.back(); |
449 | 3.30M | continue; |
450 | 3.30M | } |
451 | | |
452 | 120k | case QPDFTokenizer::tt_bool: |
453 | 120k | addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); |
454 | 120k | continue; |
455 | | |
456 | 1.06M | case QPDFTokenizer::tt_null: |
457 | 1.06M | addNull(); |
458 | 1.06M | continue; |
459 | | |
// This case is only reached with int_count == 0: every integer token seen while integers are
// buffered is consumed by the indirect-reference handling at the top of the loop.
460 | 7.63M | case QPDFTokenizer::tt_integer: |
461 | 7.63M | if (!content_stream) { |
462 | | // Buffer token in case it is part of an indirect reference. |
463 | 6.91M | last_offset_buffer[1] = input.getLastOffset(); |
464 | 6.91M | int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
465 | 6.91M | int_count = 1; |
466 | 6.91M | } else { |
467 | 722k | addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
468 | 722k | } |
469 | 7.63M | continue; |
470 | | |
471 | 877k | case QPDFTokenizer::tt_real: |
472 | 877k | addScalar<QPDF_Real>(tokenizer.getValue()); |
473 | 877k | continue; |
474 | | |
475 | 38.3M | case QPDFTokenizer::tt_name: |
// A name read while a key is expected becomes the pending key; any other name is a value.
476 | 38.3M | if (frame->state == st_dictionary_key) { |
477 | 8.89M | frame->key = tokenizer.getValue(); |
478 | 8.89M | frame->state = st_dictionary_value; |
479 | 8.89M | b_contents = decrypter && frame->key == "/Contents"; |
480 | 8.89M | continue; |
481 | 29.4M | } else { |
482 | 29.4M | addScalar<QPDF_Name>(tokenizer.getValue()); |
483 | 29.4M | } |
484 | 29.4M | continue; |
485 | | |
486 | 29.4M | case QPDFTokenizer::tt_word: |
487 | 2.48M | if (content_stream) { |
488 | 317k | addScalar<QPDF_Operator>(tokenizer.getValue()); |
489 | 317k | continue; |
490 | 317k | } |
491 | | |
492 | 2.17M | if (sanity_checks) { |
493 | 2.09M | if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") { |
494 | | // During sanity checks, assume an unexpected endobj or endstream indicates that |
495 | | // we are parsing past the end of the object. |
496 | 25.2k | warn( |
497 | 25.2k | "unexpected 'endobj' or 'endstream' while reading object; giving up on " |
498 | 25.2k | "reading object"); |
499 | 25.2k | return {}; |
500 | 25.2k | } |
501 | | |
502 | 2.06M | add_bad_null("unknown token while reading object; treating as null"); |
503 | 2.06M | continue; |
504 | 2.09M | } |
505 | | |
506 | 79.3k | warn("unknown token while reading object; treating as string"); |
507 | 79.3k | check_too_many_bad_tokens(); |
508 | 79.3k | addScalar<QPDF_String>(tokenizer.getValue()); |
509 | | |
510 | 79.3k | continue; |
511 | | |
512 | 1.83M | case QPDFTokenizer::tt_string: |
513 | 1.83M | { |
514 | 1.83M | auto const& val = tokenizer.getValue(); |
515 | 1.83M | if (decrypter) { |
// Remember the undecrypted value and offset of a /Contents string before decrypting a copy.
516 | 184k | if (b_contents) { |
517 | 10.7k | frame->contents_string = val; |
518 | 10.7k | frame->contents_offset = input.getLastOffset(); |
519 | 10.7k | b_contents = false; |
520 | 10.7k | } |
521 | 184k | std::string s{val}; |
522 | 184k | decrypter->decryptString(s); |
523 | 184k | addScalar<QPDF_String>(s); |
524 | 1.64M | } else { |
525 | 1.64M | addScalar<QPDF_String>(val); |
526 | 1.64M | } |
527 | 1.83M | } |
528 | 1.83M | continue; |
529 | | |
// No `continue` is needed here: control leaves the switch and the loop starts its next iteration.
530 | 0 | default: |
531 | 0 | add_bad_null("treating unknown token type as null while reading object"); |
532 | 59.5M | } |
533 | 59.5M | } |
534 | 1.67M | } |
535 | | |
536 | | void |
537 | | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) |
538 | 55.1M | { |
539 | 55.1M | if (frame->state != st_dictionary_value) { |
540 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
541 | | // processing once the tt_dict_close token has been found. |
542 | 46.8M | frame->olist.emplace_back(std::move(obj)); |
543 | 46.8M | } else { |
544 | 8.31M | if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { |
545 | 479k | warnDuplicateKey(); |
546 | 479k | } |
547 | 8.31M | frame->state = st_dictionary_key; |
548 | 8.31M | } |
549 | 55.1M | } |
550 | | |
551 | | void |
552 | | QPDFParser::addNull() |
553 | 3.49M | { |
554 | 3.49M | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
555 | | |
556 | 3.49M | if (frame->state != st_dictionary_value) { |
557 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
558 | | // processing once the tt_dict_close token has been found. |
559 | 3.11M | frame->olist.emplace_back(null_obj); |
560 | 3.11M | } else { |
561 | 381k | if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { |
562 | 39.6k | warnDuplicateKey(); |
563 | 39.6k | } |
564 | 381k | frame->state = st_dictionary_key; |
565 | 381k | } |
566 | 3.49M | ++frame->null_count; |
567 | 3.49M | } |
568 | | |
569 | | void |
570 | | QPDFParser::add_bad_null(std::string const& msg) |
571 | 2.15M | { |
572 | 2.15M | warn(msg); |
573 | 2.15M | check_too_many_bad_tokens(); |
574 | 2.15M | addNull(); |
575 | 2.15M | } |
576 | | |
577 | | void |
578 | | QPDFParser::addInt(int count) |
579 | 15.8M | { |
580 | 15.8M | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
581 | 15.8M | obj->setDescription(context, description, last_offset_buffer[count % 2]); |
582 | 15.8M | add(std::move(obj)); |
583 | 15.8M | } |
584 | | |
585 | | template <typename T, typename... Args> |
586 | | void |
587 | | QPDFParser::addScalar(Args&&... args) |
588 | 33.3M | { |
589 | 33.3M | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); |
590 | 33.3M | if (frame->olist.size() > limit || frame->dict.size() > limit) { |
591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is |
592 | | // encountered. |
593 | 4.28k | max_bad_count = 0; |
594 | 4.28k | check_too_many_bad_tokens(); // always throws Error() |
595 | 4.28k | } |
596 | 33.3M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
597 | 33.3M | obj->setDescription(context, description, input.getLastOffset()); |
598 | 33.3M | add(std::move(obj)); |
599 | 33.3M | } void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&) Line | Count | Source | 588 | 120k | { | 589 | 120k | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 120k | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 140 | max_bad_count = 0; | 594 | 140 | check_too_many_bad_tokens(); // always throws Error() | 595 | 140 | } | 596 | 120k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 120k | obj->setDescription(context, description, input.getLastOffset()); | 598 | 120k | add(std::move(obj)); | 599 | 120k | } |
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&) Line | Count | Source | 588 | 722k | { | 589 | 722k | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 722k | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 74 | max_bad_count = 0; | 594 | 74 | check_too_many_bad_tokens(); // always throws Error() | 595 | 74 | } | 596 | 722k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 722k | obj->setDescription(context, description, input.getLastOffset()); | 598 | 722k | add(std::move(obj)); | 599 | 722k | } |
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 588 | 877k | { | 589 | 877k | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 877k | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 304 | max_bad_count = 0; | 594 | 304 | check_too_many_bad_tokens(); // always throws Error() | 595 | 304 | } | 596 | 877k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 877k | obj->setDescription(context, description, input.getLastOffset()); | 598 | 877k | add(std::move(obj)); | 599 | 877k | } |
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 588 | 29.4M | { | 589 | 29.4M | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 29.4M | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 3.27k | max_bad_count = 0; | 594 | 3.27k | check_too_many_bad_tokens(); // always throws Error() | 595 | 3.27k | } | 596 | 29.4M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 29.4M | obj->setDescription(context, description, input.getLastOffset()); | 598 | 29.4M | add(std::move(obj)); | 599 | 29.4M | } |
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 588 | 317k | { | 589 | 317k | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 317k | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 122 | max_bad_count = 0; | 594 | 122 | check_too_many_bad_tokens(); // always throws Error() | 595 | 122 | } | 596 | 317k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 317k | obj->setDescription(context, description, input.getLastOffset()); | 598 | 317k | add(std::move(obj)); | 599 | 317k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 588 | 1.72M | { | 589 | 1.72M | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 1.72M | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 332 | max_bad_count = 0; | 594 | 332 | check_too_many_bad_tokens(); // always throws Error() | 595 | 332 | } | 596 | 1.72M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 1.72M | obj->setDescription(context, description, input.getLastOffset()); | 598 | 1.72M | add(std::move(obj)); | 599 | 1.72M | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 588 | 184k | { | 589 | 184k | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 590 | 184k | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 591 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 592 | | // encountered. | 593 | 41 | max_bad_count = 0; | 594 | 41 | check_too_many_bad_tokens(); // always throws Error() | 595 | 41 | } | 596 | 184k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 597 | 184k | obj->setDescription(context, description, input.getLastOffset()); | 598 | 184k | add(std::move(obj)); | 599 | 184k | } |
|
600 | | |
// Build a new object of type T from the forwarded constructor arguments, stamp it with this
// parser's context and description (using the parser's `start` member as the offset), and
// return it as a QPDFObjectHandle.
template <typename T, typename... Args>
QPDFObjectHandle
QPDFParser::withDescription(Args&&... args)
{
    auto created = QPDFObject::create<T>(std::forward<Args>(args)...);
    created->setDescription(context, description, start);
    return {created};
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&) Line | Count | Source | 604 | 487k | { | 605 | 487k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 487k | obj->setDescription(context, description, start); | 607 | 487k | return {obj}; | 608 | 487k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 604 | 203k | { | 605 | 203k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 203k | obj->setDescription(context, description, start); | 607 | 203k | return {obj}; | 608 | 203k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 604 | 206k | { | 605 | 206k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 206k | obj->setDescription(context, description, start); | 607 | 206k | return {obj}; | 608 | 206k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 604 | 1.28M | { | 605 | 1.28M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 1.28M | obj->setDescription(context, description, start); | 607 | 1.28M | return {obj}; | 608 | 1.28M | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 604 | 107k | { | 605 | 107k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 107k | obj->setDescription(context, description, start); | 607 | 107k | return {obj}; | 608 | 107k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 604 | 1.49k | { | 605 | 1.49k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 606 | 1.49k | obj->setDescription(context, description, start); | 607 | 1.49k | return {obj}; | 608 | 1.49k | } |
|
609 | | |
610 | | void |
611 | | QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) |
612 | 3.30M | { |
613 | 3.30M | if (obj) { |
614 | 3.30M | obj->setDescription(context, description, parsed_offset); |
615 | 3.30M | } |
616 | 3.30M | } |
617 | | |
618 | | void |
619 | | QPDFParser::fixMissingKeys() |
620 | 33.2k | { |
621 | 33.2k | std::set<std::string> names; |
622 | 123k | for (auto& obj: frame->olist) { |
623 | 123k | if (obj.raw_type_code() == ::ot_name) { |
624 | 3.78k | names.insert(obj.obj_sp()->getStringValue()); |
625 | 3.78k | } |
626 | 123k | } |
627 | 33.2k | int next_fake_key = 1; |
628 | 120k | for (auto const& item: frame->olist) { |
629 | 121k | while (true) { |
630 | 121k | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); |
631 | 121k | const bool found_fake = !frame->dict.contains(key) && !names.contains(key); |
632 | 121k | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
633 | 121k | if (found_fake) { |
634 | 120k | warn( |
635 | 120k | frame->offset, |
636 | 120k | "expected dictionary key but found non-name object; inserting key " + key); |
637 | 120k | frame->dict[key] = item; |
638 | 120k | break; |
639 | 120k | } |
640 | 121k | } |
641 | 120k | } |
642 | 33.2k | } |
643 | | |
644 | | void |
645 | | QPDFParser::check_too_many_bad_tokens() |
646 | 2.59M | { |
647 | 2.59M | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); |
648 | 2.59M | if (frame->olist.size() > limit || frame->dict.size() > limit) { |
649 | 4.45k | if (bad_count) { |
650 | 3.15k | warn( |
651 | 3.15k | "encountered errors while parsing an array or dictionary with more than " + |
652 | 3.15k | std::to_string(limit) + " elements; giving up on reading object"); |
653 | 3.15k | throw Error(); |
654 | 3.15k | } |
655 | 1.29k | warn( |
656 | 1.29k | "encountered an array or dictionary with more than " + std::to_string(limit) + |
657 | 1.29k | " elements during xref recovery; giving up on reading object"); |
658 | 1.29k | } |
659 | 2.58M | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { |
660 | 934k | good_count = 0; |
661 | 934k | bad_count = 1; |
662 | 934k | return; |
663 | 934k | } |
664 | 1.65M | if (++bad_count > 5 || |
665 | 1.60M | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { |
666 | | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
667 | | // exceeds the remaining number of allowable total errors. |
668 | 70.7k | warn("too many errors; giving up on reading object"); |
669 | 70.7k | throw Error(); |
670 | 70.7k | } |
671 | 1.58M | good_count = 0; |
672 | 1.58M | } |
673 | | |
674 | | void |
675 | | QPDFParser::warn(QPDFExc const& e) const |
676 | 4.39M | { |
677 | | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
678 | | // object. If parsing for some other reason, such as an explicit creation of an object from a |
679 | | // string, then just throw the exception. |
680 | 4.39M | if (context) { |
681 | 4.39M | context->warn(e); |
682 | 4.39M | } else { |
683 | 621 | throw e; |
684 | 621 | } |
685 | 4.39M | } |
686 | | |
687 | | void |
688 | | QPDFParser::warnDuplicateKey() |
689 | 519k | { |
690 | 519k | warn( |
691 | 519k | frame->offset, |
692 | 519k | "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); |
693 | 519k | } |
694 | | |
695 | | void |
696 | | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
697 | 4.39M | { |
698 | 4.39M | if (stream_id) { |
699 | 207k | std::string descr = "object "s + std::to_string(obj_id) + " 0"; |
700 | 207k | std::string name = context->getFilename() + " object stream " + std::to_string(stream_id); |
701 | 207k | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); |
702 | 4.18M | } else { |
703 | 4.18M | warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); |
704 | 4.18M | } |
705 | 4.39M | } |
706 | | |
707 | | void |
708 | | QPDFParser::warn(std::string const& msg) const |
709 | 3.16M | { |
710 | 3.16M | warn(input.getLastOffset(), msg); |
711 | 3.16M | } |