/src/qpdf/libqpdf/QPDFParser.cc
Line | Count | Source |
1 | | #include <qpdf/QPDFParser.hh> |
2 | | |
3 | | #include <qpdf/QPDF.hh> |
4 | | #include <qpdf/QPDFObjGen.hh> |
5 | | #include <qpdf/QPDFObjectHandle.hh> |
6 | | #include <qpdf/QPDFObject_private.hh> |
7 | | #include <qpdf/QPDFTokenizer_private.hh> |
8 | | #include <qpdf/QTC.hh> |
9 | | #include <qpdf/QUtil.hh> |
10 | | |
11 | | #include <memory> |
12 | | |
13 | | using namespace std::literals; |
14 | | using namespace qpdf; |
15 | | |
16 | | using ObjectPtr = std::shared_ptr<QPDFObject>; |
17 | | |
18 | | // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides |
19 | | // special access to allow the parser to create unresolved objects and dangling references. |
20 | | class QPDF::Doc::ParseGuard |
21 | | { |
22 | | public: |
23 | | ParseGuard(QPDF* qpdf) : |
24 | 235k | objects(qpdf ? &qpdf->m->objects : nullptr) |
25 | 235k | { |
26 | 235k | if (objects) { |
27 | 208k | objects->inParse(true); |
28 | 208k | } |
29 | 235k | } |
30 | | |
31 | | static std::shared_ptr<QPDFObject> |
32 | | getObject(QPDF* qpdf, int id, int gen, bool parse_pdf) |
33 | 495k | { |
34 | 495k | return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf); |
35 | 495k | } |
36 | | |
37 | | ~ParseGuard() |
38 | 235k | { |
39 | 235k | if (objects) { |
40 | 208k | objects->inParse(false); |
41 | 208k | } |
42 | 235k | } |
43 | | QPDF::Doc::Objects* objects; |
44 | | }; |
45 | | |
46 | | using ParseGuard = QPDF::Doc::ParseGuard; |
47 | | |
48 | | QPDFObjectHandle |
49 | | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
50 | 27.1k | { |
51 | 27.1k | qpdf::Tokenizer tokenizer; |
52 | 27.1k | bool empty = false; |
53 | 27.1k | return QPDFParser( |
54 | 27.1k | input, |
55 | 27.1k | make_description(input.getName(), object_description), |
56 | 27.1k | object_description, |
57 | 27.1k | tokenizer, |
58 | 27.1k | nullptr, |
59 | 27.1k | context, |
60 | 27.1k | false) |
61 | 27.1k | .parse(empty, false); |
62 | 27.1k | } |
63 | | |
64 | | QPDFObjectHandle |
65 | | QPDFParser::parse_content( |
66 | | InputSource& input, |
67 | | std::shared_ptr<QPDFObject::Description> sp_description, |
68 | | qpdf::Tokenizer& tokenizer, |
69 | | QPDF* context) |
70 | 0 | { |
71 | 0 | bool empty = false; |
72 | 0 | return QPDFParser( |
73 | 0 | input, |
74 | 0 | std::move(sp_description), |
75 | 0 | "content", |
76 | 0 | tokenizer, |
77 | 0 | nullptr, |
78 | 0 | context, |
79 | 0 | true, |
80 | 0 | 0, |
81 | 0 | 0, |
82 | 0 | context && context->doc().reconstructed_xref()) |
83 | 0 | .parse(empty, true); |
84 | 0 | } |
85 | | |
86 | | QPDFObjectHandle |
87 | | QPDFParser::parse( |
88 | | InputSource& input, |
89 | | std::string const& object_description, |
90 | | QPDFTokenizer& tokenizer, |
91 | | bool& empty, |
92 | | QPDFObjectHandle::StringDecrypter* decrypter, |
93 | | QPDF* context) |
94 | 0 | { |
95 | 0 | return QPDFParser( |
96 | 0 | input, |
97 | 0 | make_description(input.getName(), object_description), |
98 | 0 | object_description, |
99 | 0 | *tokenizer.m, |
100 | 0 | decrypter, |
101 | 0 | context, |
102 | 0 | false) |
103 | 0 | .parse(empty, false); |
104 | 0 | } |
105 | | |
106 | | std::pair<QPDFObjectHandle, bool> |
107 | | QPDFParser::parse( |
108 | | InputSource& input, |
109 | | std::string const& object_description, |
110 | | qpdf::Tokenizer& tokenizer, |
111 | | QPDFObjectHandle::StringDecrypter* decrypter, |
112 | | QPDF& context, |
113 | | bool sanity_checks) |
114 | 159k | { |
115 | 159k | bool empty{false}; |
116 | 159k | auto result = QPDFParser( |
117 | 159k | input, |
118 | 159k | make_description(input.getName(), object_description), |
119 | 159k | object_description, |
120 | 159k | tokenizer, |
121 | 159k | decrypter, |
122 | 159k | &context, |
123 | 159k | true, |
124 | 159k | 0, |
125 | 159k | 0, |
126 | 159k | sanity_checks) |
127 | 159k | .parse(empty, false); |
128 | 159k | return {result, empty}; |
129 | 159k | } |
130 | | |
131 | | std::pair<QPDFObjectHandle, bool> |
132 | | QPDFParser::parse( |
133 | | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
134 | 48.9k | { |
135 | 48.9k | bool empty{false}; |
136 | 48.9k | auto result = QPDFParser( |
137 | 48.9k | input, |
138 | 48.9k | std::make_shared<QPDFObject::Description>( |
139 | 48.9k | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
140 | 48.9k | "", |
141 | 48.9k | tokenizer, |
142 | 48.9k | nullptr, |
143 | 48.9k | &context, |
144 | 48.9k | true, |
145 | 48.9k | stream_id, |
146 | 48.9k | obj_id) |
147 | 48.9k | .parse(empty, false); |
148 | 48.9k | return {result, empty}; |
149 | 48.9k | } |
150 | | |
151 | | QPDFObjectHandle |
152 | | QPDFParser::parse(bool& empty, bool content_stream) |
153 | 235k | { |
154 | | // This method must take care not to resolve any objects. Don't check the type of any object |
155 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
156 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
157 | | // logic error to be thrown from QPDF::inParse(). |
158 | | |
159 | 235k | ParseGuard pg(context); |
160 | 235k | empty = false; |
161 | 235k | start = input.tell(); |
162 | | |
163 | 235k | if (!tokenizer.nextToken(input, object_description)) { |
164 | 1.86k | warn(tokenizer.getErrorMessage()); |
165 | 1.86k | } |
166 | | |
167 | 235k | switch (tokenizer.getType()) { |
168 | 1.00k | case QPDFTokenizer::tt_eof: |
169 | 1.00k | if (content_stream) { |
170 | | // In content stream mode, leave object uninitialized to indicate EOF |
171 | 0 | return {}; |
172 | 0 | } |
173 | 1.00k | QTC::TC("qpdf", "QPDFParser eof in parse"); |
174 | 1.00k | warn("unexpected EOF"); |
175 | 1.00k | return {QPDFObject::create<QPDF_Null>()}; |
176 | | |
177 | 1.96k | case QPDFTokenizer::tt_bad: |
178 | 1.96k | QTC::TC("qpdf", "QPDFParser bad token in parse"); |
179 | 1.96k | return {QPDFObject::create<QPDF_Null>()}; |
180 | | |
181 | 210 | case QPDFTokenizer::tt_brace_open: |
182 | 284 | case QPDFTokenizer::tt_brace_close: |
183 | 284 | QTC::TC("qpdf", "QPDFParser bad brace"); |
184 | 284 | warn("treating unexpected brace token as null"); |
185 | 284 | return {QPDFObject::create<QPDF_Null>()}; |
186 | | |
187 | 526 | case QPDFTokenizer::tt_array_close: |
188 | 526 | QTC::TC("qpdf", "QPDFParser bad array close"); |
189 | 526 | warn("treating unexpected array close token as null"); |
190 | 526 | return {QPDFObject::create<QPDF_Null>()}; |
191 | | |
192 | 408 | case QPDFTokenizer::tt_dict_close: |
193 | 408 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
194 | 408 | warn("unexpected dictionary close token"); |
195 | 408 | return {QPDFObject::create<QPDF_Null>()}; |
196 | | |
197 | 7.20k | case QPDFTokenizer::tt_array_open: |
198 | 192k | case QPDFTokenizer::tt_dict_open: |
199 | 192k | stack.clear(); |
200 | 192k | stack.emplace_back( |
201 | 192k | input, |
202 | 192k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); |
203 | 192k | frame = &stack.back(); |
204 | 192k | return parseRemainder(content_stream); |
205 | | |
206 | 428 | case QPDFTokenizer::tt_bool: |
207 | 428 | return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); |
208 | | |
209 | 156 | case QPDFTokenizer::tt_null: |
210 | 156 | return {QPDFObject::create<QPDF_Null>()}; |
211 | | |
212 | 13.1k | case QPDFTokenizer::tt_integer: |
213 | 13.1k | return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
214 | | |
215 | 791 | case QPDFTokenizer::tt_real: |
216 | 791 | return withDescription<QPDF_Real>(tokenizer.getValue()); |
217 | | |
218 | 13.8k | case QPDFTokenizer::tt_name: |
219 | 13.8k | return withDescription<QPDF_Name>(tokenizer.getValue()); |
220 | | |
221 | 10.2k | case QPDFTokenizer::tt_word: |
222 | 10.2k | { |
223 | 10.2k | auto const& value = tokenizer.getValue(); |
224 | 10.2k | if (content_stream) { |
225 | 0 | return withDescription<QPDF_Operator>(value); |
226 | 10.2k | } else if (value == "endobj") { |
227 | | // We just saw endobj without having read anything. Treat this as a null and do |
228 | | // not move the input source's offset. |
229 | 390 | input.seek(input.getLastOffset(), SEEK_SET); |
230 | 390 | empty = true; |
231 | 390 | return {QPDFObject::create<QPDF_Null>()}; |
232 | 9.88k | } else { |
233 | 9.88k | QTC::TC("qpdf", "QPDFParser treat word as string"); |
234 | 9.88k | warn("unknown token while reading object; treating as string"); |
235 | 9.88k | return withDescription<QPDF_String>(value); |
236 | 9.88k | } |
237 | 10.2k | } |
238 | | |
239 | 499 | case QPDFTokenizer::tt_string: |
240 | 499 | if (decrypter) { |
241 | 117 | std::string s{tokenizer.getValue()}; |
242 | 117 | decrypter->decryptString(s); |
243 | 117 | return withDescription<QPDF_String>(s); |
244 | 382 | } else { |
245 | 382 | return withDescription<QPDF_String>(tokenizer.getValue()); |
246 | 382 | } |
247 | | |
248 | 0 | default: |
249 | 0 | warn("treating unknown token type as null while reading object"); |
250 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
251 | 235k | } |
252 | 235k | } |
253 | | |
// Parses the rest of an array or dictionary after parse() has consumed the opening token and
// pushed the first frame onto `stack`. Tokens are read in a loop; nested containers push and pop
// frames. The finished outermost container is returned when its closing token is reached. A null
// object is returned when parsing is abandoned (EOF, too many errors, excessive nesting, or
// corrupt nesting during sanity checks). In content-stream mode EOF yields an uninitialized handle.
254 | | QPDFObjectHandle |
255 | | QPDFParser::parseRemainder(bool content_stream) |
256 | 192k | { |
257 | | // This method must take care not to resolve any objects. Don't check the type of any object |
258 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
259 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
260 | | // logic error to be thrown from QPDF::inParse(). |
261 | | |
262 | 192k | bad_count = 0; |
// b_contents is true while the most recently read dictionary key is /Contents and a decrypter is
// present; the next string token is then also captured undecrypted in the frame.
263 | 192k | bool b_contents = false; |
264 | | |
265 | 7.74M | while (true) { |
266 | 7.74M | if (!tokenizer.nextToken(input, object_description)) { |
267 | 37.4k | warn(tokenizer.getErrorMessage()); |
268 | 37.4k | } |
269 | 7.74M | ++good_count; // optimistically |
270 | | |
// int_count is the number of consecutive integer tokens seen so far. int_buffer and
// last_offset_buffer are two-slot buffers indexed by int_count % 2, so the two most recent
// integers (and their offsets) remain available until it is known whether "R" follows.
271 | 7.74M | if (int_count != 0) { |
272 | | // Special handling of indirect references. Treat integer tokens as part of an indirect |
273 | | // reference until proven otherwise. |
274 | 1.62M | if (tokenizer.getType() == QPDFTokenizer::tt_integer) { |
275 | 788k | if (++int_count > 2) { |
276 | | // Process the oldest buffered integer. |
277 | 221k | addInt(int_count); |
278 | 221k | } |
279 | 788k | last_offset_buffer[int_count % 2] = input.getLastOffset(); |
280 | 788k | int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
281 | 788k | continue; |
282 | | |
283 | 833k | } else if ( |
284 | 833k | int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && |
285 | 519k | tokenizer.getValue() == "R") { |
286 | 498k | if (context == nullptr) { |
287 | 0 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
288 | 0 | throw std::logic_error( |
289 | 0 | "QPDFParser::parse called without context on an object " |
290 | 0 | "with indirect references"); |
291 | 0 | } |
// The older of the two buffered integers is the object id, the newer one the generation.
292 | 498k | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); |
293 | 498k | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); |
// Out-of-range ids or generations are replaced by a null instead of a reference.
294 | 498k | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
295 | 495k | add(ParseGuard::getObject(context, id, gen, parse_pdf)); |
296 | 495k | } else { |
297 | 3.02k | QTC::TC("qpdf", "QPDFParser invalid objgen"); |
298 | 3.02k | addNull(); |
299 | 3.02k | } |
300 | 498k | int_count = 0; |
301 | 498k | continue; |
302 | | |
303 | 498k | } else if (int_count > 0) { |
304 | | // Process the buffered integers before processing the current token. |
305 | 334k | if (int_count > 1) { |
306 | 67.6k | addInt(int_count - 1); |
307 | 67.6k | } |
308 | 334k | addInt(int_count); |
309 | 334k | int_count = 0; |
310 | 334k | } |
311 | 1.62M | } |
312 | | |
313 | 6.45M | switch (tokenizer.getType()) { |
314 | 8.03k | case QPDFTokenizer::tt_eof: |
315 | 8.03k | warn("parse error while reading object"); |
316 | 8.03k | if (content_stream) { |
317 | | // In content stream mode, leave object uninitialized to indicate EOF |
318 | 0 | return {}; |
319 | 0 | } |
320 | 8.03k | QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); |
321 | 8.03k | warn("unexpected EOF"); |
322 | 8.03k | return {QPDFObject::create<QPDF_Null>()}; |
323 | | |
324 | 32.5k | case QPDFTokenizer::tt_bad: |
325 | 32.5k | QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); |
326 | 32.5k | if (tooManyBadTokens()) { |
327 | 1.30k | return {QPDFObject::create<QPDF_Null>()}; |
328 | 1.30k | } |
329 | 31.2k | addNull(); |
330 | 31.2k | continue; |
331 | | |
332 | 2.04k | case QPDFTokenizer::tt_brace_open: |
333 | 3.51k | case QPDFTokenizer::tt_brace_close: |
334 | 3.51k | QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); |
335 | 3.51k | warn("treating unexpected brace token as null"); |
336 | 3.51k | if (tooManyBadTokens()) { |
337 | 312 | return {QPDFObject::create<QPDF_Null>()}; |
338 | 312 | } |
339 | 3.19k | addNull(); |
340 | 3.19k | continue; |
341 | | |
342 | 124k | case QPDFTokenizer::tt_array_close: |
// max_bad_count is set to 0 by addScalar() once it starts dropping scalars from an oversized
// container; in that state the object is abandoned at the first close token.
343 | 124k | if ((bad_count || sanity_checks) && !max_bad_count) { |
344 | | // Trigger warning. |
345 | 141 | (void)tooManyBadTokens(); |
346 | 141 | return {QPDFObject::create<QPDF_Null>()}; |
347 | 141 | } |
348 | 123k | if (frame->state == st_array) { |
// Arrays that collected more than 100 nulls use the two-argument QPDF_Array constructor
// (presumably a sparse representation -- confirm against QPDF_Array).
349 | 121k | auto object = frame->null_count > 100 |
350 | 121k | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
351 | 121k | : QPDFObject::create<QPDF_Array>(std::move(frame->olist)); |
352 | 121k | setDescription(object, frame->offset - 1); |
353 | | // The `offset` points to the next of "[". Set the rewind offset to point to the |
354 | | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
355 | | // array start delimiter. getLastOffset points to the array end token and therefore |
356 | | // can't be used here. |
// A single remaining frame means the outermost container has just been closed.
357 | 121k | if (stack.size() <= 1) { |
358 | 2.28k | return object; |
359 | 2.28k | } |
360 | 118k | stack.pop_back(); |
361 | 118k | frame = &stack.back(); |
362 | 118k | add(std::move(object)); |
363 | 118k | } else { |
364 | 2.74k | QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); |
365 | 2.74k | if (sanity_checks) { |
366 | | // During sanity checks, assume nesting of containers is corrupt and object is |
367 | | // unusable. |
368 | 2.22k | warn("unexpected array close token; giving up on reading object"); |
369 | 2.22k | return {QPDFObject::create<QPDF_Null>()}; |
370 | 2.22k | } |
371 | 519 | warn("treating unexpected array close token as null"); |
372 | 519 | if (tooManyBadTokens()) { |
373 | 55 | return {QPDFObject::create<QPDF_Null>()}; |
374 | 55 | } |
375 | 464 | addNull(); |
376 | 464 | } |
377 | 119k | continue; |
378 | | |
379 | 244k | case QPDFTokenizer::tt_dict_close: |
// Same abort condition as for tt_array_close above.
380 | 244k | if ((bad_count || sanity_checks) && !max_bad_count) { |
381 | | // Trigger warning. |
382 | 102 | (void)tooManyBadTokens(); |
383 | 102 | return {QPDFObject::create<QPDF_Null>()}; |
384 | 102 | } |
// NOTE(review): this comparison relies on the declaration order of the state enum; presumably
// both dictionary states compare <= st_dictionary_value and st_array does not -- confirm.
385 | 244k | if (frame->state <= st_dictionary_value) { |
386 | | // Attempt to recover more or less gracefully from invalid dictionaries. |
387 | 242k | auto& dict = frame->dict; |
388 | | |
389 | 242k | if (frame->state == st_dictionary_value) { |
390 | 7.38k | QTC::TC("qpdf", "QPDFParser no val for last key"); |
391 | 7.38k | warn( |
392 | 7.38k | frame->offset, |
393 | 7.38k | "dictionary ended prematurely; using null as value for last key"); |
394 | 7.38k | dict[frame->key] = QPDFObject::create<QPDF_Null>(); |
395 | 7.38k | } |
// olist of a dictionary frame holds values that arrived where a key was expected (see add()).
396 | 242k | if (!frame->olist.empty()) { |
397 | 37.7k | if (sanity_checks) { |
398 | 35.6k | warn( |
399 | 35.6k | frame->offset, |
400 | 35.6k | "expected dictionary keys but found non-name objects; ignoring"); |
401 | 35.6k | } else { |
402 | 2.08k | fixMissingKeys(); |
403 | 2.08k | } |
404 | 37.7k | } |
405 | | |
// For /Type /Sig dictionaries with /ByteRange, restore /Contents from the string captured before
// decryption (presumably because signature contents are not encrypted -- see the PDF spec).
406 | 242k | if (!frame->contents_string.empty() && dict.contains("/Type") && |
407 | 42 | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && |
408 | 5 | dict.contains("/Contents") && dict["/Contents"].isString()) { |
409 | 5 | dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); |
410 | 5 | dict["/Contents"].setParsedOffset(frame->contents_offset); |
411 | 5 | } |
412 | 242k | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); |
413 | 242k | setDescription(object, frame->offset - 2); |
414 | | // The `offset` points to the next of "<<". Set the rewind offset to point to the |
415 | | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
416 | | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
417 | | // and therefore can't be used here. |
// A single remaining frame means the outermost container has just been closed.
418 | 242k | if (stack.size() <= 1) { |
419 | 165k | return object; |
420 | 165k | } |
421 | 76.3k | stack.pop_back(); |
422 | 76.3k | frame = &stack.back(); |
423 | 76.3k | add(std::move(object)); |
424 | 76.3k | } else { |
425 | 1.89k | if (sanity_checks) { |
426 | | // During sanity checks, assume nesting of containers is corrupt and object is |
427 | | // unusable. |
428 | 1.15k | warn("unexpected dictionary close token; giving up on reading object"); |
429 | 1.15k | return {QPDFObject::create<QPDF_Null>()}; |
430 | 1.15k | } |
431 | 738 | warn("unexpected dictionary close token"); |
432 | 738 | if (tooManyBadTokens()) { |
433 | 51 | return {QPDFObject::create<QPDF_Null>()}; |
434 | 51 | } |
435 | 687 | addNull(); |
436 | 687 | } |
437 | 77.0k | continue; |
438 | | |
439 | 192k | case QPDFTokenizer::tt_array_open: |
440 | 354k | case QPDFTokenizer::tt_dict_open: |
// Nesting is capped at 500 open containers; deeper input abandons the whole object.
441 | 354k | if (stack.size() > 499) { |
442 | 165 | QTC::TC("qpdf", "QPDFParser too deep"); |
443 | 165 | warn("ignoring excessively deeply nested data structure"); |
444 | 165 | return {QPDFObject::create<QPDF_Null>()}; |
445 | 354k | } else { |
// A container value after /Contents is not a string, so drop the pending capture flag.
446 | 354k | b_contents = false; |
447 | 354k | stack.emplace_back( |
448 | 354k | input, |
449 | 354k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
450 | 354k | : st_dictionary_key); |
451 | 354k | frame = &stack.back(); |
452 | 354k | continue; |
453 | 354k | } |
454 | | |
455 | 14.9k | case QPDFTokenizer::tt_bool: |
456 | 14.9k | addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); |
457 | 14.9k | continue; |
458 | | |
459 | 102k | case QPDFTokenizer::tt_null: |
460 | 102k | addNull(); |
461 | 102k | continue; |
462 | | |
463 | 833k | case QPDFTokenizer::tt_integer: |
// Content streams add integers immediately; elsewhere the integer is buffered because it may
// turn out to be the first part of an indirect reference.
464 | 833k | if (!content_stream) { |
465 | | // Buffer token in case it is part of an indirect reference. |
466 | 833k | last_offset_buffer[1] = input.getLastOffset(); |
467 | 833k | int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
468 | 833k | int_count = 1; |
469 | 833k | } else { |
470 | 0 | addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
471 | 0 | } |
472 | 833k | continue; |
473 | | |
474 | 147k | case QPDFTokenizer::tt_real: |
475 | 147k | addScalar<QPDF_Real>(tokenizer.getValue()); |
476 | 147k | continue; |
477 | | |
478 | 4.26M | case QPDFTokenizer::tt_name: |
// A name is a key when the dictionary frame is waiting for one; otherwise it is a value.
479 | 4.26M | if (frame->state == st_dictionary_key) { |
480 | 1.00M | frame->key = tokenizer.getValue(); |
481 | 1.00M | frame->state = st_dictionary_value; |
482 | 1.00M | b_contents = decrypter && frame->key == "/Contents"; |
483 | 1.00M | continue; |
484 | 3.25M | } else { |
485 | 3.25M | addScalar<QPDF_Name>(tokenizer.getValue()); |
486 | 3.25M | } |
487 | 3.25M | continue; |
488 | | |
489 | 3.25M | case QPDFTokenizer::tt_word: |
490 | 182k | if (content_stream) { |
491 | 0 | addScalar<QPDF_Operator>(tokenizer.getValue()); |
492 | 0 | continue; |
493 | 0 | } |
494 | | |
495 | 182k | if (sanity_checks) { |
496 | 175k | if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") { |
497 | | // During sanity checks, assume an unexpected endobj or endstream indicates that |
498 | | // we are parsing past the end of the object. |
499 | 3.19k | warn( |
500 | 3.19k | "unexpected 'endobj' or 'endstream' while reading object; giving up on " |
501 | 3.19k | "reading object"); |
502 | 3.19k | return {QPDFObject::create<QPDF_Null>()}; |
503 | 3.19k | } |
504 | | |
505 | 172k | warn("unknown token while reading object; treating as null"); |
506 | 172k | if (tooManyBadTokens()) { |
507 | 4.67k | return {QPDFObject::create<QPDF_Null>()}; |
508 | 4.67k | } |
509 | 167k | addNull(); |
510 | 167k | continue; |
511 | 172k | } |
512 | | |
// Without sanity checks an unknown word is kept as a string rather than replaced by null.
513 | 7.28k | QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); |
514 | 7.28k | warn("unknown token while reading object; treating as string"); |
515 | 7.28k | if (tooManyBadTokens()) { |
516 | 164 | return {QPDFObject::create<QPDF_Null>()}; |
517 | 164 | } |
518 | 7.12k | addScalar<QPDF_String>(tokenizer.getValue()); |
519 | | |
520 | 7.12k | continue; |
521 | | |
522 | 142k | case QPDFTokenizer::tt_string: |
523 | 142k | { |
524 | 142k | auto const& val = tokenizer.getValue(); |
525 | 142k | if (decrypter) { |
// Remember the undecrypted /Contents value and its offset; they are used when the enclosing
// dictionary is closed (see tt_dict_close).
526 | 9.50k | if (b_contents) { |
527 | 1.02k | frame->contents_string = val; |
528 | 1.02k | frame->contents_offset = input.getLastOffset(); |
529 | 1.02k | b_contents = false; |
530 | 1.02k | } |
531 | 9.50k | std::string s{val}; |
532 | 9.50k | decrypter->decryptString(s); |
533 | 9.50k | addScalar<QPDF_String>(s); |
534 | 133k | } else { |
535 | 133k | addScalar<QPDF_String>(val); |
536 | 133k | } |
537 | 142k | } |
538 | 142k | continue; |
539 | | |
540 | 0 | default: |
541 | 0 | warn("treating unknown token type as null while reading object"); |
542 | 0 | if (tooManyBadTokens()) { |
543 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
544 | 0 | } |
545 | 0 | addNull(); |
// No `continue` is needed here: leaving the switch reaches the end of the loop body.
546 | 6.45M | } |
547 | 6.45M | } |
548 | 192k | } |
549 | | |
550 | | void |
551 | | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) |
552 | 4.39M | { |
553 | 4.39M | if (frame->state != st_dictionary_value) { |
554 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
555 | | // processing once the tt_dict_close token has been found. |
556 | 3.44M | frame->olist.emplace_back(std::move(obj)); |
557 | 3.44M | } else { |
558 | 953k | if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { |
559 | 56.6k | warnDuplicateKey(); |
560 | 56.6k | } |
561 | 953k | frame->state = st_dictionary_key; |
562 | 953k | } |
563 | 4.39M | } |
564 | | |
565 | | void |
566 | | QPDFParser::addNull() |
567 | 306k | { |
568 | 306k | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
569 | | |
570 | 306k | if (frame->state != st_dictionary_value) { |
571 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
572 | | // processing once the tt_dict_close token has been found. |
573 | 270k | frame->olist.emplace_back(null_obj); |
574 | 270k | } else { |
575 | 36.0k | if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { |
576 | 3.40k | warnDuplicateKey(); |
577 | 3.40k | } |
578 | 36.0k | frame->state = st_dictionary_key; |
579 | 36.0k | } |
580 | 306k | ++frame->null_count; |
581 | 306k | } |
582 | | |
583 | | void |
584 | | QPDFParser::addInt(int count) |
585 | 624k | { |
586 | 624k | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
587 | 624k | obj->setDescription(context, description, last_offset_buffer[count % 2]); |
588 | 624k | add(std::move(obj)); |
589 | 624k | } |
590 | | |
591 | | template <typename T, typename... Args> |
592 | | void |
593 | | QPDFParser::addScalar(Args&&... args) |
594 | 3.56M | { |
595 | 3.56M | if ((bad_count || sanity_checks) && |
596 | 3.47M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { |
597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is |
598 | | // encountered. |
599 | 488k | max_bad_count = 0; |
600 | 488k | return; |
601 | 488k | } |
602 | 3.08M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
603 | 3.08M | obj->setDescription(context, description, input.getLastOffset()); |
604 | 3.08M | add(std::move(obj)); |
605 | 3.08M | } void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&) Line | Count | Source | 594 | 14.9k | { | 595 | 14.9k | if ((bad_count || sanity_checks) && | 596 | 11.8k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 598 | | // encountered. | 599 | 261 | max_bad_count = 0; | 600 | 261 | return; | 601 | 261 | } | 602 | 14.6k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 603 | 14.6k | obj->setDescription(context, description, input.getLastOffset()); | 604 | 14.6k | add(std::move(obj)); | 605 | 14.6k | } |
Unexecuted instantiation: void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&) void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 594 | 147k | { | 595 | 147k | if ((bad_count || sanity_checks) && | 596 | 131k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 598 | | // encountered. | 599 | 45.1k | max_bad_count = 0; | 600 | 45.1k | return; | 601 | 45.1k | } | 602 | 102k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 603 | 102k | obj->setDescription(context, description, input.getLastOffset()); | 604 | 102k | add(std::move(obj)); | 605 | 102k | } |
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 594 | 3.25M | { | 595 | 3.25M | if ((bad_count || sanity_checks) && | 596 | 3.19M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 598 | | // encountered. | 599 | 442k | max_bad_count = 0; | 600 | 442k | return; | 601 | 442k | } | 602 | 2.81M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 603 | 2.81M | obj->setDescription(context, description, input.getLastOffset()); | 604 | 2.81M | add(std::move(obj)); | 605 | 2.81M | } |
Unexecuted instantiation: void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 594 | 140k | { | 595 | 140k | if ((bad_count || sanity_checks) && | 596 | 134k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 598 | | // encountered. | 599 | 602 | max_bad_count = 0; | 600 | 602 | return; | 601 | 602 | } | 602 | 139k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 603 | 139k | obj->setDescription(context, description, input.getLastOffset()); | 604 | 139k | add(std::move(obj)); | 605 | 139k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 594 | 9.50k | { | 595 | 9.50k | if ((bad_count || sanity_checks) && | 596 | 9.42k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 597 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 598 | | // encountered. | 599 | 36 | max_bad_count = 0; | 600 | 36 | return; | 601 | 36 | } | 602 | 9.46k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 603 | 9.46k | obj->setDescription(context, description, input.getLastOffset()); | 604 | 9.46k | add(std::move(obj)); | 605 | 9.46k | } |
|
606 | | |
// Creates an object of type T from `args`, describes it with the offset at which parse() started,
// and returns it wrapped in a handle.
template <typename T, typename... Args>
QPDFObjectHandle
QPDFParser::withDescription(Args&&... args)
{
    auto created = QPDFObject::create<T>(std::forward<Args>(args)...);
    created->setDescription(context, description, start);
    return {created};
}
615 | | |
616 | | void |
617 | | QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) |
618 | 363k | { |
619 | 363k | if (obj) { |
620 | 363k | obj->setDescription(context, description, parsed_offset); |
621 | 363k | } |
622 | 363k | } |
623 | | |
624 | | void |
625 | | QPDFParser::fixMissingKeys() |
626 | 2.08k | { |
627 | 2.08k | std::set<std::string> names; |
628 | 6.38k | for (auto& obj: frame->olist) { |
629 | 6.38k | if (obj.getObj()->getTypeCode() == ::ot_name) { |
630 | 120 | names.insert(obj.getObj()->getStringValue()); |
631 | 120 | } |
632 | 6.38k | } |
633 | 2.08k | int next_fake_key = 1; |
634 | 6.37k | for (auto const& item: frame->olist) { |
635 | 6.39k | while (true) { |
636 | 6.39k | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); |
637 | 6.39k | const bool found_fake = !frame->dict.contains(key) && !names.contains(key); |
638 | 6.39k | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
639 | 6.39k | if (found_fake) { |
640 | 6.37k | warn( |
641 | 6.37k | frame->offset, |
642 | 6.37k | "expected dictionary key but found non-name object; inserting key " + key); |
643 | 6.37k | frame->dict[key] = item; |
644 | 6.37k | break; |
645 | 6.37k | } |
646 | 6.39k | } |
647 | 6.37k | } |
648 | 2.08k | } |
649 | | |
650 | | bool |
651 | | QPDFParser::tooManyBadTokens() |
652 | 215k | { |
653 | 215k | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { |
654 | 395 | if (bad_count) { |
655 | 338 | warn( |
656 | 338 | "encountered errors while parsing an array or dictionary with more than 5000 " |
657 | 338 | "elements; giving up on reading object"); |
658 | 338 | return true; |
659 | 338 | } |
660 | 57 | warn( |
661 | 57 | "encountered an array or dictionary with more than 5000 elements during xref recovery; " |
662 | 57 | "giving up on reading object"); |
663 | 57 | } |
664 | 215k | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { |
665 | 77.9k | good_count = 0; |
666 | 77.9k | bad_count = 1; |
667 | 77.9k | return false; |
668 | 77.9k | } |
669 | 137k | if (++bad_count > 5 || |
670 | 133k | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { |
671 | | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
672 | | // exceeds the remaining number of allowable total errors. |
673 | 6.40k | warn("too many errors; giving up on reading object"); |
674 | 6.40k | return true; |
675 | 6.40k | } |
676 | 131k | good_count = 0; |
677 | 131k | return false; |
678 | 137k | } |
679 | | |
680 | | void |
681 | | QPDFParser::warn(QPDFExc const& e) const |
682 | 374k | { |
683 | | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
684 | | // object. If parsing for some other reason, such as an explicit creation of an object from a |
685 | | // string, then just throw the exception. |
686 | 374k | if (context) { |
687 | 374k | context->warn(e); |
688 | 374k | } else { |
689 | 0 | throw e; |
690 | 0 | } |
691 | 374k | } |
692 | | |
693 | | void |
694 | | QPDFParser::warnDuplicateKey() |
695 | 60.0k | { |
696 | 60.0k | QTC::TC("qpdf", "QPDFParser duplicate dict key"); |
697 | 60.0k | warn( |
698 | 60.0k | frame->offset, |
699 | 60.0k | "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); |
700 | 60.0k | } |
701 | | |
702 | | void |
703 | | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
704 | 374k | { |
705 | 374k | if (stream_id) { |
706 | 25.2k | std::string descr = "object "s + std::to_string(obj_id) + " 0"; |
707 | 25.2k | std::string name = context->getFilename() + " object stream " + std::to_string(stream_id); |
708 | 25.2k | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); |
709 | 349k | } else { |
710 | 349k | warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); |
711 | 349k | } |
712 | 374k | } |
713 | | |
714 | | void |
715 | | QPDFParser::warn(std::string const& msg) const |
716 | 265k | { |
717 | 265k | warn(input.getLastOffset(), msg); |
718 | 265k | } |