/src/qpdf/libqpdf/QPDFParser.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/QPDFParser.hh> |
2 | | |
3 | | #include <qpdf/QPDF.hh> |
4 | | #include <qpdf/QPDFObjGen.hh> |
5 | | #include <qpdf/QPDFObjectHandle.hh> |
6 | | #include <qpdf/QPDFObject_private.hh> |
7 | | #include <qpdf/QPDFTokenizer_private.hh> |
8 | | #include <qpdf/QTC.hh> |
9 | | #include <qpdf/QUtil.hh> |
10 | | |
11 | | #include <memory> |
12 | | |
13 | | using namespace std::literals; |
14 | | using namespace qpdf; |
15 | | |
16 | | using ObjectPtr = std::shared_ptr<QPDFObject>; |
17 | | |
18 | | QPDFObjectHandle |
19 | | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
20 | 19.0k | { |
21 | 19.0k | qpdf::Tokenizer tokenizer; |
22 | 19.0k | bool empty = false; |
23 | 19.0k | return QPDFParser( |
24 | 19.0k | input, |
25 | 19.0k | make_description(input.getName(), object_description), |
26 | 19.0k | object_description, |
27 | 19.0k | tokenizer, |
28 | 19.0k | nullptr, |
29 | 19.0k | context, |
30 | 19.0k | false) |
31 | 19.0k | .parse(empty, false); |
32 | 19.0k | } |
33 | | |
34 | | QPDFObjectHandle |
35 | | QPDFParser::parse_content( |
36 | | InputSource& input, |
37 | | std::shared_ptr<QPDFObject::Description> sp_description, |
38 | | qpdf::Tokenizer& tokenizer, |
39 | | QPDF* context) |
40 | 722k | { |
41 | 722k | bool empty = false; |
42 | 722k | return QPDFParser( |
43 | 722k | input, |
44 | 722k | std::move(sp_description), |
45 | 722k | "content", |
46 | 722k | tokenizer, |
47 | 722k | nullptr, |
48 | 722k | context, |
49 | 722k | true, |
50 | 722k | 0, |
51 | 722k | 0, |
52 | 722k | context && context->reconstructed_xref()) |
53 | 722k | .parse(empty, true); |
54 | 722k | } |
55 | | |
56 | | QPDFObjectHandle |
57 | | QPDFParser::parse( |
58 | | InputSource& input, |
59 | | std::string const& object_description, |
60 | | QPDFTokenizer& tokenizer, |
61 | | bool& empty, |
62 | | QPDFObjectHandle::StringDecrypter* decrypter, |
63 | | QPDF* context) |
64 | 0 | { |
65 | 0 | return QPDFParser( |
66 | 0 | input, |
67 | 0 | make_description(input.getName(), object_description), |
68 | 0 | object_description, |
69 | 0 | *tokenizer.m, |
70 | 0 | decrypter, |
71 | 0 | context, |
72 | 0 | false) |
73 | 0 | .parse(empty, false); |
74 | 0 | } |
75 | | |
76 | | std::pair<QPDFObjectHandle, bool> |
77 | | QPDFParser::parse( |
78 | | InputSource& input, |
79 | | std::string const& object_description, |
80 | | qpdf::Tokenizer& tokenizer, |
81 | | QPDFObjectHandle::StringDecrypter* decrypter, |
82 | | QPDF& context, |
83 | | bool sanity_checks) |
84 | 174k | { |
85 | 174k | bool empty{false}; |
86 | 174k | auto result = QPDFParser( |
87 | 174k | input, |
88 | 174k | make_description(input.getName(), object_description), |
89 | 174k | object_description, |
90 | 174k | tokenizer, |
91 | 174k | decrypter, |
92 | 174k | &context, |
93 | 174k | true, |
94 | 174k | 0, |
95 | 174k | 0, |
96 | 174k | sanity_checks) |
97 | 174k | .parse(empty, false); |
98 | 174k | return {result, empty}; |
99 | 174k | } |
100 | | |
101 | | std::pair<QPDFObjectHandle, bool> |
102 | | QPDFParser::parse( |
103 | | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
104 | 31.8k | { |
105 | 31.8k | bool empty{false}; |
106 | 31.8k | auto result = QPDFParser( |
107 | 31.8k | input, |
108 | 31.8k | std::make_shared<QPDFObject::Description>( |
109 | 31.8k | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
110 | 31.8k | "", |
111 | 31.8k | tokenizer, |
112 | 31.8k | nullptr, |
113 | 31.8k | &context, |
114 | 31.8k | true, |
115 | 31.8k | stream_id, |
116 | 31.8k | obj_id) |
117 | 31.8k | .parse(empty, false); |
118 | 31.8k | return {result, empty}; |
119 | 31.8k | } |
120 | | |
121 | | QPDFObjectHandle |
122 | | QPDFParser::parse(bool& empty, bool content_stream) |
123 | 947k | { |
124 | | // This method must take care not to resolve any objects. Don't check the type of any object |
125 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
126 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
127 | | // logic error to be thrown from QPDF::inParse(). |
128 | | |
129 | 947k | QPDF::ParseGuard pg(context); |
130 | 947k | empty = false; |
131 | 947k | start = input.tell(); |
132 | | |
133 | 947k | if (!tokenizer.nextToken(input, object_description)) { |
134 | 10.0k | warn(tokenizer.getErrorMessage()); |
135 | 10.0k | } |
136 | | |
137 | 947k | switch (tokenizer.getType()) { |
138 | 3.66k | case QPDFTokenizer::tt_eof: |
139 | 3.66k | if (content_stream) { |
140 | | // In content stream mode, leave object uninitialized to indicate EOF |
141 | 3.25k | return {}; |
142 | 3.25k | } |
143 | 413 | QTC::TC("qpdf", "QPDFParser eof in parse"); |
144 | 413 | warn("unexpected EOF"); |
145 | 413 | return {QPDFObject::create<QPDF_Null>()}; |
146 | | |
147 | 9.85k | case QPDFTokenizer::tt_bad: |
148 | 9.85k | QTC::TC("qpdf", "QPDFParser bad token in parse"); |
149 | 9.85k | return {QPDFObject::create<QPDF_Null>()}; |
150 | | |
151 | 1.22k | case QPDFTokenizer::tt_brace_open: |
152 | 2.15k | case QPDFTokenizer::tt_brace_close: |
153 | 2.15k | QTC::TC("qpdf", "QPDFParser bad brace"); |
154 | 2.15k | warn("treating unexpected brace token as null"); |
155 | 2.15k | return {QPDFObject::create<QPDF_Null>()}; |
156 | | |
157 | 5.44k | case QPDFTokenizer::tt_array_close: |
158 | 5.44k | QTC::TC("qpdf", "QPDFParser bad array close"); |
159 | 5.44k | warn("treating unexpected array close token as null"); |
160 | 5.44k | return {QPDFObject::create<QPDF_Null>()}; |
161 | | |
162 | 1.00k | case QPDFTokenizer::tt_dict_close: |
163 | 1.00k | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
164 | 1.00k | warn("unexpected dictionary close token"); |
165 | 1.00k | return {QPDFObject::create<QPDF_Null>()}; |
166 | | |
167 | 29.8k | case QPDFTokenizer::tt_array_open: |
168 | 207k | case QPDFTokenizer::tt_dict_open: |
169 | 207k | stack.clear(); |
170 | 207k | stack.emplace_back( |
171 | 207k | input, |
172 | 207k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); |
173 | 207k | frame = &stack.back(); |
174 | 207k | return parseRemainder(content_stream); |
175 | | |
176 | 2.31k | case QPDFTokenizer::tt_bool: |
177 | 2.31k | return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); |
178 | | |
179 | 273 | case QPDFTokenizer::tt_null: |
180 | 273 | return {QPDFObject::create<QPDF_Null>()}; |
181 | | |
182 | 165k | case QPDFTokenizer::tt_integer: |
183 | 165k | return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
184 | | |
185 | 127k | case QPDFTokenizer::tt_real: |
186 | 127k | return withDescription<QPDF_Real>(tokenizer.getValue()); |
187 | | |
188 | 77.8k | case QPDFTokenizer::tt_name: |
189 | 77.8k | return withDescription<QPDF_Name>(tokenizer.getValue()); |
190 | | |
191 | 334k | case QPDFTokenizer::tt_word: |
192 | 334k | { |
193 | 334k | auto const& value = tokenizer.getValue(); |
194 | 334k | if (content_stream) { |
195 | 322k | return withDescription<QPDF_Operator>(value); |
196 | 322k | } else if (value == "endobj") { |
197 | | // We just saw endobj without having read anything. Treat this as a null and do |
198 | | // not move the input source's offset. |
199 | 348 | input.seek(input.getLastOffset(), SEEK_SET); |
200 | 348 | empty = true; |
201 | 348 | return {QPDFObject::create<QPDF_Null>()}; |
202 | 10.9k | } else { |
203 | 10.9k | QTC::TC("qpdf", "QPDFParser treat word as string"); |
204 | 10.9k | warn("unknown token while reading object; treating as string"); |
205 | 10.9k | return withDescription<QPDF_String>(value); |
206 | 10.9k | } |
207 | 334k | } |
208 | | |
209 | 10.7k | case QPDFTokenizer::tt_string: |
210 | 10.7k | if (decrypter) { |
211 | 158 | std::string s{tokenizer.getValue()}; |
212 | 158 | decrypter->decryptString(s); |
213 | 158 | return withDescription<QPDF_String>(s); |
214 | 10.5k | } else { |
215 | 10.5k | return withDescription<QPDF_String>(tokenizer.getValue()); |
216 | 10.5k | } |
217 | | |
218 | 0 | default: |
219 | 0 | warn("treating unknown token type as null while reading object"); |
220 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
221 | 947k | } |
222 | 947k | } |
223 | | |
// Parse the rest of an array or dictionary after parse() has consumed the opening token and
// pushed the first stack frame.
//
// Tokens are read in a loop. Nested containers push a new frame; a matching close token builds
// the array/dictionary, pops the frame and adds the result to the parent frame. The function
// returns the finished container once the close token of the outermost frame is reached.
//
// A null object is returned when parsing is abandoned: EOF (outside content stream mode), too
// many bad tokens, excessively deep nesting, or, with sanity_checks enabled, a mismatched close
// token or an unexpected `endobj`/`endstream`. In content stream mode EOF yields an
// uninitialized handle instead.
//
// Throws std::logic_error if an indirect reference is encountered while `context` is null.
224 | | QPDFObjectHandle |
225 | | QPDFParser::parseRemainder(bool content_stream) |
226 | 207k | { |
227 | | // This method must take care not to resolve any objects. Don't check the type of any object |
228 | | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
229 | | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
230 | | // logic error to be thrown from QPDF::inParse(). |
231 | | |
232 | 207k | bad_count = 0; |
    // True while the most recent dictionary key was "/Contents" and a decrypter is present; used
    // to capture the raw (undecrypted) string that follows.
233 | 207k | bool b_contents = false; |
234 | | |
235 | 9.39M | while (true) { |
236 | 9.39M | if (!tokenizer.nextToken(input, object_description)) { |
237 | 47.5k | warn(tokenizer.getErrorMessage()); |
238 | 47.5k | } |
239 | 9.39M | ++good_count; // optimistically |
240 | | |
241 | 9.39M | if (int_count != 0) { |
242 | | // Special handling of indirect references. Treat integer tokens as part of an indirect |
243 | | // reference until proven otherwise. |
244 | 1.81M | if (tokenizer.getType() == QPDFTokenizer::tt_integer) { |
245 | 979k | if (++int_count > 2) { |
246 | | // Process the oldest buffered integer. |
247 | 353k | addInt(int_count); |
248 | 353k | } |
                // The two buffers are used as a ring indexed by int_count % 2, so the slot just
                // flushed by addInt() above is the one overwritten here.
249 | 979k | last_offset_buffer[int_count % 2] = input.getLastOffset(); |
250 | 979k | int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
251 | 979k | continue; |
252 | | |
253 | 979k | } else if ( |
254 | 835k | int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && |
255 | 835k | tokenizer.getValue() == "R") { |
256 | 528k | if (context == nullptr) { |
257 | 0 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
258 | 0 | throw std::logic_error( |
259 | 0 | "QPDFParser::parse called without context on an object " |
260 | 0 | "with indirect references"); |
261 | 0 | } |
                // The older buffered integer is the object id, the newer one the generation.
262 | 528k | auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); |
263 | 528k | auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); |
264 | 528k | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
265 | 522k | add(QPDF::ParseGuard::getObject(context, id, gen, parse_pdf)); |
266 | 522k | } else { |
267 | 5.98k | QTC::TC("qpdf", "QPDFParser invalid objgen"); |
268 | 5.98k | addNull(); |
269 | 5.98k | } |
270 | 528k | int_count = 0; |
271 | 528k | continue; |
272 | | |
273 | 528k | } else if (int_count > 0) { |
274 | | // Process the buffered integers before processing the current token. |
275 | 307k | if (int_count > 1) { |
276 | 97.0k | addInt(int_count - 1); |
277 | 97.0k | } |
278 | 307k | addInt(int_count); |
279 | 307k | int_count = 0; |
280 | 307k | } |
281 | 1.81M | } |
282 | | |
283 | 7.88M | switch (tokenizer.getType()) { |
284 | 8.97k | case QPDFTokenizer::tt_eof: |
285 | 8.97k | warn("parse error while reading object"); |
286 | 8.97k | if (content_stream) { |
287 | | // In content stream mode, leave object uninitialized to indicate EOF |
288 | 344 | return {}; |
289 | 344 | } |
290 | 8.63k | QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); |
291 | 8.63k | warn("unexpected EOF"); |
292 | 8.63k | return {QPDFObject::create<QPDF_Null>()}; |
293 | | |
294 | 43.4k | case QPDFTokenizer::tt_bad: |
295 | 43.4k | QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); |
296 | 43.4k | if (tooManyBadTokens()) { |
297 | 1.68k | return {QPDFObject::create<QPDF_Null>()}; |
298 | 1.68k | } |
299 | 41.7k | addNull(); |
300 | 41.7k | continue; |
301 | | |
302 | 4.88k | case QPDFTokenizer::tt_brace_open: |
303 | 6.67k | case QPDFTokenizer::tt_brace_close: |
304 | 6.67k | QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); |
305 | 6.67k | warn("treating unexpected brace token as null"); |
306 | 6.67k | if (tooManyBadTokens()) { |
307 | 424 | return {QPDFObject::create<QPDF_Null>()}; |
308 | 424 | } |
309 | 6.24k | addNull(); |
310 | 6.24k | continue; |
311 | | |
312 | 182k | case QPDFTokenizer::tt_array_close: |
            // max_bad_count is zeroed by addScalar() when a container grows past its size limit
            // while bad tokens were seen or sanity checks are on; in that case give up here.
313 | 182k | if ((bad_count || sanity_checks) && !max_bad_count) { |
314 | | // Trigger warning. |
315 | 370 | (void)tooManyBadTokens(); |
316 | 370 | return {QPDFObject::create<QPDF_Null>()}; |
317 | 370 | } |
318 | 182k | if (frame->state == st_array) { |
                // NOTE(review): the extra `true` argument is passed only when this frame
                // collected more than 100 nulls; presumably it selects a sparse array
                // representation -- confirm against QPDF_Array.
319 | 178k | auto object = frame->null_count > 100 |
320 | 178k | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
321 | 178k | : QPDFObject::create<QPDF_Array>(std::move(frame->olist)); |
322 | 178k | setDescription(object, frame->offset - 1); |
323 | | // `frame->offset` is the position just past "[", so `offset - 1` above is the start |
324 | | // of "[" itself. This has been explicitly tested with whitespace surrounding the |
325 | | // array start delimiter. getLastOffset points to the array end token and therefore |
326 | | // can't be used here. |
327 | 178k | if (stack.size() <= 1) { |
328 | 22.5k | return object; |
329 | 22.5k | } |
330 | 155k | stack.pop_back(); |
331 | 155k | frame = &stack.back(); |
332 | 155k | add(std::move(object)); |
333 | 155k | } else { |
334 | 4.32k | QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); |
335 | 4.32k | if (sanity_checks) { |
336 | | // During sanity checks, assume nesting of containers is corrupt and object is |
337 | | // unusable. |
338 | 3.92k | warn("unexpected array close token; giving up on reading object"); |
339 | 3.92k | return {QPDFObject::create<QPDF_Null>()}; |
340 | 3.92k | } |
341 | 400 | warn("treating unexpected array close token as null"); |
342 | 400 | if (tooManyBadTokens()) { |
343 | 28 | return {QPDFObject::create<QPDF_Null>()}; |
344 | 28 | } |
345 | 372 | addNull(); |
346 | 372 | } |
347 | 155k | continue; |
348 | | |
349 | 276k | case QPDFTokenizer::tt_dict_close: |
            // Same size-limit bail-out as for tt_array_close above.
350 | 276k | if ((bad_count || sanity_checks) && !max_bad_count) { |
351 | | // Trigger warning. |
352 | 142 | (void)tooManyBadTokens(); |
353 | 142 | return {QPDFObject::create<QPDF_Null>()}; |
354 | 142 | } |
355 | 276k | if (frame->state <= st_dictionary_value) { |
356 | | // Attempt to recover more or less gracefully from invalid dictionaries. |
357 | 273k | auto& dict = frame->dict; |
358 | | |
359 | 273k | if (frame->state == st_dictionary_value) { |
360 | 6.32k | QTC::TC("qpdf", "QPDFParser no val for last key"); |
361 | 6.32k | warn( |
362 | 6.32k | frame->offset, |
363 | 6.32k | "dictionary ended prematurely; using null as value for last key"); |
364 | 6.32k | dict[frame->key] = QPDFObject::create<QPDF_Null>(); |
365 | 6.32k | } |
                // Objects land in olist (see add()/addNull()) when they appear where a key was
                // expected.
366 | 273k | if (!frame->olist.empty()) { |
367 | 48.0k | if (sanity_checks) { |
368 | 46.5k | warn( |
369 | 46.5k | frame->offset, |
370 | 46.5k | "expected dictionary keys but found non-name objects; ignoring"); |
371 | 46.5k | } else { |
372 | 1.48k | fixMissingKeys(); |
373 | 1.48k | } |
374 | 48.0k | } |
375 | | |
                // For /Type /Sig dictionaries that have /ByteRange, replace the decrypted
                // /Contents string with the raw string captured in the tt_string case below.
376 | 273k | if (!frame->contents_string.empty() && dict.contains("/Type") && |
377 | 273k | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && |
378 | 273k | dict.contains("/Contents") && dict["/Contents"].isString()) { |
379 | 14 | dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); |
380 | 14 | dict["/Contents"].setParsedOffset(frame->contents_offset); |
381 | 14 | } |
382 | 273k | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); |
383 | 273k | setDescription(object, frame->offset - 2); |
384 | | // `frame->offset` is the position just past "<<", so `offset - 2` above is the start |
385 | | // of "<<" itself. This has been explicitly tested with whitespace surrounding |
386 | | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
387 | | // and therefore can't be used here. |
388 | 273k | if (stack.size() <= 1) { |
389 | 151k | return object; |
390 | 151k | } |
391 | 121k | stack.pop_back(); |
392 | 121k | frame = &stack.back(); |
393 | 121k | add(std::move(object)); |
394 | 121k | } else { |
395 | 3.14k | QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); |
396 | 3.14k | if (sanity_checks) { |
397 | | // During sanity checks, assume nesting of containers is corrupt and object is |
398 | | // unusable. |
399 | 2.55k | warn("unexpected dictionary close token; giving up on reading object"); |
400 | 2.55k | return {QPDFObject::create<QPDF_Null>()}; |
401 | 2.55k | } |
402 | 591 | warn("unexpected dictionary close token"); |
403 | 591 | if (tooManyBadTokens()) { |
404 | 56 | return {QPDFObject::create<QPDF_Null>()}; |
405 | 56 | } |
406 | 535 | addNull(); |
407 | 535 | } |
408 | 121k | continue; |
409 | | |
410 | 349k | case QPDFTokenizer::tt_array_open: |
411 | 511k | case QPDFTokenizer::tt_dict_open: |
            // Refuse to open another container once the stack already holds 500 frames.
412 | 511k | if (stack.size() > 499) { |
413 | 313 | QTC::TC("qpdf", "QPDFParser too deep"); |
414 | 313 | warn("ignoring excessively deeply nested data structure"); |
415 | 313 | return {QPDFObject::create<QPDF_Null>()}; |
416 | 511k | } else { |
417 | 511k | b_contents = false; |
418 | 511k | stack.emplace_back( |
419 | 511k | input, |
420 | 511k | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
421 | 511k | : st_dictionary_key); |
422 | 511k | frame = &stack.back(); |
423 | 511k | continue; |
424 | 511k | } |
425 | | |
426 | 24.0k | case QPDFTokenizer::tt_bool: |
427 | 24.0k | addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); |
428 | 24.0k | continue; |
429 | | |
430 | 163k | case QPDFTokenizer::tt_null: |
431 | 163k | addNull(); |
432 | 163k | continue; |
433 | | |
434 | 1.49M | case QPDFTokenizer::tt_integer: |
435 | 1.49M | if (!content_stream) { |
436 | | // Buffer token in case it is part of an indirect reference. |
437 | 836k | last_offset_buffer[1] = input.getLastOffset(); |
438 | 836k | int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); |
439 | 836k | int_count = 1; |
440 | 836k | } else { |
441 | 661k | addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); |
442 | 661k | } |
443 | 1.49M | continue; |
444 | | |
445 | 200k | case QPDFTokenizer::tt_real: |
446 | 200k | addScalar<QPDF_Real>(tokenizer.getValue()); |
447 | 200k | continue; |
448 | | |
449 | 3.90M | case QPDFTokenizer::tt_name: |
450 | 3.90M | if (frame->state == st_dictionary_key) { |
                // A name in key position becomes the pending key; its value is stored by the
                // next add()/addNull().
451 | 1.14M | frame->key = tokenizer.getValue(); |
452 | 1.14M | frame->state = st_dictionary_value; |
453 | 1.14M | b_contents = decrypter && frame->key == "/Contents"; |
454 | 1.14M | continue; |
455 | 2.75M | } else { |
456 | 2.75M | addScalar<QPDF_Name>(tokenizer.getValue()); |
457 | 2.75M | } |
458 | 2.75M | continue; |
459 | | |
460 | 2.75M | case QPDFTokenizer::tt_word: |
461 | 373k | if (content_stream) { |
462 | 142k | addScalar<QPDF_Operator>(tokenizer.getValue()); |
463 | 142k | continue; |
464 | 142k | } |
465 | | |
466 | 231k | if (sanity_checks) { |
467 | 225k | if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") { |
468 | | // During sanity checks, assume an unexpected endobj or endstream indicates that |
469 | | // we are parsing past the end of the object. |
470 | 4.38k | warn( |
471 | 4.38k | "unexpected 'endobj' or 'endstream' while reading object; giving up on " |
472 | 4.38k | "reading object"); |
473 | 4.38k | return {QPDFObject::create<QPDF_Null>()}; |
474 | 4.38k | } |
475 | | |
476 | 221k | warn("unknown token while reading object; treating as null"); |
477 | 221k | if (tooManyBadTokens()) { |
478 | 6.32k | return {QPDFObject::create<QPDF_Null>()}; |
479 | 6.32k | } |
480 | 215k | addNull(); |
481 | 215k | continue; |
482 | 221k | } |
483 | | |
484 | 5.18k | QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); |
485 | 5.18k | warn("unknown token while reading object; treating as string"); |
486 | 5.18k | if (tooManyBadTokens()) { |
487 | 172 | return {QPDFObject::create<QPDF_Null>()}; |
488 | 172 | } |
489 | 5.01k | addScalar<QPDF_String>(tokenizer.getValue()); |
490 | | |
491 | 5.01k | continue; |
492 | | |
493 | 696k | case QPDFTokenizer::tt_string: |
494 | 696k | { |
495 | 696k | auto const& val = tokenizer.getValue(); |
496 | 696k | if (decrypter) { |
497 | 15.7k | if (b_contents) { |
                        // Remember the undecrypted value and its offset for the /Sig handling in
                        // the tt_dict_close case.
498 | 121 | frame->contents_string = val; |
499 | 121 | frame->contents_offset = input.getLastOffset(); |
500 | 121 | b_contents = false; |
501 | 121 | } |
502 | 15.7k | std::string s{val}; |
503 | 15.7k | decrypter->decryptString(s); |
504 | 15.7k | addScalar<QPDF_String>(s); |
505 | 681k | } else { |
506 | 681k | addScalar<QPDF_String>(val); |
507 | 681k | } |
508 | 696k | } |
509 | 696k | continue; |
510 | | |
511 | 0 | default: |
512 | 0 | warn("treating unknown token type as null while reading object"); |
513 | 0 | if (tooManyBadTokens()) { |
514 | 0 | return {QPDFObject::create<QPDF_Null>()}; |
515 | 0 | } |
516 | 0 | addNull(); |
517 | 7.88M | } |
518 | 7.88M | } |
519 | 207k | } |
520 | | |
521 | | void |
522 | | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) |
523 | 5.68M | { |
524 | 5.68M | if (frame->state != st_dictionary_value) { |
525 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
526 | | // processing once the tt_dict_close token has been found. |
527 | 4.59M | frame->olist.emplace_back(std::move(obj)); |
528 | 4.59M | } else { |
529 | 1.08M | if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { |
530 | 45.3k | warnDuplicateKey(); |
531 | 45.3k | } |
532 | 1.08M | frame->state = st_dictionary_key; |
533 | 1.08M | } |
534 | 5.68M | } |
535 | | |
536 | | void |
537 | | QPDFParser::addNull() |
538 | 431k | { |
539 | 431k | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
540 | | |
541 | 431k | if (frame->state != st_dictionary_value) { |
542 | | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
543 | | // processing once the tt_dict_close token has been found. |
544 | 390k | frame->olist.emplace_back(null_obj); |
545 | 390k | } else { |
546 | 41.2k | if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { |
547 | 3.14k | warnDuplicateKey(); |
548 | 3.14k | } |
549 | 41.2k | frame->state = st_dictionary_key; |
550 | 41.2k | } |
551 | 431k | ++frame->null_count; |
552 | 431k | } |
553 | | |
554 | | void |
555 | | QPDFParser::addInt(int count) |
556 | 758k | { |
557 | 758k | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
558 | 758k | obj->setDescription(context, description, last_offset_buffer[count % 2]); |
559 | 758k | add(std::move(obj)); |
560 | 758k | } |
561 | | |
562 | | template <typename T, typename... Args> |
563 | | void |
564 | | QPDFParser::addScalar(Args&&... args) |
565 | 4.48M | { |
566 | 4.48M | if ((bad_count || sanity_checks) && |
567 | 4.48M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { |
568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is |
569 | | // encountered. |
570 | 354k | max_bad_count = 0; |
571 | 354k | return; |
572 | 354k | } |
573 | 4.12M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
574 | 4.12M | obj->setDescription(context, description, input.getLastOffset()); |
575 | 4.12M | add(std::move(obj)); |
576 | 4.12M | } void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&) Line | Count | Source | 565 | 24.0k | { | 566 | 24.0k | if ((bad_count || sanity_checks) && | 567 | 24.0k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 372 | max_bad_count = 0; | 571 | 372 | return; | 572 | 372 | } | 573 | 23.6k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 23.6k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 23.6k | add(std::move(obj)); | 576 | 23.6k | } |
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&) Line | Count | Source | 565 | 661k | { | 566 | 661k | if ((bad_count || sanity_checks) && | 567 | 661k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 10.7k | max_bad_count = 0; | 571 | 10.7k | return; | 572 | 10.7k | } | 573 | 651k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 651k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 651k | add(std::move(obj)); | 576 | 651k | } |
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 565 | 200k | { | 566 | 200k | if ((bad_count || sanity_checks) && | 567 | 200k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 477 | max_bad_count = 0; | 571 | 477 | return; | 572 | 477 | } | 573 | 200k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 200k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 200k | add(std::move(obj)); | 576 | 200k | } |
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 565 | 2.75M | { | 566 | 2.75M | if ((bad_count || sanity_checks) && | 567 | 2.75M | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 333k | max_bad_count = 0; | 571 | 333k | return; | 572 | 333k | } | 573 | 2.42M | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 2.42M | obj->setDescription(context, description, input.getLastOffset()); | 575 | 2.42M | add(std::move(obj)); | 576 | 2.42M | } |
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 565 | 142k | { | 566 | 142k | if ((bad_count || sanity_checks) && | 567 | 142k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 10.0k | max_bad_count = 0; | 571 | 10.0k | return; | 572 | 10.0k | } | 573 | 132k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 132k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 132k | add(std::move(obj)); | 576 | 132k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 565 | 686k | { | 566 | 686k | if ((bad_count || sanity_checks) && | 567 | 686k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 279 | max_bad_count = 0; | 571 | 279 | return; | 572 | 279 | } | 573 | 685k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 685k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 685k | add(std::move(obj)); | 576 | 685k | } |
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 565 | 15.6k | { | 566 | 15.6k | if ((bad_count || sanity_checks) && | 567 | 15.6k | (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 568 | | // Stop adding scalars. We are going to abort when the close token or a bad token is | 569 | | // encountered. | 570 | 10 | max_bad_count = 0; | 571 | 10 | return; | 572 | 10 | } | 573 | 15.6k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 574 | 15.6k | obj->setDescription(context, description, input.getLastOffset()); | 575 | 15.6k | add(std::move(obj)); | 576 | 15.6k | } |
|
577 | | |
// Create an object of type T by forwarding args to QPDFObject::create<T>, attach this parser's
// context and description to it using `start` as the offset, and return it wrapped in a
// QPDFObjectHandle. `start` is not declared in this function; it comes from the enclosing scope.
578 | | template <typename T, typename... Args>
579 | | QPDFObjectHandle
580 | | QPDFParser::withDescription(Args&&... args)
581 | 716k | {
582 | 716k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583 | 716k | obj->setDescription(context, description, start);
584 | 716k | return {obj};
585 | 716k | } QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&) Line | Count | Source | 581 | 2.31k | { | 582 | 2.31k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 2.31k | obj->setDescription(context, description, start); | 584 | 2.31k | return {obj}; | 585 | 2.31k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&) Line | Count | Source | 581 | 165k | { | 582 | 165k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 165k | obj->setDescription(context, description, start); | 584 | 165k | return {obj}; | 585 | 165k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 581 | 127k | { | 582 | 127k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 127k | obj->setDescription(context, description, start); | 584 | 127k | return {obj}; | 585 | 127k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 581 | 77.8k | { | 582 | 77.8k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 77.8k | obj->setDescription(context, description, start); | 584 | 77.8k | return {obj}; | 585 | 77.8k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 581 | 322k | { | 582 | 322k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 322k | obj->setDescription(context, description, start); | 584 | 322k | return {obj}; | 585 | 322k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 581 | 21.1k | { | 582 | 21.1k | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 21.1k | obj->setDescription(context, description, start); | 584 | 21.1k | return {obj}; | 585 | 21.1k | } |
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 581 | 146 | { | 582 | 146 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 583 | 146 | obj->setDescription(context, description, start); | 584 | 146 | return {obj}; | 585 | 146 | } |
|
586 | | |
// Attach this parser's context and description to obj, recording parsed_offset as its offset.
// Does nothing when obj is null.
587 | | void
588 | | QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
589 | 450k | {
590 | 450k | if (obj) {
591 | 450k | obj->setDescription(context, description, parsed_offset);
592 | 450k | }
593 | 450k | }
594 | | |
// Insert every object in frame->olist into frame->dict under a generated key of the form
// "/QPDFFake<N>", issuing one warning per inserted key. A generated key is skipped when it is
// already present in frame->dict or equals the string value of a name object in frame->olist.
595 | | void
596 | | QPDFParser::fixMissingKeys()
597 | 1.48k | {
// Collect the string values of all name objects in olist so generated keys cannot clash with them.
598 | 1.48k | std::set<std::string> names;
599 | 4.71k | for (auto& obj: frame->olist) {
600 | 4.71k | if (obj.getObj()->getTypeCode() == ::ot_name) {
601 | 103 | names.insert(obj.getObj()->getStringValue());
602 | 103 | }
603 | 4.71k | }
// The counter is shared across all items, so each candidate number is tried at most once.
604 | 1.48k | int next_fake_key = 1;
605 | 4.65k | for (auto const& item: frame->olist) {
// Keep generating candidate keys until one is free, then store the item under it.
606 | 4.67k | while (true) {
607 | 4.67k | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
// found_fake is true when the candidate key is NOT yet in use (i.e. it is usable).
608 | 4.67k | const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
// Test-coverage marker: passes 0 when the key is free and 1 when it collided.
609 | 4.67k | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
610 | 4.67k | if (found_fake) {
611 | 4.65k | warn(
612 | 4.65k | frame->offset,
613 | 4.65k | "expected dictionary key but found non-name object; inserting key " + key);
614 | 4.65k | frame->dict[key] = item;
615 | 4.65k | break;
616 | 4.65k | }
617 | 4.67k | }
618 | 4.65k | }
619 | 1.48k | }
620 | | |
// Update the good/bad token counters and report whether the error limits have been exceeded.
// Returns true after warning that reading the object is being given up; returns false otherwise.
// Side effects: may decrement max_bad_count, increments or resets bad_count, and resets
// good_count to 0 on every path that returns false.
621 | | bool
622 | | QPDFParser::tooManyBadTokens()
623 | 276k | {
// Size limit: more than 5000 entries in the current frame's olist or dict.
624 | 276k | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
625 | 323 | if (bad_count) {
626 | 262 | warn(
627 | 262 | "encountered errors while parsing an array or dictionary with more than 5000 "
628 | 262 | "elements; giving up on reading object");
629 | 262 | return true;
630 | 262 | }
// NOTE(review): this branch warns "giving up" but does not return; control falls through to the
// counter checks below. Confirm that this is intended.
631 | 61 | warn(
632 | 61 | "encountered an array or dictionary with more than 5000 elements during xref recovery; "
633 | 61 | "giving up on reading object");
634 | 61 | }
// When max_bad_count is non-zero it is decremented here even if the rest of the condition fails.
// If budget remains and more than 4 good tokens were counted, restart the bad-token run at 1.
635 | 276k | if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
636 | 104k | good_count = 0;
637 | 104k | bad_count = 1;
638 | 104k | return false;
639 | 104k | }
640 | 172k | if (++bad_count > 5 ||
641 | 172k | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
642 | | // Give up after 5 errors in close proximity or if the number of missing dictionary keys
643 | | // exceeds the remaining number of allowable total errors.
644 | 8.73k | warn("too many errors; giving up on reading object");
645 | 8.73k | return true;
646 | 8.73k | }
// Still within limits: start counting good tokens afresh.
647 | 163k | good_count = 0;
648 | 163k | return false;
649 | 172k | }
650 | | |
// Deliver exception e as a warning through context when a context is set; otherwise throw e.
651 | | void
652 | | QPDFParser::warn(QPDFExc const& e) const
653 | 455k | {
654 | | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
655 | | // object. If parsing for some other reason, such as an explicit creation of an object from a
656 | | // string, then just throw the exception.
657 | 455k | if (context) {
658 | 455k | context->warn(e);
659 | 455k | } else {
660 | 0 | throw e;
661 | 0 | }
662 | 455k | }
663 | | |
// Warn, at the current frame's offset, that the key held in frame->key occurs more than once in
// the dictionary. Also records the "QPDFParser duplicate dict key" test-coverage marker.
664 | | void
665 | | QPDFParser::warnDuplicateKey()
666 | 48.4k | {
667 | 48.4k | QTC::TC("qpdf", "QPDFParser duplicate dict key");
668 | 48.4k | warn(
669 | 48.4k | frame->offset,
670 | 48.4k | "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
671 | 48.4k | }
672 | | |
// Build a qpdf_e_damaged_pdf QPDFExc for msg at offset and pass it to warn(QPDFExc const&).
// When stream_id is non-zero the exception is labelled "<filename> object stream <stream_id>" /
// "object <obj_id> 0"; otherwise it uses the input's name and object_description.
673 | | void
674 | | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
675 | 455k | {
676 | 455k | if (stream_id) {
// NOTE(review): context is dereferenced here without a null check; presumably stream_id is only
// non-zero when a context is set. Confirm against the callers.
677 | 17.3k | std::string descr = "object "s + std::to_string(obj_id) + " 0";
678 | 17.3k | std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
679 | 17.3k | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
680 | 438k | } else {
681 | 438k | warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
682 | 438k | }
683 | 455k | }
684 | | |
// Convenience overload: warn with msg at the offset reported by input.getLastOffset().
685 | | void
686 | | QPDFParser::warn(std::string const& msg) const
687 | 349k | {
688 | 349k | warn(input.getLastOffset(), msg);
689 | 349k | }