/src/qpdf/libqpdf/JSON.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/JSON.hh> |
2 | | |
3 | | #include <qpdf/JSON_writer.hh> |
4 | | |
5 | | #include <qpdf/BufferInputSource.hh> |
6 | | #include <qpdf/Pl_Base64.hh> |
7 | | #include <qpdf/Pl_Concatenate.hh> |
8 | | #include <qpdf/Pl_String.hh> |
9 | | #include <qpdf/QTC.hh> |
10 | | #include <qpdf/QUtil.hh> |
11 | | #include <qpdf/Util.hh> |
12 | | |
13 | | #include <cstring> |
14 | | #include <stdexcept> |
15 | | |
16 | | using namespace qpdf; |
17 | | |
18 | | JSON::Members::Members(std::unique_ptr<JSON_value> value) : |
19 | 0 | value(std::move(value)) |
20 | 0 | { |
21 | 0 | } |
22 | | |
23 | | JSON::JSON(std::unique_ptr<JSON_value> value) : |
24 | 0 | m(new Members(std::move(value))) |
25 | 0 | { |
26 | 0 | } |
27 | | |
28 | | void |
29 | | JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter) |
30 | 0 | { |
31 | 0 | if (first) { |
32 | 0 | *p << delimiter; |
33 | 0 | } else { |
34 | 0 | std::string s{"\n"}; |
35 | 0 | s.append(2 * depth, ' '); |
36 | 0 | *p << s + delimiter; |
37 | 0 | } |
38 | 0 | } |
39 | | |
40 | | void |
41 | | JSON::writeNext(Pipeline* p, bool& first, size_t depth) |
42 | 0 | { |
43 | 0 | if (first) { |
44 | 0 | first = false; |
45 | 0 | std::string s{"\n"}; |
46 | 0 | s.append(2 * depth, ' '); |
47 | 0 | *p << s; |
48 | 0 | } else { |
49 | 0 | std::string s{",\n"}; |
50 | 0 | s.append(2 * depth, ' '); |
51 | 0 | *p << s; |
52 | 0 | } |
53 | 0 | } |
54 | | |
55 | | void |
56 | | JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth) |
57 | 0 | { |
58 | 0 | *p << "{"; |
59 | 0 | first = true; |
60 | 0 | } |
61 | | |
62 | | void |
63 | | JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth) |
64 | 0 | { |
65 | 0 | *p << "["; |
66 | 0 | first = true; |
67 | 0 | } |
68 | | |
69 | | void |
70 | | JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth) |
71 | 0 | { |
72 | 0 | writeClose(p, first, depth, "}"); |
73 | 0 | } |
74 | | |
75 | | void |
76 | | JSON::writeArrayClose(Pipeline* p, bool first, size_t depth) |
77 | 0 | { |
78 | 0 | writeClose(p, first, depth, "]"); |
79 | 0 | } |
80 | | |
81 | | void |
82 | | JSON::writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth) |
83 | 0 | { |
84 | 0 | writeNext(p, first, depth); |
85 | 0 | *p << std::string("\"") + key + "\": "; |
86 | 0 | } |
87 | | |
88 | | void |
89 | | JSON::writeDictionaryItem( |
90 | | Pipeline* p, bool& first, std::string const& key, JSON const& value, size_t depth) |
91 | 0 | { |
92 | 0 | writeDictionaryKey(p, first, key, depth); |
93 | 0 | value.write(p, depth); |
94 | 0 | } |
95 | | |
96 | | void |
97 | | JSON::writeArrayItem(Pipeline* p, bool& first, JSON const& element, size_t depth) |
98 | 0 | { |
99 | 0 | writeNext(p, first, depth); |
100 | 0 | element.write(p, depth); |
101 | 0 | } |
102 | | |
103 | | void |
104 | | JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const |
105 | 0 | { |
106 | 0 | bool first = true; |
107 | 0 | writeDictionaryOpen(p, first, depth); |
108 | 0 | for (auto const& iter: members) { |
109 | 0 | writeDictionaryItem(p, first, iter.first, iter.second, 1 + depth); |
110 | 0 | } |
111 | 0 | writeDictionaryClose(p, first, depth); |
112 | 0 | } |
113 | | |
114 | | void |
115 | | JSON::JSON_array::write(Pipeline* p, size_t depth) const |
116 | 0 | { |
117 | 0 | bool first = true; |
118 | 0 | writeArrayOpen(p, first, depth); |
119 | 0 | for (auto const& element: elements) { |
120 | 0 | writeArrayItem(p, first, element, 1 + depth); |
121 | 0 | } |
122 | 0 | writeArrayClose(p, first, depth); |
123 | 0 | } |
124 | | |
125 | | JSON::JSON_string::JSON_string(std::string const& utf8) : |
126 | 0 | JSON_value(vt_string), |
127 | 0 | utf8(utf8) |
128 | 0 | { |
129 | 0 | } |
130 | | |
131 | | void |
132 | | JSON::JSON_string::write(Pipeline* p, size_t) const |
133 | 0 | { |
134 | 0 | *p << std::string("\"") + Writer::encode_string(utf8) + "\""; |
135 | 0 | } |
136 | | |
137 | | JSON::JSON_number::JSON_number(long long value) : |
138 | 0 | JSON_value(vt_number), |
139 | 0 | encoded(std::to_string(value)) |
140 | 0 | { |
141 | 0 | } |
142 | | |
143 | | JSON::JSON_number::JSON_number(double value) : |
144 | 0 | JSON_value(vt_number), |
145 | 0 | encoded(QUtil::double_to_string(value, 6)) |
146 | 0 | { |
147 | 0 | } |
148 | | |
149 | | JSON::JSON_number::JSON_number(std::string const& value) : |
150 | 0 | JSON_value(vt_number), |
151 | 0 | encoded(value) |
152 | 0 | { |
153 | 0 | } |
154 | | |
155 | | void |
156 | | JSON::JSON_number::write(Pipeline* p, size_t) const |
157 | 0 | { |
158 | 0 | *p << encoded; |
159 | 0 | } |
160 | | |
161 | | JSON::JSON_bool::JSON_bool(bool val) : |
162 | 0 | JSON_value(vt_bool), |
163 | 0 | value(val) |
164 | 0 | { |
165 | 0 | } |
166 | | |
167 | | void |
168 | | JSON::JSON_bool::write(Pipeline* p, size_t) const |
169 | 0 | { |
170 | 0 | *p << (value ? "true" : "false"); |
171 | 0 | } |
172 | | |
173 | | void |
174 | | JSON::JSON_null::write(Pipeline* p, size_t) const |
175 | 0 | { |
176 | 0 | *p << "null"; |
177 | 0 | } |
178 | | |
179 | | JSON::JSON_blob::JSON_blob(std::function<void(Pipeline*)> fn) : |
180 | 0 | JSON_value(vt_blob), |
181 | 0 | fn(fn) |
182 | 0 | { |
183 | 0 | } |
184 | | |
185 | | void |
186 | | JSON::JSON_blob::write(Pipeline* p, size_t) const |
187 | 0 | { |
188 | 0 | *p << "\""; |
189 | 0 | Pl_Concatenate cat("blob concatenate", p); |
190 | 0 | Pl_Base64 base64("blob base64", &cat, Pl_Base64::a_encode); |
191 | 0 | fn(&base64); |
192 | 0 | base64.finish(); |
193 | 0 | *p << "\""; |
194 | 0 | } |
195 | | |
196 | | void |
197 | | JSON::write(Pipeline* p, size_t depth) const |
198 | 0 | { |
199 | 0 | if (!m) { |
200 | 0 | *p << "null"; |
201 | 0 | } else { |
202 | 0 | m->value->write(p, depth); |
203 | 0 | } |
204 | 0 | } |
205 | | |
206 | | std::string |
207 | | JSON::unparse() const |
208 | 0 | { |
209 | 0 | if (!m) { |
210 | 0 | return "null"; |
211 | 0 | } |
212 | 0 | std::string s; |
213 | 0 | Pl_String p("unparse", nullptr, s); |
214 | 0 | write(&p, 0); |
215 | 0 | return s; |
216 | 0 | } |
217 | | |
218 | | std::string |
219 | | JSON::Writer::encode_string(std::string const& str) |
220 | 0 | { |
221 | 0 | static auto constexpr hexchars = "0123456789abcdef"; |
222 | |
|
223 | 0 | auto begin = str.cbegin(); |
224 | 0 | auto end = str.cend(); |
225 | 0 | auto iter = begin; |
226 | 0 | while (iter != end) { |
227 | 0 | auto c = static_cast<unsigned char>(*iter); |
228 | 0 | if ((c > 34 && c != '\\') || c == ' ' || c == 33) { |
229 | | // Optimistically check that no char in str requires escaping. Hopefully we can just |
230 | | // return the input str. |
231 | 0 | ++iter; |
232 | 0 | } else { |
233 | | // We found a char that requires escaping. Initialize result to the chars scanned so |
234 | | // far, append/replace the rest of str one char at a time, and return the result. |
235 | 0 | std::string result{begin, iter}; |
236 | |
|
237 | 0 | for (; iter != end; ++iter) { |
238 | 0 | auto ch = static_cast<unsigned char>(*iter); |
239 | 0 | if ((ch > 34 && ch != '\\') || ch == ' ' || ch == 33) { |
240 | | // Check for most common case first. |
241 | 0 | result += *iter; |
242 | 0 | } else { |
243 | 0 | switch (ch) { |
244 | 0 | case '\\': |
245 | 0 | result += "\\\\"; |
246 | 0 | break; |
247 | 0 | case '\"': |
248 | 0 | result += "\\\""; |
249 | 0 | break; |
250 | 0 | case '\b': |
251 | 0 | result += "\\b"; |
252 | 0 | break; |
253 | 0 | case '\f': |
254 | 0 | result += "\\f"; |
255 | 0 | break; |
256 | 0 | case '\n': |
257 | 0 | result += "\\n"; |
258 | 0 | break; |
259 | 0 | case '\r': |
260 | 0 | result += "\\r"; |
261 | 0 | break; |
262 | 0 | case '\t': |
263 | 0 | result += "\\t"; |
264 | 0 | break; |
265 | 0 | default: |
266 | 0 | result += ch < 16 ? "\\u000" : "\\u001"; |
267 | 0 | result += hexchars[ch % 16]; |
268 | 0 | } |
269 | 0 | } |
270 | 0 | } |
271 | 0 | return result; |
272 | 0 | } |
273 | 0 | } |
274 | 0 | return str; |
275 | 0 | } |
276 | | |
277 | | JSON |
278 | | JSON::makeDictionary() |
279 | 0 | { |
280 | 0 | return {std::make_unique<JSON_dictionary>()}; |
281 | 0 | } |
282 | | |
283 | | JSON |
284 | | JSON::addDictionaryMember(std::string const& key, JSON const& val) |
285 | 0 | { |
286 | 0 | if (auto* obj = m ? dynamic_cast<JSON_dictionary*>(m->value.get()) : nullptr) { |
287 | 0 | return obj->members[Writer::encode_string(key)] = val.m ? val : makeNull(); |
288 | 0 | } else { |
289 | 0 | throw std::runtime_error("JSON::addDictionaryMember called on non-dictionary"); |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | JSON |
294 | | JSON::makeArray() |
295 | 0 | { |
296 | 0 | return {std::make_unique<JSON_array>()}; |
297 | 0 | } |
298 | | |
299 | | JSON |
300 | | JSON::addArrayElement(JSON const& val) |
301 | 0 | { |
302 | 0 | if (auto* arr = m ? dynamic_cast<JSON_array*>(m->value.get()) : nullptr) { |
303 | 0 | if (val.m) { |
304 | 0 | arr->elements.push_back(val); |
305 | 0 | } else { |
306 | 0 | arr->elements.push_back(makeNull()); |
307 | 0 | } |
308 | 0 | return arr->elements.back(); |
309 | 0 | } |
310 | 0 | throw std::runtime_error("JSON::addArrayElement called on non-array"); |
311 | 0 | return {}; // unreachable |
312 | 0 | } |
313 | | |
314 | | JSON |
315 | | JSON::makeString(std::string const& utf8) |
316 | 0 | { |
317 | 0 | return {std::make_unique<JSON_string>(utf8)}; |
318 | 0 | } |
319 | | |
320 | | JSON |
321 | | JSON::makeInt(long long int value) |
322 | 0 | { |
323 | 0 | return {std::make_unique<JSON_number>(value)}; |
324 | 0 | } |
325 | | |
326 | | JSON |
327 | | JSON::makeReal(double value) |
328 | 0 | { |
329 | 0 | return {std::make_unique<JSON_number>(value)}; |
330 | 0 | } |
331 | | |
332 | | JSON |
333 | | JSON::makeNumber(std::string const& encoded) |
334 | 0 | { |
335 | 0 | return {std::make_unique<JSON_number>(encoded)}; |
336 | 0 | } |
337 | | |
338 | | JSON |
339 | | JSON::makeBool(bool value) |
340 | 0 | { |
341 | 0 | return {std::make_unique<JSON_bool>(value)}; |
342 | 0 | } |
343 | | |
344 | | JSON |
345 | | JSON::makeNull() |
346 | 0 | { |
347 | 0 | return {std::make_unique<JSON_null>()}; |
348 | 0 | } |
349 | | |
350 | | JSON |
351 | | JSON::makeBlob(std::function<void(Pipeline*)> fn) |
352 | 0 | { |
353 | 0 | return {std::make_unique<JSON_blob>(fn)}; |
354 | 0 | } |
355 | | |
356 | | bool |
357 | | JSON::isArray() const |
358 | 0 | { |
359 | 0 | return m ? m->value->type_code == vt_array : false; |
360 | 0 | } |
361 | | |
362 | | bool |
363 | | JSON::isDictionary() const |
364 | 0 | { |
365 | 0 | return m && m->value->type_code == vt_dictionary; |
366 | 0 | } |
367 | | |
368 | | bool |
369 | | JSON::getString(std::string& utf8) const |
370 | 0 | { |
371 | 0 | if (m && m->value->type_code == vt_string) { |
372 | 0 | auto v = dynamic_cast<JSON_string const*>(m->value.get()); |
373 | 0 | utf8 = v->utf8; |
374 | 0 | return true; |
375 | 0 | } |
376 | 0 | return false; |
377 | 0 | } |
378 | | |
379 | | bool |
380 | | JSON::getNumber(std::string& value) const |
381 | 0 | { |
382 | 0 | if (m && m->value->type_code == vt_number) { |
383 | 0 | auto v = dynamic_cast<JSON_number const*>(m->value.get()); |
384 | 0 | value = v->encoded; |
385 | 0 | return true; |
386 | 0 | } |
387 | 0 | return false; |
388 | 0 | } |
389 | | |
390 | | bool |
391 | | JSON::getBool(bool& value) const |
392 | 0 | { |
393 | 0 | if (m && m->value->type_code == vt_bool) { |
394 | 0 | auto v = dynamic_cast<JSON_bool const*>(m->value.get()); |
395 | 0 | value = v->value; |
396 | 0 | return true; |
397 | 0 | } |
398 | 0 | return false; |
399 | 0 | } |
400 | | |
401 | | bool |
402 | | JSON::isNull() const |
403 | 0 | { |
404 | 0 | return m && m->value->type_code == vt_null; |
405 | 0 | } |
406 | | |
407 | | JSON |
408 | | JSON::getDictItem(std::string const& key) const |
409 | 0 | { |
410 | 0 | if (auto v = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr) { |
411 | 0 | if (auto it = v->members.find(key); it != v->members.end()) { |
412 | 0 | return it->second; |
413 | 0 | } |
414 | 0 | } |
415 | 0 | return makeNull(); |
416 | 0 | } |
417 | | |
418 | | bool |
419 | | JSON::forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const |
420 | 0 | { |
421 | 0 | if (auto v = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr) { |
422 | 0 | for (auto const& [key, value]: v->members) { |
423 | 0 | fn(key, value); |
424 | 0 | } |
425 | 0 | return true; |
426 | 0 | } |
427 | 0 | return false; |
428 | 0 | } |
429 | | |
430 | | bool |
431 | | JSON::forEachArrayItem(std::function<void(JSON value)> fn) const |
432 | 0 | { |
433 | 0 | if (auto v = m ? dynamic_cast<JSON_array const*>(m->value.get()) : nullptr) { |
434 | 0 | for (auto const& i: v->elements) { |
435 | 0 | fn(JSON(i)); |
436 | 0 | } |
437 | 0 | return true; |
438 | 0 | } |
439 | 0 | return false; |
440 | 0 | } |
441 | | |
442 | | bool |
443 | | JSON::checkSchema(JSON schema, std::list<std::string>& errors) |
444 | 0 | { |
445 | 0 | return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), 0, errors, ""); |
446 | 0 | } |
447 | | |
448 | | bool |
449 | | JSON::checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors) |
450 | 0 | { |
451 | 0 | return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), flags, errors, ""); |
452 | 0 | } |
453 | | |
454 | | bool |
455 | | JSON::checkSchemaInternal( |
456 | | JSON_value* this_v, |
457 | | JSON_value* sch_v, |
458 | | unsigned long flags, |
459 | | std::list<std::string>& errors, |
460 | | std::string prefix) |
461 | 0 | { |
462 | 0 | auto* this_arr = dynamic_cast<JSON_array*>(this_v); |
463 | 0 | auto* this_dict = dynamic_cast<JSON_dictionary*>(this_v); |
464 | |
|
465 | 0 | auto* sch_arr = dynamic_cast<JSON_array*>(sch_v); |
466 | 0 | auto* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v); |
467 | |
|
468 | 0 | auto* sch_str = dynamic_cast<JSON_string*>(sch_v); |
469 | |
|
470 | 0 | std::string err_prefix; |
471 | 0 | if (prefix.empty()) { |
472 | 0 | err_prefix = "top-level object"; |
473 | 0 | } else { |
474 | 0 | err_prefix = "json key \"" + prefix + "\""; |
475 | 0 | } |
476 | |
|
477 | 0 | std::string pattern_key; |
478 | 0 | if (sch_dict) { |
479 | 0 | if (!this_dict) { |
480 | 0 | QTC::TC("libtests", "JSON wanted dictionary"); |
481 | 0 | errors.push_back(err_prefix + " is supposed to be a dictionary"); |
482 | 0 | return false; |
483 | 0 | } |
484 | 0 | auto members = sch_dict->members; |
485 | 0 | std::string key; |
486 | 0 | if ((members.size() == 1) && |
487 | 0 | ((key = members.begin()->first, key.length() > 2) && (key.at(0) == '<') && |
488 | 0 | (key.at(key.length() - 1) == '>'))) { |
489 | 0 | pattern_key = key; |
490 | 0 | } |
491 | 0 | } |
492 | | |
493 | 0 | if (sch_dict && !pattern_key.empty()) { |
494 | 0 | auto pattern_schema = sch_dict->members[pattern_key].m->value.get(); |
495 | 0 | for (auto const& [key, val]: this_dict->members) { |
496 | 0 | checkSchemaInternal( |
497 | 0 | val.m->value.get(), pattern_schema, flags, errors, prefix + "." + key); |
498 | 0 | } |
499 | 0 | } else if (sch_dict) { |
500 | 0 | for (auto& [key, val]: sch_dict->members) { |
501 | 0 | if (this_dict->members.contains(key)) { |
502 | 0 | checkSchemaInternal( |
503 | 0 | this_dict->members[key].m->value.get(), |
504 | 0 | val.m->value.get(), |
505 | 0 | flags, |
506 | 0 | errors, |
507 | 0 | prefix + "." + key); |
508 | 0 | } else { |
509 | 0 | if (flags & f_optional) { |
510 | 0 | QTC::TC("libtests", "JSON optional key"); |
511 | 0 | } else { |
512 | 0 | QTC::TC("libtests", "JSON key missing in object"); |
513 | 0 | errors.emplace_back( |
514 | 0 | err_prefix + ": key \"" + key + |
515 | 0 | "\" is present in schema but missing in object"); |
516 | 0 | } |
517 | 0 | } |
518 | 0 | } |
519 | 0 | for (auto const& item: this_dict->members) { |
520 | 0 | if (!sch_dict->members.contains(item.first)) { |
521 | 0 | QTC::TC("libtests", "JSON key extra in object"); |
522 | 0 | errors.emplace_back( |
523 | 0 | err_prefix + ": key \"" + item.first + |
524 | 0 | "\" is not present in schema but appears in object"); |
525 | 0 | } |
526 | 0 | } |
527 | 0 | } else if (sch_arr) { |
528 | 0 | auto n_elements = sch_arr->elements.size(); |
529 | 0 | if (n_elements == 1) { |
530 | | // A single-element array in the schema allows a single element in the object or a |
531 | | // variable-length array, each of whose items must conform to the single element of the |
532 | | // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior |
533 | | // of allowing a single item only when the object is not an array. |
534 | 0 | if (this_arr) { |
535 | 0 | int i = 0; |
536 | 0 | for (auto const& element: this_arr->elements) { |
537 | 0 | checkSchemaInternal( |
538 | 0 | element.m->value.get(), |
539 | 0 | sch_arr->elements.at(0).m->value.get(), |
540 | 0 | flags, |
541 | 0 | errors, |
542 | 0 | prefix + "." + std::to_string(i)); |
543 | 0 | ++i; |
544 | 0 | } |
545 | 0 | } else { |
546 | 0 | QTC::TC("libtests", "JSON schema array for single item"); |
547 | 0 | checkSchemaInternal( |
548 | 0 | this_v, sch_arr->elements.at(0).m->value.get(), flags, errors, prefix); |
549 | 0 | } |
550 | 0 | } else if (!this_arr || this_arr->elements.size() != n_elements) { |
551 | 0 | QTC::TC("libtests", "JSON schema array length mismatch"); |
552 | 0 | errors.emplace_back( |
553 | 0 | err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); |
554 | 0 | return false; |
555 | 0 | } else { |
556 | | // A multi-element array in the schema must correspond to an element of the same length |
557 | | // in the object. Each element in the object is validated against the corresponding |
558 | | // element in the schema. |
559 | 0 | size_t i = 0; |
560 | 0 | for (auto const& element: this_arr->elements) { |
561 | 0 | checkSchemaInternal( |
562 | 0 | element.m->value.get(), |
563 | 0 | sch_arr->elements.at(i).m->value.get(), |
564 | 0 | flags, |
565 | 0 | errors, |
566 | 0 | prefix + "." + std::to_string(i)); |
567 | 0 | ++i; |
568 | 0 | } |
569 | 0 | } |
570 | 0 | } else if (!sch_str) { |
571 | 0 | QTC::TC("libtests", "JSON schema other type"); |
572 | 0 | errors.emplace_back(err_prefix + " schema value is not dictionary, array, or string"); |
573 | 0 | return false; |
574 | 0 | } |
575 | | |
576 | 0 | return errors.empty(); |
577 | 0 | } |
578 | | |
579 | | namespace |
580 | | { |
581 | | class JSONParser |
582 | | { |
583 | | public: |
584 | | JSONParser(InputSource& is, JSON::Reactor* reactor) : |
585 | 0 | is(is), |
586 | 0 | reactor(reactor), |
587 | 0 | p(buf) |
588 | 0 | { |
589 | 0 | } |
590 | | |
591 | | JSON parse(); |
592 | | |
593 | | private: |
594 | | enum parser_state_e { |
595 | | ps_top, |
596 | | ps_dict_begin, |
597 | | ps_dict_after_key, |
598 | | ps_dict_after_colon, |
599 | | ps_dict_after_item, |
600 | | ps_dict_after_comma, |
601 | | ps_array_begin, |
602 | | ps_array_after_item, |
603 | | ps_array_after_comma, |
604 | | ps_done, |
605 | | }; |
606 | | |
607 | | enum lex_state_e { |
608 | | ls_top, |
609 | | ls_number, |
610 | | ls_number_minus, |
611 | | ls_number_leading_zero, |
612 | | ls_number_before_point, |
613 | | ls_number_point, |
614 | | ls_number_after_point, |
615 | | ls_number_e, |
616 | | ls_number_e_sign, |
617 | | ls_alpha, |
618 | | ls_string, |
619 | | ls_after_string, |
620 | | ls_backslash, |
621 | | ls_u4, |
622 | | ls_begin_array, |
623 | | ls_end_array, |
624 | | ls_begin_dict, |
625 | | ls_end_dict, |
626 | | ls_colon, |
627 | | ls_comma, |
628 | | }; |
629 | | |
630 | | struct StackFrame |
631 | | { |
632 | | StackFrame(parser_state_e state, JSON& item) : |
633 | 0 | state(state), |
634 | 0 | item(item) |
635 | 0 | { |
636 | 0 | } |
637 | | |
638 | | parser_state_e state; |
639 | | JSON item; |
640 | | }; |
641 | | |
642 | | void getToken(); |
643 | | void handleToken(); |
644 | | void tokenError(); |
645 | | static void handle_u_code( |
646 | | unsigned long codepoint, |
647 | | qpdf_offset_t offset, |
648 | | unsigned long& high_surrogate, |
649 | | qpdf_offset_t& high_offset, |
650 | | std::string& result); |
651 | | inline void append(); |
652 | | inline void append(lex_state_e); |
653 | | inline void ignore(); |
654 | | inline void ignore(lex_state_e); |
655 | | |
656 | | InputSource& is; |
657 | | JSON::Reactor* reactor; |
658 | | lex_state_e lex_state{ls_top}; |
659 | | char buf[16384]; |
660 | | size_t bytes{0}; |
661 | | char const* p; |
662 | | qpdf_offset_t u_count{0}; |
663 | | unsigned long u_value{0}; |
664 | | qpdf_offset_t offset{0}; |
665 | | bool done{false}; |
666 | | std::string token; |
667 | | qpdf_offset_t token_start{0}; |
668 | | parser_state_e parser_state{ps_top}; |
669 | | std::vector<StackFrame> stack; |
670 | | std::string dict_key; |
671 | | qpdf_offset_t dict_key_offset{0}; |
672 | | }; |
673 | | } // namespace |
674 | | |
675 | | void |
676 | | JSONParser::handle_u_code( |
677 | | unsigned long codepoint, |
678 | | qpdf_offset_t offset, |
679 | | unsigned long& high_surrogate, |
680 | | qpdf_offset_t& high_offset, |
681 | | std::string& result) |
682 | 0 | { |
683 | 0 | if ((codepoint & 0xFC00) == 0xD800) { |
684 | | // high surrogate |
685 | 0 | qpdf_offset_t new_high_offset = offset; |
686 | 0 | if (high_offset) { |
687 | 0 | QTC::TC("libtests", "JSON 16 high high"); |
688 | 0 | throw std::runtime_error( |
689 | 0 | "JSON: offset " + std::to_string(new_high_offset) + |
690 | 0 | ": UTF-16 high surrogate found after previous high surrogate at offset " + |
691 | 0 | std::to_string(high_offset)); |
692 | 0 | } |
693 | 0 | high_offset = new_high_offset; |
694 | 0 | high_surrogate = codepoint; |
695 | 0 | } else if ((codepoint & 0xFC00) == 0xDC00) { |
696 | | // low surrogate |
697 | 0 | if (offset != (high_offset + 6)) { |
698 | 0 | QTC::TC("libtests", "JSON 16 low not after high"); |
699 | 0 | throw std::runtime_error( |
700 | 0 | "JSON: offset " + std::to_string(offset) + |
701 | 0 | ": UTF-16 low surrogate found not immediately after high surrogate"); |
702 | 0 | } |
703 | 0 | high_offset = 0; |
704 | 0 | codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); |
705 | 0 | result += QUtil::toUTF8(codepoint); |
706 | 0 | } else { |
707 | 0 | result += QUtil::toUTF8(codepoint); |
708 | 0 | } |
709 | 0 | } |
710 | | |
711 | | void |
712 | | JSONParser::tokenError() |
713 | 0 | { |
714 | 0 | if (done) { |
715 | 0 | QTC::TC("libtests", "JSON parse ls premature end of input"); |
716 | 0 | throw std::runtime_error("JSON: premature end of input"); |
717 | 0 | } |
718 | | |
719 | 0 | if (lex_state == ls_u4) { |
720 | 0 | QTC::TC("libtests", "JSON parse bad hex after u"); |
721 | 0 | throw std::runtime_error( |
722 | 0 | "JSON: offset " + std::to_string(offset - u_count - 1) + |
723 | 0 | ": \\u must be followed by four hex digits"); |
724 | 0 | } else if (lex_state == ls_alpha) { |
725 | 0 | QTC::TC("libtests", "JSON parse keyword bad character"); |
726 | 0 | throw std::runtime_error( |
727 | 0 | "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " + |
728 | 0 | std::string(p, 1)); |
729 | 0 | } else if (lex_state == ls_string) { |
730 | 0 | QTC::TC("libtests", "JSON parse control char in string"); |
731 | 0 | throw std::runtime_error( |
732 | 0 | "JSON: offset " + std::to_string(offset) + |
733 | 0 | ": control character in string (missing \"?)"); |
734 | 0 | } else if (lex_state == ls_backslash) { |
735 | 0 | QTC::TC("libtests", "JSON parse backslash bad character"); |
736 | 0 | throw std::runtime_error( |
737 | 0 | "JSON: offset " + std::to_string(offset) + |
738 | 0 | ": invalid character after backslash: " + std::string(p, 1)); |
739 | 0 | } |
740 | | |
741 | 0 | if (*p == '.') { |
742 | 0 | if (lex_state == ls_number || lex_state == ls_number_e || lex_state == ls_number_e_sign) { |
743 | 0 | QTC::TC("libtests", "JSON parse point after e"); |
744 | 0 | throw std::runtime_error( |
745 | 0 | "JSON: offset " + std::to_string(offset) + |
746 | 0 | ": numeric literal: decimal point after e"); |
747 | 0 | } else { |
748 | 0 | QTC::TC("libtests", "JSON parse duplicate point"); |
749 | 0 | throw std::runtime_error( |
750 | 0 | "JSON: offset " + std::to_string(offset) + |
751 | 0 | ": numeric literal: decimal point already seen"); |
752 | 0 | } |
753 | 0 | } else if (*p == 'e' || *p == 'E') { |
754 | 0 | QTC::TC("libtests", "JSON parse duplicate e"); |
755 | 0 | throw std::runtime_error( |
756 | 0 | "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen"); |
757 | 0 | } else if ((*p == '+') || (*p == '-')) { |
758 | 0 | QTC::TC("libtests", "JSON parse unexpected sign"); |
759 | 0 | throw std::runtime_error( |
760 | 0 | "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); |
761 | 0 | } else if (util::is_space(*p) || strchr("{}[]:,", *p)) { |
762 | 0 | QTC::TC("libtests", "JSON parse incomplete number"); |
763 | 0 | throw std::runtime_error( |
764 | 0 | "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number"); |
765 | |
|
766 | 0 | } else { |
767 | 0 | QTC::TC("libtests", "JSON parse numeric bad character"); |
768 | 0 | throw std::runtime_error( |
769 | 0 | "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " + |
770 | 0 | std::string(p, 1)); |
771 | 0 | } |
772 | 0 | throw std::logic_error("JSON::tokenError : unhandled error"); |
773 | 0 | } |
774 | | |
775 | | // Append current character to token and advance to next input character. |
776 | | inline void |
777 | | JSONParser::append() |
778 | 0 | { |
779 | 0 | token += *p; |
780 | 0 | ++p; |
781 | 0 | ++offset; |
782 | 0 | } |
783 | | |
784 | | // Append current character to token, advance to next input character and transition to 'next' lexer |
785 | | // state. |
786 | | inline void |
787 | | JSONParser::append(lex_state_e next) |
788 | 0 | { |
789 | 0 | lex_state = next; |
790 | 0 | token += *p; |
791 | 0 | ++p; |
792 | 0 | ++offset; |
793 | 0 | } |
794 | | |
795 | | // Advance to next input character without appending the current character to token. |
796 | | inline void |
797 | | JSONParser::ignore() |
798 | 0 | { |
799 | 0 | ++p; |
800 | 0 | ++offset; |
801 | 0 | } |
802 | | |
803 | | // Advance to next input character without appending the current character to token and transition |
804 | | // to 'next' lexer state. |
805 | | inline void |
806 | | JSONParser::ignore(lex_state_e next) |
807 | 0 | { |
808 | 0 | lex_state = next; |
809 | 0 | ++p; |
810 | 0 | ++offset; |
811 | 0 | } |
812 | | |
813 | | void |
814 | | JSONParser::getToken() |
815 | 0 | { |
816 | 0 | token.clear(); |
817 | | |
818 | | // Keep track of UTF-16 surrogate pairs. |
819 | 0 | unsigned long high_surrogate = 0; |
820 | 0 | qpdf_offset_t high_offset = 0; |
821 | |
|
822 | 0 | while (true) { |
823 | 0 | if (p == (buf + bytes)) { |
824 | 0 | p = buf; |
825 | 0 | bytes = is.read(buf, sizeof(buf)); |
826 | 0 | if (bytes == 0) { |
827 | 0 | done = true; |
828 | 0 | break; |
829 | 0 | } |
830 | 0 | } |
831 | | |
832 | 0 | if ((*p < 32 && *p >= 0)) { |
833 | 0 | if (*p == '\t' || *p == '\n' || *p == '\r') { |
834 | | // Legal white space not permitted in strings. This will always end the current |
835 | | // token (unless we are still before the start of the token). |
836 | 0 | if (lex_state == ls_top) { |
837 | 0 | ignore(); |
838 | 0 | } else { |
839 | 0 | break; |
840 | 0 | } |
841 | |
|
842 | 0 | } else { |
843 | 0 | QTC::TC("libtests", "JSON parse null character"); |
844 | 0 | throw std::runtime_error( |
845 | 0 | "JSON: control or null character at offset " + std::to_string(offset)); |
846 | 0 | } |
847 | 0 | } else if (*p == ',') { |
848 | 0 | if (lex_state == ls_top) { |
849 | 0 | ignore(ls_comma); |
850 | 0 | return; |
851 | 0 | } else if (lex_state == ls_string) { |
852 | 0 | append(); |
853 | 0 | } else { |
854 | 0 | break; |
855 | 0 | } |
856 | 0 | } else if (*p == ':') { |
857 | 0 | if (lex_state == ls_top) { |
858 | 0 | ignore(ls_colon); |
859 | 0 | return; |
860 | 0 | } else if (lex_state == ls_string) { |
861 | 0 | append(); |
862 | 0 | } else { |
863 | 0 | break; |
864 | 0 | } |
865 | 0 | } else if (*p == ' ') { |
866 | 0 | if (lex_state == ls_top) { |
867 | 0 | ignore(); |
868 | 0 | } else if (lex_state == ls_string) { |
869 | 0 | append(); |
870 | 0 | } else { |
871 | 0 | break; |
872 | 0 | } |
873 | 0 | } else if (*p == '{') { |
874 | 0 | if (lex_state == ls_top) { |
875 | 0 | token_start = offset; |
876 | 0 | ignore(ls_begin_dict); |
877 | 0 | return; |
878 | 0 | } else if (lex_state == ls_string) { |
879 | 0 | append(); |
880 | 0 | } else { |
881 | 0 | break; |
882 | 0 | } |
883 | 0 | } else if (*p == '}') { |
884 | 0 | if (lex_state == ls_top) { |
885 | 0 | ignore(ls_end_dict); |
886 | 0 | return; |
887 | 0 | } else if (lex_state == ls_string) { |
888 | 0 | append(); |
889 | 0 | } else { |
890 | 0 | break; |
891 | 0 | } |
892 | 0 | } else if (*p == '[') { |
893 | 0 | if (lex_state == ls_top) { |
894 | 0 | token_start = offset; |
895 | 0 | ignore(ls_begin_array); |
896 | 0 | return; |
897 | 0 | } else if (lex_state == ls_string) { |
898 | 0 | append(); |
899 | 0 | } else { |
900 | 0 | break; |
901 | 0 | } |
902 | 0 | } else if (*p == ']') { |
903 | 0 | if (lex_state == ls_top) { |
904 | 0 | ignore(ls_end_array); |
905 | 0 | return; |
906 | 0 | } else if (lex_state == ls_string) { |
907 | 0 | append(); |
908 | 0 | } else { |
909 | 0 | break; |
910 | 0 | } |
911 | 0 | } else { |
912 | 0 | switch (lex_state) { |
913 | 0 | case ls_top: |
914 | 0 | token_start = offset; |
915 | 0 | if (*p == '"') { |
916 | 0 | ignore(ls_string); |
917 | 0 | } else if ((*p >= 'a') && (*p <= 'z')) { |
918 | 0 | append(ls_alpha); |
919 | 0 | } else if (*p == '-') { |
920 | 0 | append(ls_number_minus); |
921 | 0 | } else if ((*p >= '1') && (*p <= '9')) { |
922 | 0 | append(ls_number_before_point); |
923 | 0 | } else if (*p == '0') { |
924 | 0 | append(ls_number_leading_zero); |
925 | 0 | } else { |
926 | 0 | QTC::TC("libtests", "JSON parse bad character"); |
927 | 0 | throw std::runtime_error( |
928 | 0 | "JSON: offset " + std::to_string(offset) + ": unexpected character " + |
929 | 0 | std::string(p, 1)); |
930 | 0 | } |
931 | 0 | break; |
932 | | |
933 | 0 | case ls_number_minus: |
934 | 0 | if ((*p >= '1') && (*p <= '9')) { |
935 | 0 | append(ls_number_before_point); |
936 | 0 | } else if (*p == '0') { |
937 | 0 | append(ls_number_leading_zero); |
938 | 0 | } else { |
939 | 0 | QTC::TC("libtests", "JSON parse number minus no digits"); |
940 | 0 | throw std::runtime_error( |
941 | 0 | "JSON: offset " + std::to_string(offset) + |
942 | 0 | ": numeric literal: no digit after minus sign"); |
943 | 0 | } |
944 | 0 | break; |
945 | | |
946 | 0 | case ls_number_leading_zero: |
947 | 0 | if (*p == '.') { |
948 | 0 | append(ls_number_point); |
949 | 0 | } else if (*p == 'e' || *p == 'E') { |
950 | 0 | append(ls_number_e); |
951 | 0 | } else { |
952 | 0 | QTC::TC("libtests", "JSON parse leading zero"); |
953 | 0 | throw std::runtime_error( |
954 | 0 | "JSON: offset " + std::to_string(offset) + ": number with leading zero"); |
955 | 0 | } |
956 | 0 | break; |
957 | | |
958 | 0 | case ls_number_before_point: |
959 | 0 | if ((*p >= '0') && (*p <= '9')) { |
960 | 0 | append(); |
961 | 0 | } else if (*p == '.') { |
962 | 0 | append(ls_number_point); |
963 | 0 | } else if (*p == 'e' || *p == 'E') { |
964 | 0 | append(ls_number_e); |
965 | 0 | } else { |
966 | 0 | tokenError(); |
967 | 0 | } |
968 | 0 | break; |
969 | | |
970 | 0 | case ls_number_point: |
971 | 0 | if ((*p >= '0') && (*p <= '9')) { |
972 | 0 | append(ls_number_after_point); |
973 | 0 | } else { |
974 | 0 | tokenError(); |
975 | 0 | } |
976 | 0 | break; |
977 | | |
978 | 0 | case ls_number_after_point: |
979 | 0 | if ((*p >= '0') && (*p <= '9')) { |
980 | 0 | append(); |
981 | 0 | } else if (*p == 'e' || *p == 'E') { |
982 | 0 | append(ls_number_e); |
983 | 0 | } else { |
984 | 0 | tokenError(); |
985 | 0 | } |
986 | 0 | break; |
987 | | |
988 | 0 | case ls_number_e: |
989 | 0 | if ((*p >= '0') && (*p <= '9')) { |
990 | 0 | append(ls_number); |
991 | 0 | } else if ((*p == '+') || (*p == '-')) { |
992 | 0 | append(ls_number_e_sign); |
993 | 0 | } else { |
994 | 0 | tokenError(); |
995 | 0 | } |
996 | 0 | break; |
997 | | |
998 | 0 | case ls_number_e_sign: |
999 | 0 | if ((*p >= '0') && (*p <= '9')) { |
1000 | 0 | append(ls_number); |
1001 | 0 | } else { |
1002 | 0 | tokenError(); |
1003 | 0 | } |
1004 | 0 | break; |
1005 | | |
1006 | 0 | case ls_number: |
1007 | | // We only get here after we have seen an exponent. |
1008 | 0 | if ((*p >= '0') && (*p <= '9')) { |
1009 | 0 | append(); |
1010 | 0 | } else { |
1011 | 0 | tokenError(); |
1012 | 0 | } |
1013 | 0 | break; |
1014 | | |
1015 | 0 | case ls_alpha: |
1016 | 0 | if ((*p >= 'a') && (*p <= 'z')) { |
1017 | 0 | append(); |
1018 | 0 | } else { |
1019 | 0 | tokenError(); |
1020 | 0 | } |
1021 | 0 | break; |
1022 | | |
1023 | 0 | case ls_string: |
1024 | 0 | if (*p == '"') { |
1025 | 0 | if (high_offset) { |
1026 | 0 | QTC::TC("libtests", "JSON 16 dangling high"); |
1027 | 0 | throw std::runtime_error( |
1028 | 0 | "JSON: offset " + std::to_string(high_offset) + |
1029 | 0 | ": UTF-16 high surrogate not followed by low surrogate"); |
1030 | 0 | } |
1031 | 0 | ignore(ls_after_string); |
1032 | 0 | return; |
1033 | 0 | } else if (*p == '\\') { |
1034 | 0 | ignore(ls_backslash); |
1035 | 0 | } else { |
1036 | 0 | append(); |
1037 | 0 | } |
1038 | 0 | break; |
1039 | | |
1040 | 0 | case ls_backslash: |
1041 | 0 | lex_state = ls_string; |
1042 | 0 | switch (*p) { |
1043 | 0 | case '\\': |
1044 | 0 | case '\"': |
1045 | 0 | case '/': |
1046 | | // \/ is allowed in json input, but so is /, so we don't map / to \/ in output. |
1047 | 0 | token += *p; |
1048 | 0 | break; |
1049 | 0 | case 'b': |
1050 | 0 | token += '\b'; |
1051 | 0 | break; |
1052 | 0 | case 'f': |
1053 | 0 | token += '\f'; |
1054 | 0 | break; |
1055 | 0 | case 'n': |
1056 | 0 | token += '\n'; |
1057 | 0 | break; |
1058 | 0 | case 'r': |
1059 | 0 | token += '\r'; |
1060 | 0 | break; |
1061 | 0 | case 't': |
1062 | 0 | token += '\t'; |
1063 | 0 | break; |
1064 | 0 | case 'u': |
1065 | 0 | lex_state = ls_u4; |
1066 | 0 | u_count = 0; |
1067 | 0 | u_value = 0; |
1068 | 0 | break; |
1069 | 0 | default: |
1070 | 0 | lex_state = ls_backslash; |
1071 | 0 | tokenError(); |
1072 | 0 | } |
1073 | 0 | ignore(); |
1074 | 0 | break; |
1075 | | |
1076 | 0 | case ls_u4: |
1077 | 0 | using ui = unsigned int; |
1078 | 0 | if (ui val = ui(util::hex_decode_char(*p)); val < 16) { |
1079 | 0 | u_value = 16 * u_value + val; |
1080 | 0 | } else { |
1081 | 0 | tokenError(); |
1082 | 0 | } |
1083 | 0 | if (++u_count == 4) { |
1084 | 0 | handle_u_code(u_value, offset - 5, high_surrogate, high_offset, token); |
1085 | 0 | lex_state = ls_string; |
1086 | 0 | } |
1087 | 0 | ignore(); |
1088 | 0 | break; |
1089 | | |
1090 | 0 | default: |
1091 | 0 | throw std::logic_error("JSONParser::getToken : trying to handle delimiter state"); |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | } |
1095 | | |
1096 | | // We only get here if on end of input or if the last character was a control character or other |
1097 | | // delimiter. |
1098 | | |
1099 | 0 | if (!token.empty()) { |
1100 | 0 | switch (lex_state) { |
1101 | 0 | case ls_top: |
1102 | | // Can't happen |
1103 | 0 | throw std::logic_error("tok_start set in ls_top while parsing"); |
1104 | 0 | break; |
1105 | | |
1106 | 0 | case ls_number_leading_zero: |
1107 | 0 | case ls_number_before_point: |
1108 | 0 | case ls_number_after_point: |
1109 | 0 | lex_state = ls_number; |
1110 | 0 | break; |
1111 | | |
1112 | 0 | case ls_number: |
1113 | 0 | case ls_alpha: |
1114 | | // terminal state |
1115 | 0 | break; |
1116 | | |
1117 | 0 | default: |
1118 | 0 | tokenError(); |
1119 | 0 | } |
1120 | 0 | } |
1121 | 0 | } |
1122 | | |
1123 | | void |
1124 | | JSONParser::handleToken() |
1125 | 0 | { |
1126 | 0 | if (lex_state == ls_top) { |
1127 | 0 | return; |
1128 | 0 | } |
1129 | | |
1130 | 0 | if (parser_state == ps_done) { |
1131 | 0 | QTC::TC("libtests", "JSON parse junk after object"); |
1132 | 0 | throw std::runtime_error( |
1133 | 0 | "JSON: offset " + std::to_string(offset) + |
1134 | 0 | ": material follows end of object: " + token); |
1135 | 0 | } |
1136 | | |
1137 | 0 | const static JSON null_item = JSON::makeNull(); |
1138 | 0 | JSON item; |
1139 | 0 | auto tos = stack.empty() ? null_item : stack.back().item; |
1140 | 0 | auto ls = lex_state; |
1141 | 0 | lex_state = ls_top; |
1142 | |
|
1143 | 0 | switch (ls) { |
1144 | 0 | case ls_begin_dict: |
1145 | 0 | item = JSON::makeDictionary(); |
1146 | 0 | break; |
1147 | | |
1148 | 0 | case ls_begin_array: |
1149 | 0 | item = JSON::makeArray(); |
1150 | 0 | break; |
1151 | | |
1152 | 0 | case ls_colon: |
1153 | 0 | if (parser_state != ps_dict_after_key) { |
1154 | 0 | QTC::TC("libtests", "JSON parse unexpected :"); |
1155 | 0 | throw std::runtime_error( |
1156 | 0 | "JSON: offset " + std::to_string(offset) + ": unexpected colon"); |
1157 | 0 | } |
1158 | 0 | parser_state = ps_dict_after_colon; |
1159 | 0 | return; |
1160 | | |
1161 | 0 | case ls_comma: |
1162 | 0 | if (!((parser_state == ps_dict_after_item) || (parser_state == ps_array_after_item))) { |
1163 | 0 | QTC::TC("libtests", "JSON parse unexpected ,"); |
1164 | 0 | throw std::runtime_error( |
1165 | 0 | "JSON: offset " + std::to_string(offset) + ": unexpected comma"); |
1166 | 0 | } |
1167 | 0 | if (parser_state == ps_dict_after_item) { |
1168 | 0 | parser_state = ps_dict_after_comma; |
1169 | 0 | } else if (parser_state == ps_array_after_item) { |
1170 | 0 | parser_state = ps_array_after_comma; |
1171 | 0 | } else { |
1172 | 0 | throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma"); |
1173 | 0 | } |
1174 | 0 | return; |
1175 | | |
1176 | 0 | case ls_end_array: |
1177 | 0 | if (!(parser_state == ps_array_begin || parser_state == ps_array_after_item)) { |
1178 | 0 | QTC::TC("libtests", "JSON parse unexpected ]"); |
1179 | 0 | throw std::runtime_error( |
1180 | 0 | "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter"); |
1181 | 0 | } |
1182 | 0 | parser_state = stack.back().state; |
1183 | 0 | tos.setEnd(offset); |
1184 | 0 | if (reactor) { |
1185 | 0 | reactor->containerEnd(tos); |
1186 | 0 | } |
1187 | 0 | if (parser_state != ps_done) { |
1188 | 0 | stack.pop_back(); |
1189 | 0 | } |
1190 | 0 | return; |
1191 | | |
1192 | 0 | case ls_end_dict: |
1193 | 0 | if (!((parser_state == ps_dict_begin) || (parser_state == ps_dict_after_item))) { |
1194 | 0 | QTC::TC("libtests", "JSON parse unexpected }"); |
1195 | 0 | throw std::runtime_error( |
1196 | 0 | "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter"); |
1197 | 0 | } |
1198 | 0 | parser_state = stack.back().state; |
1199 | 0 | tos.setEnd(offset); |
1200 | 0 | if (reactor) { |
1201 | 0 | reactor->containerEnd(tos); |
1202 | 0 | } |
1203 | 0 | if (parser_state != ps_done) { |
1204 | 0 | stack.pop_back(); |
1205 | 0 | } |
1206 | 0 | return; |
1207 | | |
1208 | 0 | case ls_number: |
1209 | 0 | item = JSON::makeNumber(token); |
1210 | 0 | break; |
1211 | | |
1212 | 0 | case ls_alpha: |
1213 | 0 | if (token == "true") { |
1214 | 0 | item = JSON::makeBool(true); |
1215 | 0 | } else if (token == "false") { |
1216 | 0 | item = JSON::makeBool(false); |
1217 | 0 | } else if (token == "null") { |
1218 | 0 | item = JSON::makeNull(); |
1219 | 0 | } else { |
1220 | 0 | QTC::TC("libtests", "JSON parse invalid keyword"); |
1221 | 0 | throw std::runtime_error( |
1222 | 0 | "JSON: offset " + std::to_string(offset) + ": invalid keyword " + token); |
1223 | 0 | } |
1224 | 0 | break; |
1225 | | |
1226 | 0 | case ls_after_string: |
1227 | 0 | if (parser_state == ps_dict_begin || parser_state == ps_dict_after_comma) { |
1228 | 0 | dict_key = token; |
1229 | 0 | dict_key_offset = token_start; |
1230 | 0 | parser_state = ps_dict_after_key; |
1231 | 0 | return; |
1232 | 0 | } else { |
1233 | 0 | item = JSON::makeString(token); |
1234 | 0 | } |
1235 | 0 | break; |
1236 | | |
1237 | 0 | default: |
1238 | 0 | throw std::runtime_error( |
1239 | 0 | "JSON: offset " + std::to_string(offset) + ": premature end of input"); |
1240 | 0 | break; |
1241 | 0 | } |
1242 | | |
1243 | 0 | item.setStart(token_start); |
1244 | 0 | item.setEnd(offset); |
1245 | |
|
1246 | 0 | switch (parser_state) { |
1247 | 0 | case ps_dict_begin: |
1248 | 0 | case ps_dict_after_comma: |
1249 | 0 | QTC::TC("libtests", "JSON parse string as dict key"); |
1250 | 0 | throw std::runtime_error( |
1251 | 0 | "JSON: offset " + std::to_string(offset) + ": expect string as dictionary key"); |
1252 | 0 | break; |
1253 | | |
1254 | 0 | case ps_dict_after_colon: |
1255 | 0 | if (!reactor || !reactor->dictionaryItem(dict_key, item)) { |
1256 | 0 | tos.addDictionaryMember(dict_key, item); |
1257 | 0 | } |
1258 | 0 | parser_state = ps_dict_after_item; |
1259 | 0 | break; |
1260 | | |
1261 | 0 | case ps_array_begin: |
1262 | 0 | case ps_array_after_comma: |
1263 | 0 | if (!reactor || !reactor->arrayItem(item)) { |
1264 | 0 | tos.addArrayElement(item); |
1265 | 0 | } |
1266 | 0 | parser_state = ps_array_after_item; |
1267 | 0 | break; |
1268 | | |
1269 | 0 | case ps_top: |
1270 | 0 | if (!(item.isDictionary() || item.isArray())) { |
1271 | 0 | stack.emplace_back(ps_done, item); |
1272 | 0 | parser_state = ps_done; |
1273 | 0 | return; |
1274 | 0 | } |
1275 | 0 | parser_state = ps_done; |
1276 | 0 | break; |
1277 | | |
1278 | 0 | case ps_dict_after_key: |
1279 | 0 | QTC::TC("libtests", "JSON parse expected colon"); |
1280 | 0 | throw std::runtime_error("JSON: offset " + std::to_string(offset) + ": expected ':'"); |
1281 | 0 | break; |
1282 | | |
1283 | 0 | case ps_dict_after_item: |
1284 | 0 | QTC::TC("libtests", "JSON parse expected , or }"); |
1285 | 0 | throw std::runtime_error( |
1286 | 0 | "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'"); |
1287 | 0 | break; |
1288 | | |
1289 | 0 | case ps_array_after_item: |
1290 | 0 | QTC::TC("libtests", "JSON parse expected, or ]"); |
1291 | 0 | throw std::runtime_error( |
1292 | 0 | "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'"); |
1293 | 0 | break; |
1294 | | |
1295 | 0 | case ps_done: |
1296 | 0 | throw std::logic_error("JSONParser::handleToken: unexpected parser state"); |
1297 | 0 | } |
1298 | | |
1299 | 0 | if (item.isDictionary() || item.isArray()) { |
1300 | 0 | stack.emplace_back(parser_state, item); |
1301 | | // Calling container start method is postponed until after adding the containers to their |
1302 | | // parent containers, if any. This makes it much easier to keep track of the current nesting |
1303 | | // level. |
1304 | 0 | if (item.isDictionary()) { |
1305 | 0 | if (reactor) { |
1306 | 0 | reactor->dictionaryStart(); |
1307 | 0 | } |
1308 | 0 | parser_state = ps_dict_begin; |
1309 | 0 | } else if (item.isArray()) { |
1310 | 0 | if (reactor) { |
1311 | 0 | reactor->arrayStart(); |
1312 | 0 | } |
1313 | 0 | parser_state = ps_array_begin; |
1314 | 0 | } |
1315 | |
|
1316 | 0 | if (stack.size() > 500) { |
1317 | 0 | throw std::runtime_error( |
1318 | 0 | "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded"); |
1319 | 0 | } |
1320 | 0 | } |
1321 | 0 | } |
1322 | | |
1323 | | JSON |
1324 | | JSONParser::parse() |
1325 | 0 | { |
1326 | 0 | while (!done) { |
1327 | 0 | getToken(); |
1328 | 0 | handleToken(); |
1329 | 0 | } |
1330 | 0 | if (parser_state != ps_done) { |
1331 | 0 | QTC::TC("libtests", "JSON parse premature EOF"); |
1332 | 0 | throw std::runtime_error("JSON: premature end of input"); |
1333 | 0 | } |
1334 | 0 | auto const& tos = stack.back().item; |
1335 | 0 | if (reactor && !(tos.isArray() || tos.isDictionary())) { |
1336 | 0 | reactor->topLevelScalar(); |
1337 | 0 | } |
1338 | 0 | return tos; |
1339 | 0 | } |
1340 | | |
1341 | | JSON |
1342 | | JSON::parse(InputSource& is, Reactor* reactor) |
1343 | 0 | { |
1344 | 0 | JSONParser jp(is, reactor); |
1345 | 0 | return jp.parse(); |
1346 | 0 | } |
1347 | | |
1348 | | JSON |
1349 | | JSON::parse(std::string const& s) |
1350 | 0 | { |
1351 | 0 | BufferInputSource bis("json input", s); |
1352 | 0 | JSONParser jp(bis, nullptr); |
1353 | 0 | return jp.parse(); |
1354 | 0 | } |
1355 | | |
1356 | | void |
1357 | | JSON::setStart(qpdf_offset_t start) |
1358 | 0 | { |
1359 | 0 | if (m) { |
1360 | 0 | m->start = start; |
1361 | 0 | } |
1362 | 0 | } |
1363 | | |
1364 | | void |
1365 | | JSON::setEnd(qpdf_offset_t end) |
1366 | 0 | { |
1367 | 0 | if (m) { |
1368 | 0 | m->end = end; |
1369 | 0 | } |
1370 | 0 | } |
1371 | | |
1372 | | qpdf_offset_t |
1373 | | JSON::getStart() const |
1374 | 0 | { |
1375 | 0 | return m ? m->start : 0; |
1376 | 0 | } |
1377 | | |
1378 | | qpdf_offset_t |
1379 | | JSON::getEnd() const |
1380 | 0 | { |
1381 | 0 | return m ? m->end : 0; |
1382 | 0 | } |