/src/qpdf/libqpdf/QPDF_json.cc
Line | Count | Source |
1 | | #include <qpdf/QPDF.hh> |
2 | | |
3 | | #include <qpdf/FileInputSource.hh> |
4 | | #include <qpdf/InputSource_private.hh> |
5 | | #include <qpdf/JSON_writer.hh> |
6 | | #include <qpdf/Pl_Base64.hh> |
7 | | #include <qpdf/Pl_StdioFile.hh> |
8 | | #include <qpdf/QIntC.hh> |
9 | | #include <qpdf/QPDFObjectHandle_private.hh> |
10 | | #include <qpdf/QPDFObject_private.hh> |
11 | | #include <qpdf/QTC.hh> |
12 | | #include <qpdf/QUtil.hh> |
13 | | #include <qpdf/Util.hh> |
14 | | |
15 | | #include <algorithm> |
16 | | #include <cstring> |
17 | | |
18 | | using namespace qpdf; |
19 | | |
20 | | // This chart shows an example of the state transitions that would occur in parsing a minimal file. |
21 | | |
22 | | // | |
23 | | // { | -> st_top |
24 | | // "qpdf": [ | -> st_qpdf |
25 | | // { | -> st_qpdf_meta |
26 | | // ... | ... |
27 | | // }, | ... |
28 | | // { | -> st_objects |
29 | | // "obj:1 0 R": { | -> st_object_top |
30 | | // "value": { | -> st_object |
31 | | // "/Pages": "2 0 R", | ... |
32 | | // "/Type": "/Catalog" | ... |
33 | | // } | <- st_object_top |
34 | | // }, | <- st_objects |
35 | | // "obj:2 0 R": { | -> st_object_top |
36 | | // "value": 12 | -> st_object |
37 | | // } | <- st_object_top |
38 | | // }, | <- st_objects |
39 | | // "obj:4 0 R": { | -> st_object_top |
40 | | // "stream": { | -> st_stream |
41 | | // "data": "cG90YXRv", | ... |
42 | | // "dict": { | -> st_object |
43 | | // "/K": true | ... |
44 | | // } | <- st_stream |
45 | | // } | <- st_object_top |
46 | | // }, | <- st_objects |
47 | | // "trailer": { | -> st_trailer |
48 | | // "value": { | -> st_object |
49 | | // "/Root": "1 0 R", | ... |
50 | | // "/Size": 7 | ... |
51 | | // } | <- st_trailer |
52 | | // } | <- st_objects |
53 | | // } | <- st_qpdf |
54 | | // ] | <- st_top |
55 | | // } | |
56 | | |
// Minimal PDF skeleton: a header, a cross-reference table with only the free entry for object
// 0, and a trailer of size 1. createFromJSON() loads this with processMemoryFile() before
// importing the JSON. The "9" after startxref is the byte offset of "xref", i.e. the length of
// the "%PDF-1.3\n" header line.
static char const* JSON_PDF = (
    // force line break
    "%PDF-1.3\n"
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    "trailer << /Size 1 >>\n"
    "startxref\n"
    "9\n"
    "%%EOF\n");
67 | | |
68 | | // Validator methods -- these are much more performant than std::regex. |
69 | | static bool |
70 | | is_indirect_object(std::string const& v, int& obj, int& gen) |
71 | 1.06M | { |
72 | 1.06M | char const* p = v.c_str(); |
73 | 1.06M | std::string o_str; |
74 | 1.06M | std::string g_str; |
75 | 1.06M | if (!util::is_digit(*p)) { |
76 | 108k | return false; |
77 | 108k | } |
78 | 3.73M | while (util::is_digit(*p)) { |
79 | 2.77M | o_str.append(1, *p++); |
80 | 2.77M | } |
81 | 960k | if (*p != ' ') { |
82 | 7.04k | return false; |
83 | 7.04k | } |
84 | 7.37M | while (*p == ' ') { |
85 | 6.42M | ++p; |
86 | 6.42M | } |
87 | 953k | if (!util::is_digit(*p)) { |
88 | 2.89k | return false; |
89 | 2.89k | } |
90 | 10.4M | while (util::is_digit(*p)) { |
91 | 9.53M | g_str.append(1, *p++); |
92 | 9.53M | } |
93 | 950k | if (*p != ' ') { |
94 | 3.31k | return false; |
95 | 3.31k | } |
96 | 17.7M | while (*p == ' ') { |
97 | 16.8M | ++p; |
98 | 16.8M | } |
99 | 947k | if (*p++ != 'R') { |
100 | 3.02k | return false; |
101 | 3.02k | } |
102 | 944k | if (*p) { |
103 | 1.79k | return false; |
104 | 1.79k | } |
105 | 942k | obj = QUtil::string_to_int(o_str.c_str()); |
106 | 942k | gen = QUtil::string_to_int(g_str.c_str()); |
107 | 942k | return obj > 0; |
108 | 944k | } |
109 | | |
110 | | static bool |
111 | | is_obj_key(std::string const& v, int& obj, int& gen) |
112 | 65.4k | { |
113 | 65.4k | if (v.substr(0, 4) != "obj:") { |
114 | 24.4k | return false; |
115 | 24.4k | } |
116 | 41.0k | return is_indirect_object(v.substr(4), obj, gen); |
117 | 65.4k | } |
118 | | |
// Recognize a string value with the "u:" prefix. On a match, str receives everything after the
// prefix (possibly empty); otherwise str is left unchanged.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "u:") != 0) {
        return false;
    }
    str = v.substr(2);
    return true;
}
128 | | |
129 | | static bool |
130 | | is_binary_string(std::string const& v, std::string& str) |
131 | 108k | { |
132 | 108k | if (v.substr(0, 2) == "b:") { |
133 | 6.67k | str = v.substr(2); |
134 | 6.67k | int count = 0; |
135 | 85.1k | for (char c: str) { |
136 | 85.1k | if (!util::is_hex_digit(c)) { |
137 | 3.05k | return false; |
138 | 3.05k | } |
139 | 82.1k | ++count; |
140 | 82.1k | } |
141 | 3.61k | return (count % 2 == 0); |
142 | 6.67k | } |
143 | 101k | return false; |
144 | 108k | } |
145 | | |
// True if v starts with '/' and has at least one more character after it.
static bool
is_name(std::string const& v)
{
    if (v.size() < 2) {
        return false;
    }
    return v.front() == '/';
}
151 | | |
// True if v starts with "n:/" and has at least one more character after that prefix.
static bool
is_pdf_name(std::string const& v)
{
    return v.size() >= 4 && v.compare(0, 3, "n:/") == 0;
}
157 | | |
158 | | bool |
159 | | QPDF::test_json_validators() |
160 | 0 | { |
161 | 0 | bool passed = true; |
162 | 0 | auto check_fn = [&passed](char const* msg, bool expr) { |
163 | 0 | if (!expr) { |
164 | 0 | passed = false; |
165 | 0 | std::cerr << msg << '\n'; |
166 | 0 | } |
167 | 0 | }; |
168 | 0 | #define check(expr) check_fn(#expr, expr) |
169 | |
|
170 | 0 | int obj = 0; |
171 | 0 | int gen = 0; |
172 | 0 | check(!is_indirect_object("", obj, gen)); |
173 | 0 | check(!is_indirect_object("12", obj, gen)); |
174 | 0 | check(!is_indirect_object("x12 0 R", obj, gen)); |
175 | 0 | check(!is_indirect_object("12 0 Rx", obj, gen)); |
176 | 0 | check(!is_indirect_object("12 0R", obj, gen)); |
177 | 0 | check(is_indirect_object("52 1 R", obj, gen)); |
178 | 0 | check(obj == 52); |
179 | 0 | check(gen == 1); |
180 | 0 | check(is_indirect_object("53 20 R", obj, gen)); |
181 | 0 | check(obj == 53); |
182 | 0 | check(gen == 20); |
183 | 0 | check(!is_obj_key("", obj, gen)); |
184 | 0 | check(!is_obj_key("obj:x", obj, gen)); |
185 | 0 | check(!is_obj_key("obj:x", obj, gen)); |
186 | 0 | check(is_obj_key("obj:12 13 R", obj, gen)); |
187 | 0 | check(obj == 12); |
188 | 0 | check(gen == 13); |
189 | 0 | std::string str; |
190 | 0 | check(!is_unicode_string("", str)); |
191 | 0 | check(!is_unicode_string("xyz", str)); |
192 | 0 | check(!is_unicode_string("x:", str)); |
193 | 0 | check(is_unicode_string("u:potato", str)); |
194 | 0 | check(str == "potato"); |
195 | 0 | check(is_unicode_string("u:", str)); |
196 | 0 | check(str.empty()); |
197 | 0 | check(!is_binary_string("", str)); |
198 | 0 | check(!is_binary_string("x:", str)); |
199 | 0 | check(!is_binary_string("b:1", str)); |
200 | 0 | check(!is_binary_string("b:123", str)); |
201 | 0 | check(!is_binary_string("b:gh", str)); |
202 | 0 | check(is_binary_string("b:", str)); |
203 | 0 | check(is_binary_string("b:12", str)); |
204 | 0 | check(is_binary_string("b:123aBC", str)); |
205 | 0 | check(!is_name("")); |
206 | 0 | check(!is_name("/")); |
207 | 0 | check(!is_name("xyz")); |
208 | 0 | check(is_name("/Potato")); |
209 | 0 | check(is_name("/Potato Salad")); |
210 | |
|
211 | 0 | return passed; |
212 | 0 | #undef check_arg |
213 | 0 | } |
214 | | |
215 | | static std::function<void(Pipeline*)> |
216 | | provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end) |
217 | 9.08k | { |
218 | 9.08k | return [is, start, end](Pipeline* p) { |
219 | 0 | auto data = is->read(QIntC::to_size(end - start), start); |
220 | 0 | data = Pl_Base64::decode(data); |
221 | 0 | p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size()); |
222 | 0 | p->finish(); |
223 | 0 | }; |
224 | 9.08k | } |
225 | | |
226 | | class QPDF::JSONReactor: public JSON::Reactor |
227 | | { |
228 | | public: |
229 | | JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) : |
230 | 15.6k | pdf(pdf), |
231 | 15.6k | is(is), |
232 | 15.6k | must_be_complete(must_be_complete), |
233 | | descr( |
234 | 15.6k | std::make_shared<QPDFObject::Description>( |
235 | 15.6k | QPDFObject::JSON_Descr(std::make_shared<std::string>(is->getName()), ""))) |
236 | 15.6k | { |
237 | 15.6k | } |
238 | 15.6k | ~JSONReactor() override = default; |
239 | | void dictionaryStart() override; |
240 | | void arrayStart() override; |
241 | | void containerEnd(JSON const& value) override; |
242 | | void topLevelScalar() override; |
243 | | bool dictionaryItem(std::string const& key, JSON const& value) override; |
244 | | bool arrayItem(JSON const& value) override; |
245 | | |
246 | | bool anyErrors() const; |
247 | | |
248 | | private: |
249 | | enum state_e { |
250 | | st_top, |
251 | | st_qpdf, |
252 | | st_qpdf_meta, |
253 | | st_objects, |
254 | | st_trailer, |
255 | | st_object_top, |
256 | | st_stream, |
257 | | st_object, |
258 | | st_ignore, |
259 | | }; |
260 | | |
261 | | struct StackFrame |
262 | | { |
263 | | StackFrame(state_e state) : |
264 | 72.7k | state(state) {}; |
265 | | StackFrame(state_e state, QPDFObjectHandle&& object) : |
266 | 108k | state(state), |
267 | 108k | object(object) {}; |
268 | | state_e state; |
269 | | QPDFObjectHandle object; |
270 | | }; |
271 | | |
272 | | void containerStart(); |
273 | | bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e); |
274 | | void setObjectDescription(QPDFObjectHandle& oh, JSON const& value); |
275 | | QPDFObjectHandle makeObject(JSON const& value); |
276 | | void error(qpdf_offset_t offset, std::string const& message); |
277 | | void replaceObject(QPDFObjectHandle&& replacement, JSON const& value); |
278 | | |
279 | | QPDF& pdf; |
280 | | QPDF::Doc::Objects& objects = pdf.m->objects; |
281 | | std::shared_ptr<InputSource> is; |
282 | | bool must_be_complete{true}; |
283 | | std::shared_ptr<QPDFObject::Description> descr; |
284 | | bool errors{false}; |
285 | | bool saw_qpdf{false}; |
286 | | bool saw_qpdf_meta{false}; |
287 | | bool saw_objects{false}; |
288 | | bool saw_json_version{false}; |
289 | | bool saw_pdf_version{false}; |
290 | | bool saw_trailer{false}; |
291 | | std::string cur_object; |
292 | | bool saw_value{false}; |
293 | | bool saw_stream{false}; |
294 | | bool saw_dict{false}; |
295 | | bool saw_data{false}; |
296 | | bool saw_datafile{false}; |
297 | | bool this_stream_needs_data{false}; |
298 | | std::vector<StackFrame> stack; |
299 | | QPDFObjectHandle next_obj; |
300 | | state_e next_state{st_top}; |
301 | | }; |
302 | | |
303 | | void |
304 | | QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg) |
305 | 175k | { |
306 | 175k | errors = true; |
307 | 175k | std::string object = this->cur_object; |
308 | 175k | if (is->getName() != pdf.getFilename()) { |
309 | 0 | object += " from " + is->getName(); |
310 | 0 | } |
311 | 175k | pdf.warn(qpdf_e_json, object, offset, msg); |
312 | 175k | } |
313 | | |
314 | | bool |
315 | | QPDF::JSONReactor::anyErrors() const |
316 | 122 | { |
317 | 122 | return errors; |
318 | 122 | } |
319 | | |
320 | | void |
321 | | QPDF::JSONReactor::containerStart() |
322 | 181k | { |
323 | 181k | if (next_obj) { |
324 | 108k | stack.emplace_back(next_state, std::move(next_obj)); |
325 | 108k | next_obj = QPDFObjectHandle(); |
326 | 108k | } else { |
327 | 72.7k | stack.emplace_back(next_state); |
328 | 72.7k | } |
329 | 181k | } |
330 | | |
331 | | void |
332 | | QPDF::JSONReactor::dictionaryStart() |
333 | 136k | { |
334 | 136k | containerStart(); |
335 | 136k | } |
336 | | |
337 | | void |
338 | | QPDF::JSONReactor::arrayStart() |
339 | 45.2k | { |
340 | 45.2k | if (stack.empty()) { |
341 | 702 | QTC::TC("qpdf", "QPDF_json top-level array"); |
342 | 702 | throw std::runtime_error("QPDF JSON must be a dictionary"); |
343 | 702 | } |
344 | 44.5k | containerStart(); |
345 | 44.5k | } |
346 | | |
347 | | void |
348 | | QPDF::JSONReactor::containerEnd(JSON const& value) |
349 | 77.5k | { |
350 | 77.5k | auto from_state = stack.back().state; |
351 | 77.5k | stack.pop_back(); |
352 | 77.5k | if (stack.empty()) { |
353 | 151 | if (!this->saw_qpdf) { |
354 | 41 | QTC::TC("qpdf", "QPDF_json missing qpdf"); |
355 | 41 | error(0, "\"qpdf\" object was not seen"); |
356 | 110 | } else { |
357 | 110 | if (!this->saw_json_version) { |
358 | 86 | QTC::TC("qpdf", "QPDF_json missing json version"); |
359 | 86 | error(0, "\"qpdf[0].jsonversion\" was not seen"); |
360 | 86 | } |
361 | 110 | if (must_be_complete && !this->saw_pdf_version) { |
362 | 84 | QTC::TC("qpdf", "QPDF_json missing pdf version"); |
363 | 84 | error(0, "\"qpdf[0].pdfversion\" was not seen"); |
364 | 84 | } |
365 | 110 | if (!this->saw_objects) { |
366 | 17 | QTC::TC("qpdf", "QPDF_json missing objects"); |
367 | 17 | error(0, "\"qpdf[1]\" was not seen"); |
368 | 93 | } else { |
369 | 93 | if (must_be_complete && !this->saw_trailer) { |
370 | 65 | QTC::TC("qpdf", "QPDF_json missing trailer"); |
371 | 65 | error(0, "\"qpdf[1].trailer\" was not seen"); |
372 | 65 | } |
373 | 93 | } |
374 | 110 | } |
375 | 77.3k | } else if (from_state == st_trailer) { |
376 | 1.80k | if (!saw_value) { |
377 | 1.27k | QTC::TC("qpdf", "QPDF_json trailer no value"); |
378 | 1.27k | error(value.getStart(), "\"trailer\" is missing \"value\""); |
379 | 1.27k | } |
380 | 75.5k | } else if (from_state == st_object_top) { |
381 | 23.8k | if (saw_value == saw_stream) { |
382 | 2.39k | QTC::TC("qpdf", "QPDF_json value stream both or neither"); |
383 | 2.39k | error(value.getStart(), "object must have exactly one of \"value\" or \"stream\""); |
384 | 2.39k | } |
385 | 23.8k | if (saw_stream) { |
386 | 11.5k | if (!saw_dict) { |
387 | 6.01k | QTC::TC("qpdf", "QPDF_json stream no dict"); |
388 | 6.01k | error(value.getStart(), "\"stream\" is missing \"dict\""); |
389 | 6.01k | } |
390 | 11.5k | if (saw_data == saw_datafile) { |
391 | 5.18k | if (this_stream_needs_data) { |
392 | 2.18k | QTC::TC("qpdf", "QPDF_json data datafile both or neither"); |
393 | 2.18k | error( |
394 | 2.18k | value.getStart(), |
395 | 2.18k | "new \"stream\" must have exactly one of \"data\" or \"datafile\""); |
396 | 2.99k | } else if (saw_datafile) { |
397 | 1.07k | QTC::TC("qpdf", "QPDF_json data and datafile"); |
398 | 1.07k | error( |
399 | 1.07k | value.getStart(), |
400 | 1.07k | "existing \"stream\" may at most one of \"data\" or \"datafile\""); |
401 | 1.91k | } else { |
402 | 1.91k | QTC::TC("qpdf", "QPDF_json no stream data in update mode"); |
403 | 1.91k | } |
404 | 5.18k | } |
405 | 11.5k | } |
406 | 23.8k | } |
407 | 77.5k | if (!stack.empty()) { |
408 | 77.3k | auto state = stack.back().state; |
409 | 77.3k | if (state == st_objects) { |
410 | 30.3k | this->cur_object = ""; |
411 | 30.3k | this->saw_dict = false; |
412 | 30.3k | this->saw_data = false; |
413 | 30.3k | this->saw_datafile = false; |
414 | 30.3k | this->saw_value = false; |
415 | 30.3k | this->saw_stream = false; |
416 | 30.3k | } |
417 | 77.3k | } |
418 | 77.5k | } |
419 | | |
420 | | void |
421 | | QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value) |
422 | 41.1k | { |
423 | 41.1k | auto& tos = stack.back(); |
424 | 41.1k | auto og = tos.object.getObjGen(); |
425 | 41.1k | if (replacement.isIndirect() && !(replacement.isStream() && replacement.getObjGen() == og)) { |
426 | 1.56k | error( |
427 | 1.56k | replacement.offset(), "the value of an object may not be an indirect object reference"); |
428 | 1.56k | return; |
429 | 1.56k | } |
430 | 39.5k | pdf.replaceObject(og, replacement); |
431 | 39.5k | next_obj = pdf.getObject(og); |
432 | 39.5k | setObjectDescription(tos.object, value); |
433 | 39.5k | } |
434 | | |
435 | | void |
436 | | QPDF::JSONReactor::topLevelScalar() |
437 | 119 | { |
438 | 119 | QTC::TC("qpdf", "QPDF_json top-level scalar"); |
439 | 119 | throw std::runtime_error("QPDF JSON must be a dictionary"); |
440 | 119 | } |
441 | | |
442 | | bool |
443 | | QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next) |
444 | 84.4k | { |
445 | | // Use this method when the next state is for processing a nested dictionary. |
446 | 84.4k | if (value.isDictionary()) { |
447 | 69.4k | this->next_state = next; |
448 | 69.4k | return true; |
449 | 69.4k | } |
450 | 15.0k | error(value.getStart(), "\"" + key + "\" must be a dictionary"); |
451 | 15.0k | return false; |
452 | 84.4k | } |
453 | | |
454 | | bool |
455 | | QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
456 | 441k | { |
457 | 441k | if (stack.empty()) { |
458 | 0 | throw std::logic_error("stack is empty in dictionaryItem"); |
459 | 0 | } |
460 | 441k | next_state = st_ignore; |
461 | 441k | auto state = stack.back().state; |
462 | 441k | if (state == st_ignore) { |
463 | 13.6k | QTC::TC("qpdf", "QPDF_json ignoring in st_ignore"); |
464 | | // ignore |
465 | 427k | } else if (state == st_top) { |
466 | 28.5k | if (key == "qpdf") { |
467 | 19.1k | this->saw_qpdf = true; |
468 | 19.1k | if (!value.isArray()) { |
469 | 7.61k | QTC::TC("qpdf", "QPDF_json qpdf not array"); |
470 | 7.61k | error(value.getStart(), "\"qpdf\" must be an array"); |
471 | 11.5k | } else { |
472 | 11.5k | next_state = st_qpdf; |
473 | 11.5k | } |
474 | 19.1k | } else { |
475 | | // Ignore all other fields. |
476 | 9.37k | QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key"); |
477 | 9.37k | } |
478 | 399k | } else if (state == st_qpdf_meta) { |
479 | 26.2k | if (key == "pdfversion") { |
480 | 10.9k | this->saw_pdf_version = true; |
481 | 10.9k | std::string v; |
482 | 10.9k | bool okay = false; |
483 | 10.9k | if (value.getString(v)) { |
484 | 5.14k | std::string version; |
485 | 5.14k | char const* p = v.c_str(); |
486 | 5.14k | if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { |
487 | 639 | this->pdf.m->pdf_version = version; |
488 | 639 | okay = true; |
489 | 639 | } |
490 | 5.14k | } |
491 | 10.9k | if (!okay) { |
492 | 10.2k | QTC::TC("qpdf", "QPDF_json bad pdf version"); |
493 | 10.2k | error(value.getStart(), "invalid PDF version (must be \"x.y\")"); |
494 | 10.2k | } |
495 | 15.3k | } else if (key == "jsonversion") { |
496 | 4.39k | this->saw_json_version = true; |
497 | 4.39k | std::string v; |
498 | 4.39k | bool okay = false; |
499 | 4.39k | if (value.getNumber(v)) { |
500 | 3.99k | std::string version; |
501 | 3.99k | if (QUtil::string_to_int(v.c_str()) == 2) { |
502 | 619 | okay = true; |
503 | 619 | } |
504 | 3.99k | } |
505 | 4.39k | if (!okay) { |
506 | 3.64k | QTC::TC("qpdf", "QPDF_json bad json version"); |
507 | 3.64k | error(value.getStart(), "invalid JSON version (must be numeric value 2)"); |
508 | 3.64k | } |
509 | 10.9k | } else if (key == "pushedinheritedpageresources") { |
510 | 1.17k | bool v; |
511 | 1.17k | if (value.getBool(v)) { |
512 | 476 | if (!this->must_be_complete && v) { |
513 | 0 | this->pdf.pushInheritedAttributesToPage(); |
514 | 0 | } |
515 | 698 | } else { |
516 | 698 | QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources"); |
517 | 698 | error(value.getStart(), "pushedinheritedpageresources must be a boolean"); |
518 | 698 | } |
519 | 9.80k | } else if (key == "calledgetallpages") { |
520 | 1.89k | bool v; |
521 | 1.89k | if (value.getBool(v)) { |
522 | 477 | if (!this->must_be_complete && v) { |
523 | 0 | this->pdf.getAllPages(); |
524 | 0 | } |
525 | 1.41k | } else { |
526 | 1.41k | QTC::TC("qpdf", "QPDF_json bad calledgetallpages"); |
527 | 1.41k | error(value.getStart(), "calledgetallpages must be a boolean"); |
528 | 1.41k | } |
529 | 7.91k | } else { |
530 | | // ignore unknown keys for forward compatibility and to skip keys we don't care about |
531 | | // like "maxobjectid". |
532 | 7.91k | QTC::TC("qpdf", "QPDF_json ignore second-level key"); |
533 | 7.91k | } |
534 | 372k | } else if (state == st_objects) { |
535 | 71.1k | int obj = 0; |
536 | 71.1k | int gen = 0; |
537 | 71.1k | if (key == "trailer") { |
538 | 5.66k | this->saw_trailer = true; |
539 | 5.66k | this->cur_object = "trailer"; |
540 | 5.66k | setNextStateIfDictionary(key, value, st_trailer); |
541 | 65.4k | } else if (is_obj_key(key, obj, gen)) { |
542 | 33.2k | this->cur_object = key; |
543 | 33.2k | if (setNextStateIfDictionary(key, value, st_object_top)) { |
544 | 30.6k | next_obj = objects.getObjectForJSON(obj, gen); |
545 | 30.6k | } |
546 | 33.2k | } else { |
547 | 32.1k | QTC::TC("qpdf", "QPDF_json bad object key"); |
548 | 32.1k | error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\""); |
549 | 32.1k | } |
550 | 301k | } else if (state == st_object_top) { |
551 | 52.7k | if (stack.empty()) { |
552 | 0 | throw std::logic_error("stack empty in st_object_top"); |
553 | 0 | } |
554 | 52.7k | auto& tos = stack.back(); |
555 | 52.7k | if (!tos.object) { |
556 | 0 | throw std::logic_error("current object uninitialized in st_object_top"); |
557 | 0 | } |
558 | 52.7k | if (key == "value") { |
559 | | // Don't use setNextStateIfDictionary since this can have any type. |
560 | 33.4k | this->saw_value = true; |
561 | 33.4k | replaceObject(makeObject(value), value); |
562 | 33.4k | next_state = st_object; |
563 | 33.4k | } else if (key == "stream") { |
564 | 13.7k | this->saw_stream = true; |
565 | 13.7k | if (setNextStateIfDictionary(key, value, st_stream)) { |
566 | 12.7k | this->this_stream_needs_data = false; |
567 | 12.7k | if (tos.object.isStream()) { |
568 | 4.31k | QTC::TC("qpdf", "QPDF_json updating existing stream"); |
569 | 8.43k | } else { |
570 | 8.43k | this_stream_needs_data = true; |
571 | 8.43k | replaceObject( |
572 | 8.43k | qpdf::Stream( |
573 | 8.43k | pdf, tos.object.getObjGen(), QPDFObjectHandle::newDictionary(), 0, 0), |
574 | 8.43k | value); |
575 | 8.43k | } |
576 | 12.7k | next_obj = tos.object; |
577 | 12.7k | } else { |
578 | | // Error message already given above |
579 | 963 | QTC::TC("qpdf", "QPDF_json stream not a dictionary"); |
580 | 963 | } |
581 | 13.7k | } else { |
582 | | // Ignore unknown keys for forward compatibility |
583 | 5.62k | QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top"); |
584 | 5.62k | } |
585 | 248k | } else if (state == st_trailer) { |
586 | 7.09k | if (key == "value") { |
587 | 4.54k | this->saw_value = true; |
588 | | // The trailer must be a dictionary, so we can use setNextStateIfDictionary. |
589 | 4.54k | if (setNextStateIfDictionary("trailer.value", value, st_object)) { |
590 | 2.48k | this->pdf.m->trailer = makeObject(value); |
591 | 2.48k | setObjectDescription(this->pdf.m->trailer, value); |
592 | 2.48k | } |
593 | 4.54k | } else if (key == "stream") { |
594 | | // Don't need to set saw_stream here since there's already an error. |
595 | 1.41k | QTC::TC("qpdf", "QPDF_json trailer stream"); |
596 | 1.41k | error(value.getStart(), "the trailer may not be a stream"); |
597 | 1.41k | } else { |
598 | | // Ignore unknown keys for forward compatibility |
599 | 1.13k | QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer"); |
600 | 1.13k | } |
601 | 241k | } else if (state == st_stream) { |
602 | 28.7k | if (stack.empty()) { |
603 | 0 | throw std::logic_error("stack empty in st_stream"); |
604 | 0 | } |
605 | 28.7k | auto& tos = stack.back(); |
606 | 28.7k | if (!tos.object.isStream()) { |
607 | 0 | throw std::logic_error("current object is not stream in st_stream"); |
608 | 0 | } |
609 | 28.7k | if (key == "dict") { |
610 | 6.02k | this->saw_dict = true; |
611 | 6.02k | if (setNextStateIfDictionary("stream.dict", value, st_object)) { |
612 | 5.05k | tos.object.replaceDict(makeObject(value)); |
613 | 5.05k | } else { |
614 | | // An error had already been given by setNextStateIfDictionary |
615 | 966 | QTC::TC("qpdf", "QPDF_json stream dict not dict"); |
616 | 966 | } |
617 | 22.7k | } else if (key == "data") { |
618 | 14.6k | this->saw_data = true; |
619 | 14.6k | std::string v; |
620 | 14.6k | if (!value.getString(v)) { |
621 | 5.51k | QTC::TC("qpdf", "QPDF_json stream data not string"); |
622 | 5.51k | error(value.getStart(), "\"stream.data\" must be a string"); |
623 | 5.51k | tos.object.replaceStreamData("", {}, {}); |
624 | 9.08k | } else { |
625 | | // The range includes the quotes. |
626 | 9.08k | auto start = value.getStart() + 1; |
627 | 9.08k | auto end = value.getEnd() - 1; |
628 | 9.08k | if (end < start) { |
629 | 0 | throw std::logic_error("QPDF_json: JSON string length < 0"); |
630 | 0 | } |
631 | 9.08k | tos.object.replaceStreamData(provide_data(is, start, end), {}, {}); |
632 | 9.08k | } |
633 | 14.6k | } else if (key == "datafile") { |
634 | 3.96k | this->saw_datafile = true; |
635 | 3.96k | std::string filename; |
636 | 3.96k | if (!value.getString(filename)) { |
637 | 736 | QTC::TC("qpdf", "QPDF_json stream datafile not string"); |
638 | 736 | error( |
639 | 736 | value.getStart(), |
640 | 736 | "\"stream.datafile\" must be a string containing a file name"); |
641 | 736 | tos.object.replaceStreamData("", {}, {}); |
642 | 3.22k | } else { |
643 | 3.22k | tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {}); |
644 | 3.22k | } |
645 | 4.17k | } else { |
646 | | // Ignore unknown keys for forward compatibility. |
647 | 4.17k | QTC::TC("qpdf", "QPDF_json ignore unknown key in stream"); |
648 | 4.17k | } |
649 | 213k | } else if (state == st_object) { |
650 | 213k | if (stack.empty()) { |
651 | 0 | throw std::logic_error("stack empty in st_object"); |
652 | 0 | } |
653 | 213k | auto& tos = stack.back(); |
654 | 213k | auto dict = tos.object; |
655 | 213k | if (dict.isStream()) { |
656 | 0 | dict = dict.getDict(); |
657 | 0 | } |
658 | 213k | if (!dict.isDictionary()) { |
659 | 0 | throw std::logic_error( |
660 | 0 | "current object is not stream or dictionary in st_object dictionary item"); |
661 | 0 | } |
662 | 213k | dict.replaceKey( |
663 | 213k | is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key, |
664 | 213k | makeObject(value)); |
665 | 213k | } else { |
666 | 0 | throw std::logic_error("QPDF_json: unknown state " + std::to_string(state)); |
667 | 0 | } |
668 | 441k | return true; |
669 | 441k | } |
670 | | |
671 | | bool |
672 | | QPDF::JSONReactor::arrayItem(JSON const& value) |
673 | 2.53M | { |
674 | 2.53M | if (stack.empty()) { |
675 | 0 | throw std::logic_error("stack is empty in arrayItem"); |
676 | 0 | } |
677 | 2.53M | next_state = st_ignore; |
678 | 2.53M | auto state = stack.back().state; |
679 | 2.53M | if (state == st_qpdf) { |
680 | 38.3k | if (!this->saw_qpdf_meta) { |
681 | 11.1k | this->saw_qpdf_meta = true; |
682 | 11.1k | setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta); |
683 | 27.1k | } else if (!this->saw_objects) { |
684 | 10.1k | this->saw_objects = true; |
685 | 10.1k | setNextStateIfDictionary("qpdf[1]", value, st_objects); |
686 | 17.0k | } else { |
687 | 17.0k | QTC::TC("qpdf", "QPDF_json more than two qpdf elements"); |
688 | 17.0k | error(value.getStart(), "\"qpdf\" must have two elements"); |
689 | 17.0k | } |
690 | 2.49M | } else if (state == st_object) { |
691 | 2.48M | stack.back().object.appendItem(makeObject(value)); |
692 | 2.48M | } |
693 | 2.53M | return true; |
694 | 2.53M | } |
695 | | |
696 | | void |
697 | | QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value) |
698 | 1.85M | { |
699 | 1.85M | auto j_descr = std::get<QPDFObject::JSON_Descr>(*descr); |
700 | 1.85M | if (j_descr.object != cur_object) { |
701 | 24.7k | descr = std::make_shared<QPDFObject::Description>( |
702 | 24.7k | QPDFObject::JSON_Descr(j_descr.input, cur_object)); |
703 | 24.7k | } |
704 | | |
705 | 1.85M | oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart()); |
706 | 1.85M | } |
707 | | |
708 | | QPDFObjectHandle |
709 | | QPDF::JSONReactor::makeObject(JSON const& value) |
710 | 2.73M | { |
711 | 2.73M | QPDFObjectHandle result; |
712 | 2.73M | std::string str_v; |
713 | 2.73M | bool bool_v = false; |
714 | 2.73M | if (value.isDictionary()) { |
715 | 39.6k | result = QPDFObjectHandle::newDictionary(); |
716 | 39.6k | next_obj = result; |
717 | 39.6k | next_state = st_object; |
718 | 2.69M | } else if (value.isArray()) { |
719 | 24.3k | result = QPDFObjectHandle::newArray(); |
720 | 24.3k | next_obj = result; |
721 | 24.3k | next_state = st_object; |
722 | 2.67M | } else if (value.isNull()) { |
723 | 1.34k | result = QPDFObjectHandle::newNull(); |
724 | 2.67M | } else if (value.getBool(bool_v)) { |
725 | 1.75k | result = QPDFObjectHandle::newBool(bool_v); |
726 | 2.66M | } else if (value.getNumber(str_v)) { |
727 | 1.64M | if (QUtil::is_long_long(str_v.c_str())) { |
728 | 1.63M | result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str())); |
729 | 1.63M | } else { |
730 | | // JSON allows scientific notation, but PDF does not. |
731 | 9.83k | if (str_v.find('e') != std::string::npos || str_v.find('E') != std::string::npos) { |
732 | 6.59k | try { |
733 | 6.59k | auto v = std::stod(str_v); |
734 | 6.59k | str_v = QUtil::double_to_string(v); |
735 | 6.59k | } catch (std::exception&) { |
736 | | // Keep it as it was |
737 | 471 | } |
738 | 6.59k | } |
739 | 9.83k | result = QPDFObjectHandle::newReal(str_v); |
740 | 9.83k | } |
741 | 1.64M | } else if (value.getString(str_v)) { |
742 | 1.02M | int obj = 0; |
743 | 1.02M | int gen = 0; |
744 | 1.02M | std::string str; |
745 | 1.02M | if (is_indirect_object(str_v, obj, gen)) { |
746 | 908k | result = objects.getObjectForJSON(obj, gen); |
747 | 908k | } else if (is_unicode_string(str_v, str)) { |
748 | 11.2k | result = QPDFObjectHandle::newUnicodeString(str); |
749 | 108k | } else if (is_binary_string(str_v, str)) { |
750 | 2.44k | result = QPDFObjectHandle::newString(QUtil::hex_decode(str)); |
751 | 105k | } else if (is_name(str_v)) { |
752 | 24.7k | result = QPDFObjectHandle::newName(str_v); |
753 | 81.0k | } else if (is_pdf_name(str_v)) { |
754 | 15.9k | result = QPDFObjectHandle::parse(str_v.substr(2)); |
755 | 65.0k | } else { |
756 | 65.0k | QTC::TC("qpdf", "QPDF_json unrecognized string value"); |
757 | 65.0k | error(value.getStart(), "unrecognized string value"); |
758 | 65.0k | result = QPDFObjectHandle::newNull(); |
759 | 65.0k | } |
760 | 1.02M | } |
761 | 2.73M | if (!result) { |
762 | 0 | throw std::logic_error("JSONReactor::makeObject didn't initialize the object"); |
763 | 0 | } |
764 | | |
765 | 2.73M | if (!result.hasObjectDescription()) { |
766 | 1.81M | setObjectDescription(result, value); |
767 | 1.81M | } |
768 | 2.73M | return result; |
769 | 2.73M | } |
770 | | |
771 | | void |
772 | | QPDF::createFromJSON(std::string const& json_file) |
773 | 0 | { |
774 | 0 | createFromJSON(std::make_shared<FileInputSource>(json_file.c_str())); |
775 | 0 | } |
776 | | |
777 | | void |
778 | | QPDF::createFromJSON(std::shared_ptr<InputSource> is) |
779 | 15.6k | { |
780 | 15.6k | processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF)); |
781 | 15.6k | importJSON(is, true); |
782 | 15.6k | } |
783 | | |
784 | | void |
785 | | QPDF::updateFromJSON(std::string const& json_file) |
786 | 0 | { |
787 | 0 | updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str())); |
788 | 0 | } |
789 | | |
790 | | void |
791 | | QPDF::updateFromJSON(std::shared_ptr<InputSource> is) |
792 | 0 | { |
793 | 0 | importJSON(is, false); |
794 | 0 | } |
795 | | |
796 | | void |
797 | | QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) |
798 | 15.6k | { |
799 | 15.6k | JSONReactor reactor(*this, is, must_be_complete); |
800 | 15.6k | try { |
801 | 15.6k | JSON::parse(*is, &reactor); |
802 | 15.6k | } catch (std::runtime_error& e) { |
803 | 15.5k | throw std::runtime_error(is->getName() + ": " + e.what()); |
804 | 15.5k | } |
805 | 122 | if (reactor.anyErrors()) { |
806 | 101 | throw std::runtime_error(is->getName() + ": errors found in JSON"); |
807 | 101 | } |
808 | 122 | } |
809 | | |
810 | | void |
811 | | writeJSONStreamFile( |
812 | | int version, |
813 | | JSON::Writer& jw, |
814 | | qpdf::Stream& stream, |
815 | | int id, |
816 | | qpdf_stream_decode_level_e decode_level, |
817 | | std::string const& file_prefix) |
818 | 0 | { |
819 | 0 | auto filename = file_prefix + "-" + std::to_string(id); |
820 | 0 | auto* f = QUtil::safe_fopen(filename.c_str(), "wb"); |
821 | 0 | Pl_StdioFile f_pl{"stream data", f}; |
822 | 0 | stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename); |
823 | 0 | f_pl.finish(); |
824 | 0 | fclose(f); |
825 | 0 | } |
826 | | |
827 | | void |
828 | | QPDF::writeJSON( |
829 | | int version, |
830 | | Pipeline* p, |
831 | | qpdf_stream_decode_level_e decode_level, |
832 | | qpdf_json_stream_data_e json_stream_data, |
833 | | std::string const& file_prefix, |
834 | | std::set<std::string> wanted_objects) |
835 | 0 | { |
836 | 0 | bool first = true; |
837 | 0 | writeJSON(version, p, true, first, decode_level, json_stream_data, file_prefix, wanted_objects); |
838 | 0 | } |
839 | | |
// Write the "qpdf" key of the qpdf JSON representation of this PDF to the pipeline.
//
// version:          must be 2; any other value throws std::runtime_error.
// p:                destination pipeline; finished only when complete is true.
// complete:         when true, the output is wrapped in "{" ... "}" to form a whole document.
//                   When false, only the key is written, preceded by "," unless first_key is
//                   true.
// first_key:        in/out; always set to false before returning.
// decode_level:     passed through when writing stream data.
// json_stream_data: how stream data is represented; qpdf_sj_file writes each stream's data to
//                   its own file via writeJSONStreamFile.
// file_prefix:      prefix for stream data file names when json_stream_data is qpdf_sj_file.
// wanted_objects:   keys ("obj:n n R" or "trailer") to include; empty means everything.
void
QPDF::writeJSON(
    int version,
    Pipeline* p,
    bool complete,
    bool& first_key,
    qpdf_stream_decode_level_e decode_level,
    qpdf_json_stream_data_e json_stream_data,
    std::string const& file_prefix,
    std::set<std::string> wanted_objects)
{
    if (version != 2) {
        throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
    }
    JSON::Writer jw{p, 4};
    if (complete) {
        jw << "{";
    } else if (!first_key) {
        jw << ",";
    }
    first_key = false;

    // Emit the opening of the "qpdf" array and its first element (the metadata dictionary),
    // then open the second element (the objects dictionary).
    /* clang-format off */
    jw << "\n"
          " \"qpdf\": [\n"
          " {\n"
          " \"jsonversion\": " << std::to_string(version) << ",\n"
          " \"pdfversion\": \"" << getPDFVersion() << "\",\n"
          " \"pushedinheritedpageresources\": " << (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
          " \"calledgetallpages\": " << (everCalledGetAllPages() ? "true" : "false") << ",\n"
          " \"maxobjectid\": " << std::to_string(getObjectCount()) << "\n"
          " },\n"
          " {";
    /* clang-format on */

    bool all_objects = wanted_objects.empty();
    // "first" tracks whether any entry has been written yet. Each entry's closing brace is
    // deferred until the next entry (or the end) so that separators can be placed correctly.
    bool first = true;
    for (auto& obj: getAllObjects()) {
        auto const og = obj.getObjGen();
        std::string key = "obj:" + og.unparse(' ') + " R";
        if (all_objects || wanted_objects.contains(key)) {
            if (first) {
                jw << "\n \"" << key;
                first = false;
            } else {
                // Close the previous entry before starting this one.
                jw << "\n },\n \"" << key;
            }
            if (Stream stream = obj) {
                jw << "\": {\n \"stream\": ";
                if (json_stream_data == qpdf_sj_file) {
                    writeJSONStreamFile(
                        version, jw, stream, og.getObj(), decode_level, file_prefix);
                } else {
                    stream.writeStreamJSON(
                        version, jw, json_stream_data, decode_level, nullptr, "");
                }
            } else {
                jw << "\": {\n \"value\": ";
                obj.writeJSON(version, jw, true);
            }
        }
    }
    if (all_objects || wanted_objects.contains("trailer")) {
        if (!first) {
            // Close the last object entry before writing the trailer.
            jw << "\n },";
        }
        jw << "\n \"trailer\": {\n \"value\": ";
        getTrailer().writeJSON(version, jw, true);
        first = false;
    }
    if (!first) {
        // Close whichever entry was written last.
        jw << "\n }";
    }
    // Close the objects dictionary and the "qpdf" array.
    /* clang-format off */
    jw << "\n"
          " }\n"
          " ]";
    /* clang-format on */
    if (complete) {
        jw << "\n}\n";
        p->finish();
    }
}
922 | 0 | } |