Coverage Report

Created: 2025-12-05 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_json.cc
Line
Count
Source
1
#include <qpdf/QPDF.hh>
2
3
#include <qpdf/FileInputSource.hh>
4
#include <qpdf/InputSource_private.hh>
5
#include <qpdf/JSON_writer.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_StdioFile.hh>
8
#include <qpdf/QIntC.hh>
9
#include <qpdf/QPDFObjectHandle_private.hh>
10
#include <qpdf/QPDFObject_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/Util.hh>
14
15
#include <algorithm>
16
#include <cstring>
17
18
using namespace qpdf;
19
20
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
21
22
//                                |
23
// {                              |   -> st_top
24
//   "qpdf": [                    |   -> st_qpdf
25
//     {                          |   -> st_qpdf_meta
26
//       ...                      |   ...
27
//     },                         |   ...
28
//     {                          |   -> st_objects
29
//       "obj:1 0 R": {           |   -> st_object_top
30
//         "value": {             |   -> st_object
31
//           "/Pages": "2 0 R",   |   ...
32
//           "/Type": "/Catalog"  |   ...
33
//         }                      |   <- st_object_top
34
//       },                       |   <- st_objects
35
//       "obj:2 0 R": {           |   -> st_object_top
36
//         "value": 12            |   -> st_object
37
//         }                      |   <- st_object_top
38
//       },                       |   <- st_objects
39
//       "obj:4 0 R": {           |   -> st_object_top
40
//         "stream": {            |   -> st_stream
41
//           "data": "cG90YXRv",  |   ...
42
//           "dict": {            |   -> st_object
43
//             "/K": true         |   ...
44
//           }                    |   <- st_stream
45
//         }                      |   <- st_object_top
46
//       },                       |   <- st_objects
47
//       "trailer": {             |   -> st_trailer
48
//         "value": {             |   -> st_object
49
//           "/Root": "1 0 R",    |   ...
50
//           "/Size": 7           |   ...
51
//         }                      |   <- st_trailer
52
//       }                        |   <- st_objects
53
//     }                          |   <- st_qpdf
54
//   ]                            |   <- st_top
55
// }                              |
56
57
// Minimal PDF that createFromJSON() hands to processMemoryFile() before importing the JSON. It
// consists of a header line, a cross-reference table with a single entry, and a trailer whose
// /Size is 1. The number after "startxref" (9) is the byte offset of "xref", which equals the
// length of the "%PDF-1.3\n" header line.
static char const* JSON_PDF =
    // header
    "%PDF-1.3\n"
    // cross-reference table
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    // trailer
    "trailer << /Size 1 >>\n"
    // location of the cross-reference table and end-of-file marker
    "startxref\n"
    "9\n"
    "%%EOF\n";
67
68
// Validator methods -- these are much more performant than std::regex.
69
static bool
70
is_indirect_object(std::string const& v, int& obj, int& gen)
71
925k
{
72
925k
    char const* p = v.c_str();
73
925k
    std::string o_str;
74
925k
    std::string g_str;
75
925k
    if (!util::is_digit(*p)) {
76
100k
        return false;
77
100k
    }
78
2.08M
    while (util::is_digit(*p)) {
79
1.25M
        o_str.append(1, *p++);
80
1.25M
    }
81
825k
    if (*p != ' ') {
82
6.55k
        return false;
83
6.55k
    }
84
6.36M
    while (*p == ' ') {
85
5.54M
        ++p;
86
5.54M
    }
87
819k
    if (!util::is_digit(*p)) {
88
2.22k
        return false;
89
2.22k
    }
90
13.3M
    while (util::is_digit(*p)) {
91
12.5M
        g_str.append(1, *p++);
92
12.5M
    }
93
816k
    if (*p != ' ') {
94
3.07k
        return false;
95
3.07k
    }
96
16.0M
    while (*p == ' ') {
97
15.2M
        ++p;
98
15.2M
    }
99
813k
    if (*p++ != 'R') {
100
2.15k
        return false;
101
2.15k
    }
102
811k
    if (*p) {
103
1.59k
        return false;
104
1.59k
    }
105
810k
    obj = QUtil::string_to_int(o_str.c_str());
106
810k
    gen = QUtil::string_to_int(g_str.c_str());
107
810k
    return obj > 0;
108
811k
}
109
110
static bool
111
is_obj_key(std::string const& v, int& obj, int& gen)
112
61.1k
{
113
61.1k
    if (v.substr(0, 4) != "obj:") {
114
23.5k
        return false;
115
23.5k
    }
116
37.6k
    return is_indirect_object(v.substr(4), obj, gen);
117
61.1k
}
118
119
// Recognize a string starting with "u:". On a match, str receives everything after the prefix
// (possibly empty) and the result is true; otherwise str is left unchanged.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "u:") != 0) {
        return false;
    }
    str = v.substr(2);
    return true;
}
128
129
static bool
130
is_binary_string(std::string const& v, std::string& str)
131
96.8k
{
132
96.8k
    if (v.substr(0, 2) == "b:") {
133
6.09k
        str = v.substr(2);
134
6.09k
        int count = 0;
135
70.4k
        for (char c: str) {
136
70.4k
            if (!util::is_hex_digit(c)) {
137
2.79k
                return false;
138
2.79k
            }
139
67.6k
            ++count;
140
67.6k
        }
141
3.30k
        return (count % 2 == 0);
142
6.09k
    }
143
90.7k
    return false;
144
96.8k
}
145
146
// True when v is a '/' followed by at least one more character.
static bool
is_name(std::string const& v)
{
    return v.size() >= 2 && v.front() == '/';
}
151
152
// True when v starts with "n:/" and has at least one character after that prefix.
static bool
is_pdf_name(std::string const& v)
{
    return v.size() >= 4 && v.compare(0, 3, "n:/") == 0;
}
157
158
bool
159
QPDF::test_json_validators()
160
0
{
161
0
    bool passed = true;
162
0
    auto check_fn = [&passed](char const* msg, bool expr) {
163
0
        if (!expr) {
164
0
            passed = false;
165
0
            std::cerr << msg << '\n';
166
0
        }
167
0
    };
168
0
#define check(expr) check_fn(#expr, expr)
169
170
0
    int obj = 0;
171
0
    int gen = 0;
172
0
    check(!is_indirect_object("", obj, gen));
173
0
    check(!is_indirect_object("12", obj, gen));
174
0
    check(!is_indirect_object("x12 0 R", obj, gen));
175
0
    check(!is_indirect_object("12 0 Rx", obj, gen));
176
0
    check(!is_indirect_object("12 0R", obj, gen));
177
0
    check(is_indirect_object("52 1 R", obj, gen));
178
0
    check(obj == 52);
179
0
    check(gen == 1);
180
0
    check(is_indirect_object("53  20  R", obj, gen));
181
0
    check(obj == 53);
182
0
    check(gen == 20);
183
0
    check(!is_obj_key("", obj, gen));
184
0
    check(!is_obj_key("obj:x", obj, gen));
185
0
    check(!is_obj_key("obj:x", obj, gen));
186
0
    check(is_obj_key("obj:12 13 R", obj, gen));
187
0
    check(obj == 12);
188
0
    check(gen == 13);
189
0
    std::string str;
190
0
    check(!is_unicode_string("", str));
191
0
    check(!is_unicode_string("xyz", str));
192
0
    check(!is_unicode_string("x:", str));
193
0
    check(is_unicode_string("u:potato", str));
194
0
    check(str == "potato");
195
0
    check(is_unicode_string("u:", str));
196
0
    check(str.empty());
197
0
    check(!is_binary_string("", str));
198
0
    check(!is_binary_string("x:", str));
199
0
    check(!is_binary_string("b:1", str));
200
0
    check(!is_binary_string("b:123", str));
201
0
    check(!is_binary_string("b:gh", str));
202
0
    check(is_binary_string("b:", str));
203
0
    check(is_binary_string("b:12", str));
204
0
    check(is_binary_string("b:123aBC", str));
205
0
    check(!is_name(""));
206
0
    check(!is_name("/"));
207
0
    check(!is_name("xyz"));
208
0
    check(is_name("/Potato"));
209
0
    check(is_name("/Potato Salad"));
210
211
0
    return passed;
212
0
#undef check_arg
213
0
}
214
215
static std::function<void(Pipeline*)>
216
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
217
7.72k
{
218
7.72k
    return [is, start, end](Pipeline* p) {
219
0
        auto data = is->read(QIntC::to_size(end - start), start);
220
0
        data = Pl_Base64::decode(data);
221
0
        p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size());
222
0
        p->finish();
223
0
    };
224
7.72k
}
225
226
// JSON::Reactor implementation that turns qpdf JSON into objects of a QPDF instance while the
// JSON is being parsed. It keeps a stack with one frame per open JSON container; each frame
// records the parsing state of that container and, where applicable, the PDF object being
// filled in from it. Problems are reported through error(), which issues a warning on the QPDF
// and remembers that an error occurred (see anyErrors()).
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // pdf: document being populated. is: input source the JSON is read from; it is also used
    // later to re-read stream data. must_be_complete: when true, "pdfversion" and "trailer" are
    // required.
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        // Copied rather than moved: the descr initializer below still reads the parameter.
        is(is),
        must_be_complete(must_be_complete),
        descr(
            std::make_shared<QPDFObject::Description>(
                QPDFObject::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
    }
    ~JSONReactor() override = default;
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True once error() has been called at least once.
    bool anyErrors() const;

  private:
    // Parsing state of a JSON container. See the state transition chart at the top of the file.
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        // A named rvalue reference is an lvalue, so it must be moved explicitly; otherwise the
        // handle is copied.
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    // Initialized from pdf, so it must stay declared after it.
    QPDF::Doc::Objects& objects = pdf.m->objects;
    std::shared_ptr<InputSource> is;
    bool must_be_complete{true};
    // Description shared by objects created from the current top-level object.
    std::shared_ptr<QPDFObject::Description> descr;
    bool errors{false};
    // Whole-document bookkeeping, checked when the outermost container ends.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};
    // Key of the entry of "qpdf[1]" being processed ("trailer" or "obj:n n R"); used in messages.
    std::string cur_object;
    // Per-object bookkeeping, reset each time an entry of "qpdf[1]" ends.
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    // Set when the current "stream" entry had to create a new stream object.
    bool this_stream_needs_data{false};
    std::vector<StackFrame> stack;
    // Object and state that the next container pushed by containerStart() will receive.
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
302
303
void
304
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
305
169k
{
306
169k
    errors = true;
307
169k
    std::string object = this->cur_object;
308
169k
    if (is->getName() != pdf.getFilename()) {
309
0
        object += " from " + is->getName();
310
0
    }
311
169k
    pdf.warn(qpdf_e_json, object, offset, msg);
312
169k
}
313
314
bool
315
QPDF::JSONReactor::anyErrors() const
316
119
{
317
119
    return errors;
318
119
}
319
320
void
321
QPDF::JSONReactor::containerStart()
322
168k
{
323
168k
    if (next_obj) {
324
98.7k
        stack.emplace_back(next_state, std::move(next_obj));
325
98.7k
        next_obj = QPDFObjectHandle();
326
98.7k
    } else {
327
69.7k
        stack.emplace_back(next_state);
328
69.7k
    }
329
168k
}
330
331
void
332
QPDF::JSONReactor::dictionaryStart()
333
124k
{
334
124k
    containerStart();
335
124k
}
336
337
void
338
QPDF::JSONReactor::arrayStart()
339
45.0k
{
340
45.0k
    if (stack.empty()) {
341
729
        QTC::TC("qpdf", "QPDF_json top-level array");
342
729
        throw std::runtime_error("QPDF JSON must be a dictionary");
343
729
    }
344
44.2k
    containerStart();
345
44.2k
}
346
347
void
348
QPDF::JSONReactor::containerEnd(JSON const& value)
349
66.0k
{
350
66.0k
    auto from_state = stack.back().state;
351
66.0k
    stack.pop_back();
352
66.0k
    if (stack.empty()) {
353
143
        if (!this->saw_qpdf) {
354
37
            QTC::TC("qpdf", "QPDF_json missing qpdf");
355
37
            error(0, "\"qpdf\" object was not seen");
356
106
        } else {
357
106
            if (!this->saw_json_version) {
358
88
                QTC::TC("qpdf", "QPDF_json missing json version");
359
88
                error(0, "\"qpdf[0].jsonversion\" was not seen");
360
88
            }
361
106
            if (must_be_complete && !this->saw_pdf_version) {
362
86
                QTC::TC("qpdf", "QPDF_json missing pdf version");
363
86
                error(0, "\"qpdf[0].pdfversion\" was not seen");
364
86
            }
365
106
            if (!this->saw_objects) {
366
17
                QTC::TC("qpdf", "QPDF_json missing objects");
367
17
                error(0, "\"qpdf[1]\" was not seen");
368
89
            } else {
369
89
                if (must_be_complete && !this->saw_trailer) {
370
67
                    QTC::TC("qpdf", "QPDF_json missing trailer");
371
67
                    error(0, "\"qpdf[1].trailer\" was not seen");
372
67
                }
373
89
            }
374
106
        }
375
65.8k
    } else if (from_state == st_trailer) {
376
1.79k
        if (!saw_value) {
377
1.28k
            QTC::TC("qpdf", "QPDF_json trailer no value");
378
1.28k
            error(value.getStart(), "\"trailer\" is missing \"value\"");
379
1.28k
        }
380
64.0k
    } else if (from_state == st_object_top) {
381
20.2k
        if (saw_value == saw_stream) {
382
2.36k
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
383
2.36k
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
384
2.36k
        }
385
20.2k
        if (saw_stream) {
386
9.76k
            if (!saw_dict) {
387
5.82k
                QTC::TC("qpdf", "QPDF_json stream no dict");
388
5.82k
                error(value.getStart(), "\"stream\" is missing \"dict\"");
389
5.82k
            }
390
9.76k
            if (saw_data == saw_datafile) {
391
5.09k
                if (this_stream_needs_data) {
392
2.33k
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
393
2.33k
                    error(
394
2.33k
                        value.getStart(),
395
2.33k
                        "new \"stream\" must have exactly one of \"data\" or \"datafile\"");
396
2.76k
                } else if (saw_datafile) {
397
875
                    QTC::TC("qpdf", "QPDF_json data and datafile");
398
875
                    error(
399
875
                        value.getStart(),
400
875
                        "existing \"stream\" may at most one of \"data\" or \"datafile\"");
401
1.89k
                } else {
402
1.89k
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
403
1.89k
                }
404
5.09k
            }
405
9.76k
        }
406
20.2k
    }
407
66.0k
    if (!stack.empty()) {
408
65.8k
        auto state = stack.back().state;
409
65.8k
        if (state == st_objects) {
410
26.4k
            this->cur_object = "";
411
26.4k
            this->saw_dict = false;
412
26.4k
            this->saw_data = false;
413
26.4k
            this->saw_datafile = false;
414
26.4k
            this->saw_value = false;
415
26.4k
            this->saw_stream = false;
416
26.4k
        }
417
65.8k
    }
418
66.0k
}
419
420
void
421
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
422
39.7k
{
423
39.7k
    auto& tos = stack.back();
424
39.7k
    auto og = tos.object.getObjGen();
425
39.7k
    if (replacement.isIndirect() && !(replacement.isStream() && replacement.getObjGen() == og)) {
426
1.31k
        error(
427
1.31k
            replacement.offset(), "the value of an object may not be an indirect object reference");
428
1.31k
        return;
429
1.31k
    }
430
38.3k
    pdf.replaceObject(og, replacement);
431
38.3k
    next_obj = pdf.getObject(og);
432
38.3k
    setObjectDescription(tos.object, value);
433
38.3k
}
434
435
void
436
QPDF::JSONReactor::topLevelScalar()
437
120
{
438
120
    QTC::TC("qpdf", "QPDF_json top-level scalar");
439
120
    throw std::runtime_error("QPDF JSON must be a dictionary");
440
120
}
441
442
bool
443
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
444
78.5k
{
445
    // Use this method when the next state is for processing a nested dictionary.
446
78.5k
    if (value.isDictionary()) {
447
61.9k
        this->next_state = next;
448
61.9k
        return true;
449
61.9k
    }
450
16.6k
    error(value.getStart(), "\"" + key + "\" must be a dictionary");
451
16.6k
    return false;
452
78.5k
}
453
454
bool
455
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
456
403k
{
457
403k
    if (stack.empty()) {
458
0
        throw std::logic_error("stack is empty in dictionaryItem");
459
0
    }
460
403k
    next_state = st_ignore;
461
403k
    auto state = stack.back().state;
462
403k
    if (state == st_ignore) {
463
13.0k
        return true; // ignore
464
13.0k
    }
465
390k
    if (state == st_top) {
466
26.6k
        if (key == "qpdf") {
467
18.0k
            saw_qpdf = true;
468
18.0k
            if (!value.isArray()) {
469
6.60k
                error(value.getStart(), "\"qpdf\" must be an array");
470
11.4k
            } else {
471
11.4k
                next_state = st_qpdf;
472
11.4k
            }
473
18.0k
            return true;
474
18.0k
        }
475
8.54k
        return true; // Ignore all other fields.
476
26.6k
    }
477
478
363k
    if (state == st_qpdf_meta) {
479
25.8k
        if (key == "pdfversion") {
480
10.7k
            saw_pdf_version = true;
481
10.7k
            std::string v;
482
10.7k
            if (value.getString(v)) {
483
4.89k
                std::string version;
484
4.89k
                char const* p = v.c_str();
485
4.89k
                if (objects.validatePDFVersion(p, version) && *p == '\0') {
486
647
                    pdf.m->pdf_version = version;
487
647
                    return true;
488
647
                }
489
4.89k
            }
490
10.0k
            error(value.getStart(), "invalid PDF version (must be \"x.y\")");
491
10.0k
            return true;
492
10.7k
        }
493
15.1k
        if (key == "jsonversion") {
494
4.49k
            saw_json_version = true;
495
4.49k
            std::string v;
496
4.49k
            if (value.getNumber(v)) {
497
4.09k
                std::string version;
498
4.09k
                if (QUtil::string_to_int(v.c_str()) == 2) {
499
481
                    return true;
500
481
                }
501
4.09k
            }
502
4.01k
            error(value.getStart(), "invalid JSON version (must be numeric value 2)");
503
4.01k
            return true;
504
4.49k
        }
505
10.6k
        if (key == "pushedinheritedpageresources") {
506
1.21k
            bool v;
507
1.21k
            if (value.getBool(v)) {
508
427
                if (!must_be_complete && v) {
509
0
                    pdf.pushInheritedAttributesToPage();
510
0
                }
511
427
                return true;
512
427
            }
513
787
            error(value.getStart(), "pushedinheritedpageresources must be a boolean");
514
787
            return true;
515
1.21k
        }
516
9.47k
        if (key == "calledgetallpages") {
517
1.84k
            bool v;
518
1.84k
            if (value.getBool(v)) {
519
429
                if (!must_be_complete && v) {
520
0
                    (void)pdf.doc().pages().all();
521
0
                }
522
429
                return true;
523
429
            }
524
1.41k
            error(value.getStart(), "calledgetallpages must be a boolean");
525
1.41k
            return true;
526
1.84k
        }
527
        // ignore unknown keys for forward compatibility and to skip keys we don't care about
528
        // like "maxobjectid".
529
7.62k
        return true;
530
9.47k
    }
531
532
337k
    if (state == st_objects) {
533
67.0k
        if (key == "trailer") {
534
5.81k
            saw_trailer = true;
535
5.81k
            cur_object = "trailer";
536
5.81k
            setNextStateIfDictionary(key, value, st_trailer);
537
5.81k
            return true;
538
5.81k
        }
539
540
61.1k
        int obj = 0;
541
61.1k
        int gen = 0;
542
61.1k
        if (is_obj_key(key, obj, gen)) {
543
29.9k
            cur_object = key;
544
29.9k
            if (setNextStateIfDictionary(key, value, st_object_top)) {
545
26.8k
                next_obj = objects.getObjectForJSON(obj, gen);
546
26.8k
            }
547
29.9k
            return true;
548
29.9k
        }
549
31.2k
        error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
550
31.2k
        return true;
551
61.1k
    }
552
553
270k
    if (state == st_object_top) {
554
51.1k
        util::assertion(!stack.empty(), "QPDF_json: stack empty in st_object_top");
555
51.1k
        auto& tos = stack.back();
556
51.1k
        util::assertion(!!tos.object, "current object uninitialized in st_object_top");
557
51.1k
        if (key == "value") {
558
            // Don't use setNextStateIfDictionary since this can have any type.
559
33.4k
            saw_value = true;
560
33.4k
            replaceObject(makeObject(value), value);
561
33.4k
            next_state = st_object;
562
33.4k
            return true;
563
33.4k
        }
564
17.6k
        if (key == "stream") {
565
11.7k
            saw_stream = true;
566
11.7k
            if (setNextStateIfDictionary(key, value, st_stream)) {
567
10.8k
                this_stream_needs_data = false;
568
10.8k
                if (tos.object.isStream()) {
569
3.92k
                    QTC::TC("qpdf", "QPDF_json updating existing stream");
570
6.92k
                } else {
571
6.92k
                    this_stream_needs_data = true;
572
6.92k
                    replaceObject(
573
6.92k
                        qpdf::Stream(
574
6.92k
                            pdf, tos.object.getObjGen(), QPDFObjectHandle::newDictionary(), 0, 0),
575
6.92k
                        value);
576
6.92k
                }
577
10.8k
                next_obj = tos.object;
578
10.8k
                return true;
579
10.8k
            }
580
861
            return true; // Error message already given above
581
11.7k
        }
582
5.97k
        return true; // Ignore unknown keys for forward compatibility
583
17.6k
    }
584
585
219k
    if (state == st_trailer) {
586
6.84k
        if (key == "value") {
587
4.34k
            saw_value = true;
588
            // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
589
4.34k
            if (setNextStateIfDictionary("trailer.value", value, st_object)) {
590
2.54k
                pdf.m->trailer = makeObject(value);
591
2.54k
                setObjectDescription(pdf.m->trailer, value);
592
2.54k
            }
593
4.34k
            return true;
594
4.34k
        }
595
2.50k
        if (key == "stream") {
596
            // Don't need to set saw_stream here since there's already an error.
597
1.39k
            error(value.getStart(), "the trailer may not be a stream");
598
1.39k
            return true;
599
1.39k
        }
600
1.10k
        return true; // Ignore unknown keys for forward compatibility
601
2.50k
    }
602
603
212k
    if (state == st_stream) {
604
24.5k
        util::assertion(!stack.empty(), "stack empty in st_stream");
605
24.5k
        auto& tos = stack.back();
606
24.5k
        util::assertion(tos.object.isStream(), "current object is not stream in st_stream");
607
24.5k
        if (key == "dict") {
608
5.62k
            saw_dict = true;
609
5.62k
            if (setNextStateIfDictionary("stream.dict", value, st_object)) {
610
3.66k
                tos.object.replaceDict(makeObject(value));
611
3.66k
                return true;
612
3.66k
            }
613
1.96k
            return true; // An error had already been given by setNextStateIfDictionary
614
5.62k
        }
615
18.9k
        if (key == "data") {
616
12.7k
            saw_data = true;
617
12.7k
            std::string v;
618
12.7k
            if (!value.getString(v)) {
619
5.04k
                error(value.getStart(), "\"stream.data\" must be a string");
620
5.04k
                tos.object.replaceStreamData("", {}, {});
621
5.04k
                return true;
622
5.04k
            }
623
            // The range includes the quotes.
624
7.72k
            auto start = value.getStart() + 1;
625
7.72k
            auto end = value.getEnd() - 1;
626
7.72k
            util::assertion(end >= start, "QPDF_json: JSON string length < 0");
627
7.72k
            tos.object.replaceStreamData(provide_data(is, start, end), {}, {});
628
7.72k
            return true;
629
12.7k
        }
630
6.18k
        if (key == "datafile") {
631
3.79k
            saw_datafile = true;
632
3.79k
            std::string filename;
633
3.79k
            if (!value.getString(filename)) {
634
822
                error(
635
822
                    value.getStart(),
636
822
                    "\"stream.datafile\" must be a string containing a file name");
637
822
                tos.object.replaceStreamData("", {}, {});
638
822
                return true;
639
822
            }
640
2.97k
            tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {});
641
2.97k
            return true;
642
3.79k
        }
643
2.38k
        return true; // Ignore unknown keys for forward compatibility.
644
6.18k
    }
645
646
187k
    util::assertion(state == st_object, "QPDF_json: unknown state " + std::to_string(state));
647
187k
    util::assertion(!stack.empty(), "stack empty in st_object");
648
187k
    auto& tos = stack.back();
649
187k
    auto dict = tos.object;
650
187k
    if (dict.isStream()) {
651
0
        dict = dict.getDict();
652
0
    }
653
187k
    util::assertion(
654
187k
        dict.isDictionary(),
655
187k
        "current object is not stream or dictionary in st_object dictionary item");
656
187k
    dict.replaceKey(
657
187k
        is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
658
187k
        makeObject(value));
659
187k
    return true;
660
212k
}
661
662
bool
663
QPDF::JSONReactor::arrayItem(JSON const& value)
664
2.42M
{
665
2.42M
    if (stack.empty()) {
666
0
        throw std::logic_error("stack is empty in arrayItem");
667
0
    }
668
2.42M
    next_state = st_ignore;
669
2.42M
    auto state = stack.back().state;
670
2.42M
    if (state == st_qpdf) {
671
36.9k
        if (!this->saw_qpdf_meta) {
672
11.0k
            this->saw_qpdf_meta = true;
673
11.0k
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
674
25.8k
        } else if (!this->saw_objects) {
675
10.0k
            this->saw_objects = true;
676
10.0k
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
677
15.7k
        } else {
678
15.7k
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
679
15.7k
            error(value.getStart(), "\"qpdf\" must have two elements");
680
15.7k
        }
681
2.39M
    } else if (state == st_object) {
682
2.37M
        stack.back().object.appendItem(makeObject(value));
683
2.37M
    }
684
2.42M
    return true;
685
2.42M
}
686
687
void
688
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
689
1.85M
{
690
1.85M
    auto j_descr = std::get<QPDFObject::JSON_Descr>(*descr);
691
1.85M
    if (j_descr.object != cur_object) {
692
21.4k
        descr = std::make_shared<QPDFObject::Description>(
693
21.4k
            QPDFObject::JSON_Descr(j_descr.input, cur_object));
694
21.4k
    }
695
696
1.85M
    oh.obj_sp()->setDescription(&pdf, descr, value.getStart());
697
1.85M
}
698
699
// Convert a JSON value into a QPDFObjectHandle.
//   - dictionary / array: a new empty dictionary / array, which also becomes the pending object
//     (with pending state st_object) so that the container's items are added to it;
//   - null, boolean: the corresponding PDF object;
//   - number: an integer if QUtil::is_long_long accepts the text, otherwise a real. Text with an
//     exponent is first rewritten through std::stod and QUtil::double_to_string when possible;
//   - string: an indirect reference ("n n R"), a unicode string ("u:"), a binary string ("b:" +
//     hex), a name ("/..."), or a parsed name ("n:/..."), tried in that order. Any other string
//     is reported as an error and yields a null object.
// The result gets an object description if it does not already have one.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle result;
    std::string text;
    bool flag = false;

    if (value.isDictionary() || value.isArray()) {
        result =
            value.isDictionary() ? QPDFObjectHandle::newDictionary() : QPDFObjectHandle::newArray();
        next_obj = result;
        next_state = st_object;
    } else if (value.isNull()) {
        result = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        result = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(text)) {
        if (QUtil::is_long_long(text.c_str())) {
            result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(text.c_str()));
        } else {
            // JSON allows scientific notation, but PDF does not.
            if (text.find_first_of("eE") != std::string::npos) {
                try {
                    text = QUtil::double_to_string(std::stod(text));
                } catch (std::exception const&) {
                    // Keep it as it was
                }
            }
            result = QPDFObjectHandle::newReal(text);
        }
    } else if (value.getString(text)) {
        int obj = 0;
        int gen = 0;
        std::string payload;
        if (is_indirect_object(text, obj, gen)) {
            result = objects.getObjectForJSON(obj, gen);
        } else if (is_unicode_string(text, payload)) {
            result = QPDFObjectHandle::newUnicodeString(payload);
        } else if (is_binary_string(text, payload)) {
            result = QPDFObjectHandle::newString(QUtil::hex_decode(payload));
        } else if (is_name(text)) {
            result = QPDFObjectHandle::newName(text);
        } else if (is_pdf_name(text)) {
            result = QPDFObjectHandle::parse(text.substr(2));
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            result = QPDFObjectHandle::newNull();
        }
    }

    if (!result) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }
    if (!result.hasObjectDescription()) {
        setObjectDescription(result, value);
    }
    return result;
}
761
762
void
763
QPDF::createFromJSON(std::string const& json_file)
764
0
{
765
0
    createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
766
0
}
767
768
void
769
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
770
15.4k
{
771
15.4k
    processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
772
15.4k
    importJSON(is, true);
773
15.4k
}
774
775
void
776
QPDF::updateFromJSON(std::string const& json_file)
777
0
{
778
0
    updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
779
0
}
780
781
void
782
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
783
0
{
784
0
    importJSON(is, false);
785
0
}
786
787
void
788
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
789
15.4k
{
790
15.4k
    JSONReactor reactor(*this, is, must_be_complete);
791
15.4k
    try {
792
15.4k
        JSON::parse(*is, &reactor);
793
15.4k
    } catch (std::runtime_error& e) {
794
15.3k
        throw std::runtime_error(is->getName() + ": " + e.what());
795
15.3k
    }
796
119
    if (reactor.anyErrors()) {
797
105
        throw std::runtime_error(is->getName() + ": errors found in JSON");
798
105
    }
799
119
}
800
801
void
802
writeJSONStreamFile(
803
    int version,
804
    JSON::Writer& jw,
805
    qpdf::Stream& stream,
806
    int id,
807
    qpdf_stream_decode_level_e decode_level,
808
    std::string const& file_prefix)
809
0
{
810
0
    auto filename = file_prefix + "-" + std::to_string(id);
811
0
    auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
812
0
    Pl_StdioFile f_pl{"stream data", f};
813
0
    stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
814
0
    f_pl.finish();
815
0
    fclose(f);
816
0
}
817
818
void
819
QPDF::writeJSON(
820
    int version,
821
    Pipeline* p,
822
    qpdf_stream_decode_level_e decode_level,
823
    qpdf_json_stream_data_e json_stream_data,
824
    std::string const& file_prefix,
825
    std::set<std::string> wanted_objects)
826
0
{
827
0
    bool first = true;
828
0
    writeJSON(version, p, true, first, decode_level, json_stream_data, file_prefix, wanted_objects);
829
0
}
830
831
void
832
QPDF::writeJSON(
833
    int version,
834
    Pipeline* p,
835
    bool complete,
836
    bool& first_key,
837
    qpdf_stream_decode_level_e decode_level,
838
    qpdf_json_stream_data_e json_stream_data,
839
    std::string const& file_prefix,
840
    std::set<std::string> wanted_objects)
841
0
{
842
0
    if (version != 2) {
843
0
        throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
844
0
    }
845
0
    JSON::Writer jw{p, 4};
846
0
    if (complete) {
847
0
        jw << "{";
848
0
    } else if (!first_key) {
849
0
        jw << ",";
850
0
    }
851
0
    first_key = false;
852
853
    /* clang-format off */
854
0
    jw << "\n"
855
0
          "  \"qpdf\": [\n"
856
0
          "    {\n"
857
0
          "      \"jsonversion\": " << std::to_string(version) << ",\n"
858
0
          "      \"pdfversion\": \"" << getPDFVersion() << "\",\n"
859
0
          "      \"pushedinheritedpageresources\": " <<  (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
860
0
          "      \"calledgetallpages\": " <<  (everCalledGetAllPages() ? "true" : "false") << ",\n"
861
0
          "      \"maxobjectid\": " <<  std::to_string(getObjectCount()) << "\n"
862
0
          "    },\n"
863
0
          "    {";
864
    /* clang-format on */
865
866
0
    bool all_objects = wanted_objects.empty();
867
0
    bool first = true;
868
0
    for (auto& obj: getAllObjects()) {
869
0
        auto const og = obj.getObjGen();
870
0
        std::string key = "obj:" + og.unparse(' ') + " R";
871
0
        if (all_objects || wanted_objects.contains(key)) {
872
0
            if (first) {
873
0
                jw << "\n      \"" << key;
874
0
                first = false;
875
0
            } else {
876
0
                jw << "\n      },\n      \"" << key;
877
0
            }
878
0
            if (Stream stream = obj) {
879
0
                jw << "\": {\n        \"stream\": ";
880
0
                if (json_stream_data == qpdf_sj_file) {
881
0
                    writeJSONStreamFile(
882
0
                        version, jw, stream, og.getObj(), decode_level, file_prefix);
883
0
                } else {
884
0
                    stream.writeStreamJSON(
885
0
                        version, jw, json_stream_data, decode_level, nullptr, "");
886
0
                }
887
0
            } else {
888
0
                jw << "\": {\n        \"value\": ";
889
0
                obj.writeJSON(version, jw, true);
890
0
            }
891
0
        }
892
0
    }
893
0
    if (all_objects || wanted_objects.contains("trailer")) {
894
0
        if (!first) {
895
0
            jw << "\n      },";
896
0
        }
897
0
        jw << "\n      \"trailer\": {\n        \"value\": ";
898
0
        getTrailer().writeJSON(version, jw, true);
899
0
        first = false;
900
0
    }
901
0
    if (!first) {
902
0
        jw << "\n      }";
903
0
    }
904
    /* clang-format off */
905
0
    jw << "\n"
906
0
          "    }\n"
907
0
          "  ]";
908
    /* clang-format on */
909
0
    if (complete) {
910
0
        jw << "\n}\n";
911
0
        p->finish();
912
0
    }
913
0
}