Coverage Report

Created: 2026-01-16 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_json.cc
Line
Count
Source
1
#include <qpdf/QPDF.hh>
2
3
#include <qpdf/FileInputSource.hh>
4
#include <qpdf/InputSource_private.hh>
5
#include <qpdf/JSON_writer.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_StdioFile.hh>
8
#include <qpdf/QIntC.hh>
9
#include <qpdf/QPDFObjectHandle_private.hh>
10
#include <qpdf/QPDFObject_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/Util.hh>
14
15
#include <algorithm>
16
#include <cstring>
17
18
using namespace qpdf;
19
20
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
21
22
//                                |
23
// {                              |   -> st_top
24
//   "qpdf": [                    |   -> st_qpdf
25
//     {                          |   -> st_qpdf_meta
26
//       ...                      |   ...
27
//     },                         |   ...
28
//     {                          |   -> st_objects
29
//       "obj:1 0 R": {           |   -> st_object_top
30
//         "value": {             |   -> st_object
31
//           "/Pages": "2 0 R",   |   ...
32
//           "/Type": "/Catalog"  |   ...
33
//         }                      |   <- st_object_top
34
//       },                       |   <- st_objects
35
//       "obj:2 0 R": {           |   -> st_object_top
36
//         "value": 12            |   -> st_object
37
//         }                      |   <- st_object_top
38
//       },                       |   <- st_objects
39
//       "obj:4 0 R": {           |   -> st_object_top
40
//         "stream": {            |   -> st_stream
41
//           "data": "cG90YXRv",  |   ...
42
//           "dict": {            |   -> st_object
43
//             "/K": true         |   ...
44
//           }                    |   <- st_stream
45
//         }                      |   <- st_object_top
46
//       },                       |   <- st_objects
47
//       "trailer": {             |   -> st_trailer
48
//         "value": {             |   -> st_object
49
//           "/Root": "1 0 R",    |   ...
50
//           "/Size": 7           |   ...
51
//         }                      |   <- st_trailer
52
//       }                        |   <- st_objects
53
//     }                          |   <- st_qpdf
54
//   ]                            |   <- st_top
55
// }                              |
56
57
// Minimal PDF used as the starting point when a QPDF object is created from JSON. It holds only a
// one-entry cross-reference table and a trailer. "%PDF-1.3\n" is 9 bytes long, so the "xref"
// keyword starts at byte offset 9, which is the value recorded after "startxref".
static char const* JSON_PDF =
    "%PDF-1.3\n"
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    "trailer << /Size 1 >>\n"
    "startxref\n"
    "9\n"
    "%%EOF\n";
67
68
// Validator methods -- these are much more performant than std::regex.
69
static bool
70
is_indirect_object(std::string const& v, int& obj, int& gen)
71
1.46M
{
72
1.46M
    char const* p = v.c_str();
73
1.46M
    std::string o_str;
74
1.46M
    std::string g_str;
75
1.46M
    if (!util::is_digit(*p)) {
76
101k
        return false;
77
101k
    }
78
3.15M
    while (util::is_digit(*p)) {
79
1.78M
        o_str.append(1, *p++);
80
1.78M
    }
81
1.36M
    if (*p != ' ') {
82
4.85k
        return false;
83
4.85k
    }
84
6.66M
    while (*p == ' ') {
85
5.30M
        ++p;
86
5.30M
    }
87
1.35M
    if (!util::is_digit(*p)) {
88
3.59k
        return false;
89
3.59k
    }
90
18.9M
    while (util::is_digit(*p)) {
91
17.6M
        g_str.append(1, *p++);
92
17.6M
    }
93
1.35M
    if (*p != ' ') {
94
3.34k
        return false;
95
3.34k
    }
96
15.6M
    while (*p == ' ') {
97
14.3M
        ++p;
98
14.3M
    }
99
1.35M
    if (*p++ != 'R') {
100
3.31k
        return false;
101
3.31k
    }
102
1.34M
    if (*p) {
103
2.83k
        return false;
104
2.83k
    }
105
1.34M
    obj = QUtil::string_to_int(o_str.c_str());
106
1.34M
    gen = QUtil::string_to_int(g_str.c_str());
107
1.34M
    return obj > 0;
108
1.34M
}
109
110
static bool
111
is_obj_key(std::string const& v, int& obj, int& gen)
112
59.7k
{
113
59.7k
    if (v.substr(0, 4) != "obj:") {
114
23.7k
        return false;
115
23.7k
    }
116
35.9k
    return is_indirect_object(v.substr(4), obj, gen);
117
59.7k
}
118
119
// Recognize a string with the "u:" prefix. On a match, str receives everything after the prefix
// and true is returned; otherwise str is left unchanged and false is returned.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "u:") != 0) {
        return false;
    }
    str = v.substr(2);
    return true;
}
128
129
static bool
130
is_binary_string(std::string const& v, std::string& str)
131
102k
{
132
102k
    if (v.substr(0, 2) == "b:") {
133
6.14k
        str = v.substr(2);
134
6.14k
        int count = 0;
135
77.0k
        for (char c: str) {
136
77.0k
            if (!util::is_hex_digit(c)) {
137
2.90k
                return false;
138
2.90k
            }
139
74.1k
            ++count;
140
74.1k
        }
141
3.23k
        return (count % 2 == 0);
142
6.14k
    }
143
96.5k
    return false;
144
102k
}
145
146
// True when v begins with '/'.
static bool
is_name(std::string const& v)
{
    return !v.empty() && v.front() == '/';
}
151
152
// True when v begins with the three characters "n:/".
static bool
is_pdf_name(std::string const& v)
{
    return v.compare(0, 3, "n:/") == 0;
}
157
158
bool
159
QPDF::test_json_validators()
160
0
{
161
0
    bool passed = true;
162
0
    auto check_fn = [&passed](char const* msg, bool expr) {
163
0
        if (!expr) {
164
0
            passed = false;
165
0
            std::cerr << msg << '\n';
166
0
        }
167
0
    };
168
0
#define check(expr) check_fn(#expr, expr)
169
170
0
    int obj = 0;
171
0
    int gen = 0;
172
0
    check(!is_indirect_object("", obj, gen));
173
0
    check(!is_indirect_object("12", obj, gen));
174
0
    check(!is_indirect_object("x12 0 R", obj, gen));
175
0
    check(!is_indirect_object("12 0 Rx", obj, gen));
176
0
    check(!is_indirect_object("12 0R", obj, gen));
177
0
    check(is_indirect_object("52 1 R", obj, gen));
178
0
    check(obj == 52);
179
0
    check(gen == 1);
180
0
    check(is_indirect_object("53  20  R", obj, gen));
181
0
    check(obj == 53);
182
0
    check(gen == 20);
183
0
    check(!is_obj_key("", obj, gen));
184
0
    check(!is_obj_key("obj:x", obj, gen));
185
0
    check(!is_obj_key("obj:x", obj, gen));
186
0
    check(is_obj_key("obj:12 13 R", obj, gen));
187
0
    check(obj == 12);
188
0
    check(gen == 13);
189
0
    std::string str;
190
0
    check(!is_unicode_string("", str));
191
0
    check(!is_unicode_string("xyz", str));
192
0
    check(!is_unicode_string("x:", str));
193
0
    check(is_unicode_string("u:potato", str));
194
0
    check(str == "potato");
195
0
    check(is_unicode_string("u:", str));
196
0
    check(str.empty());
197
0
    check(!is_binary_string("", str));
198
0
    check(!is_binary_string("x:", str));
199
0
    check(!is_binary_string("b:1", str));
200
0
    check(!is_binary_string("b:123", str));
201
0
    check(!is_binary_string("b:gh", str));
202
0
    check(is_binary_string("b:", str));
203
0
    check(is_binary_string("b:12", str));
204
0
    check(is_binary_string("b:123aBC", str));
205
0
    check(!is_name(""));
206
0
    check(is_name("/"));
207
0
    check(!is_name("xyz"));
208
0
    check(is_name("/Potato"));
209
0
    check(is_name("/Potato Salad"));
210
0
    check(!is_pdf_name("n:"));
211
0
    check(is_pdf_name("n:/"));
212
0
    check(!is_pdf_name("n:xyz"));
213
0
    check(is_pdf_name("n:/Potato"));
214
0
    check(is_pdf_name("n:/Potato Salad"));
215
216
0
    return passed;
217
0
#undef check_arg
218
0
}
219
220
static std::function<void(Pipeline*)>
221
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
222
8.19k
{
223
8.19k
    return [is, start, end](Pipeline* p) {
224
0
        auto data = is->read(QIntC::to_size(end - start), start);
225
0
        data = Pl_Base64::decode(data);
226
0
        p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size());
227
0
        p->finish();
228
0
    };
229
8.19k
}
230
231
// Reactor that receives JSON parser callbacks and builds or updates the objects of a QPDF from
// qpdf JSON. It tracks its position in the document with a stack of states (see the state chart
// near the top of this file) and records problems through error() rather than stopping at the
// first one.
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // Note: the description initializer reads the constructor parameter `is`, so the parameter
    // must not be moved from when initializing the member of the same name.
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        is(is),
        must_be_complete(must_be_complete),
        descr(
            std::make_shared<QPDFObject::Description>(
                QPDFObject::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
    }
    ~JSONReactor() override = default;
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True if error() has been called at least once.
    bool anyErrors() const;

  private:
    // Position within the qpdf JSON structure.
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container: the state that applies inside it and, where relevant,
    // the PDF object being populated from it.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        // `object` is a named rvalue reference and therefore an lvalue here; it has to be moved
        // explicitly, otherwise the handle is copied.
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    // Initialized from pdf, so it must stay declared after pdf.
    QPDF::Doc::Objects& objects = pdf.m->objects;
    std::shared_ptr<InputSource> is;
    // True when creating a PDF from JSON, false when updating an existing one.
    bool must_be_complete{true};
    // Description shared by objects created for the current top-level object.
    std::shared_ptr<QPDFObject::Description> descr;
    bool errors{false};
    // Flags recording which required parts of the document have been seen.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};
    // Key of the top-level object being processed ("trailer" or "obj:n n R"); used in messages.
    std::string cur_object;
    // Per-object flags; reset in containerEnd when returning to st_objects.
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    bool this_stream_needs_data{false};
    std::vector<StackFrame> stack;
    // Object and state to attach to the next container that is opened.
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
307
308
void
309
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
310
170k
{
311
170k
    errors = true;
312
170k
    std::string object = this->cur_object;
313
170k
    if (is->getName() != pdf.getFilename()) {
314
0
        object += " from " + is->getName();
315
0
    }
316
170k
    pdf.warn(qpdf_e_json, object, offset, msg);
317
170k
}
318
319
bool
320
QPDF::JSONReactor::anyErrors() const
321
119
{
322
119
    return errors;
323
119
}
324
325
void
326
QPDF::JSONReactor::containerStart()
327
165k
{
328
165k
    if (next_obj) {
329
96.9k
        stack.emplace_back(next_state, std::move(next_obj));
330
96.9k
        next_obj = QPDFObjectHandle();
331
96.9k
    } else {
332
68.1k
        stack.emplace_back(next_state);
333
68.1k
    }
334
165k
}
335
336
void
337
QPDF::JSONReactor::dictionaryStart()
338
120k
{
339
120k
    containerStart();
340
120k
}
341
342
void
343
QPDF::JSONReactor::arrayStart()
344
44.9k
{
345
44.9k
    if (stack.empty()) {
346
705
        QTC::TC("qpdf", "QPDF_json top-level array");
347
705
        throw std::runtime_error("QPDF JSON must be a dictionary");
348
705
    }
349
44.2k
    containerStart();
350
44.2k
}
351
352
void
353
QPDF::JSONReactor::containerEnd(JSON const& value)
354
63.3k
{
355
63.3k
    auto from_state = stack.back().state;
356
63.3k
    stack.pop_back();
357
63.3k
    if (stack.empty()) {
358
144
        if (!this->saw_qpdf) {
359
38
            QTC::TC("qpdf", "QPDF_json missing qpdf");
360
38
            error(0, "\"qpdf\" object was not seen");
361
106
        } else {
362
106
            if (!this->saw_json_version) {
363
92
                QTC::TC("qpdf", "QPDF_json missing json version");
364
92
                error(0, "\"qpdf[0].jsonversion\" was not seen");
365
92
            }
366
106
            if (must_be_complete && !this->saw_pdf_version) {
367
89
                QTC::TC("qpdf", "QPDF_json missing pdf version");
368
89
                error(0, "\"qpdf[0].pdfversion\" was not seen");
369
89
            }
370
106
            if (!this->saw_objects) {
371
17
                QTC::TC("qpdf", "QPDF_json missing objects");
372
17
                error(0, "\"qpdf[1]\" was not seen");
373
89
            } else {
374
89
                if (must_be_complete && !this->saw_trailer) {
375
71
                    QTC::TC("qpdf", "QPDF_json missing trailer");
376
71
                    error(0, "\"qpdf[1].trailer\" was not seen");
377
71
                }
378
89
            }
379
106
        }
380
63.2k
    } else if (from_state == st_trailer) {
381
1.52k
        if (!saw_value) {
382
1.00k
            QTC::TC("qpdf", "QPDF_json trailer no value");
383
1.00k
            error(value.getStart(), "\"trailer\" is missing \"value\"");
384
1.00k
        }
385
61.6k
    } else if (from_state == st_object_top) {
386
20.2k
        if (saw_value == saw_stream) {
387
2.10k
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
388
2.10k
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
389
2.10k
        }
390
20.2k
        if (saw_stream) {
391
9.44k
            if (!saw_dict) {
392
5.02k
                QTC::TC("qpdf", "QPDF_json stream no dict");
393
5.02k
                error(value.getStart(), "\"stream\" is missing \"dict\"");
394
5.02k
            }
395
9.44k
            if (saw_data == saw_datafile) {
396
4.28k
                if (this_stream_needs_data) {
397
2.19k
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
398
2.19k
                    error(
399
2.19k
                        value.getStart(),
400
2.19k
                        "new \"stream\" must have exactly one of \"data\" or \"datafile\"");
401
2.19k
                } else if (saw_datafile) {
402
700
                    QTC::TC("qpdf", "QPDF_json data and datafile");
403
700
                    error(
404
700
                        value.getStart(),
405
700
                        "existing \"stream\" may at most one of \"data\" or \"datafile\"");
406
1.39k
                } else {
407
1.39k
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
408
1.39k
                }
409
4.28k
            }
410
9.44k
        }
411
20.2k
    }
412
63.3k
    if (!stack.empty()) {
413
63.1k
        auto state = stack.back().state;
414
63.1k
        if (state == st_objects) {
415
25.0k
            this->cur_object = "";
416
25.0k
            this->saw_dict = false;
417
25.0k
            this->saw_data = false;
418
25.0k
            this->saw_datafile = false;
419
25.0k
            this->saw_value = false;
420
25.0k
            this->saw_stream = false;
421
25.0k
        }
422
63.1k
    }
423
63.3k
}
424
425
void
426
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
427
38.3k
{
428
38.3k
    auto& tos = stack.back();
429
38.3k
    auto og = tos.object.getObjGen();
430
38.3k
    if (replacement.isIndirect() && !(replacement.isStream() && replacement.getObjGen() == og)) {
431
1.36k
        error(
432
1.36k
            replacement.offset(), "the value of an object may not be an indirect object reference");
433
1.36k
        return;
434
1.36k
    }
435
36.9k
    pdf.replaceObject(og, replacement);
436
36.9k
    next_obj = pdf.getObject(og);
437
36.9k
    setObjectDescription(tos.object, value);
438
36.9k
}
439
440
void
441
QPDF::JSONReactor::topLevelScalar()
442
122
{
443
122
    QTC::TC("qpdf", "QPDF_json top-level scalar");
444
122
    throw std::runtime_error("QPDF JSON must be a dictionary");
445
122
}
446
447
bool
448
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
449
79.3k
{
450
    // Use this method when the next state is for processing a nested dictionary.
451
79.3k
    if (value.isDictionary()) {
452
62.3k
        this->next_state = next;
453
62.3k
        return true;
454
62.3k
    }
455
17.0k
    error(value.getStart(), "\"" + key + "\" must be a dictionary");
456
17.0k
    return false;
457
79.3k
}
458
459
bool
460
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
461
404k
{
462
404k
    if (stack.empty()) {
463
0
        throw std::logic_error("stack is empty in dictionaryItem");
464
0
    }
465
404k
    next_state = st_ignore;
466
404k
    auto state = stack.back().state;
467
404k
    if (state == st_ignore) {
468
11.9k
        return true; // ignore
469
11.9k
    }
470
392k
    if (state == st_top) {
471
24.3k
        if (key == "qpdf") {
472
16.4k
            saw_qpdf = true;
473
16.4k
            if (!value.isArray()) {
474
4.86k
                error(value.getStart(), "\"qpdf\" must be an array");
475
11.5k
            } else {
476
11.5k
                next_state = st_qpdf;
477
11.5k
            }
478
16.4k
            return true;
479
16.4k
        }
480
7.96k
        return true; // Ignore all other fields.
481
24.3k
    }
482
483
367k
    if (state == st_qpdf_meta) {
484
26.0k
        if (key == "pdfversion") {
485
11.2k
            saw_pdf_version = true;
486
11.2k
            std::string v;
487
11.2k
            if (value.getString(v)) {
488
5.62k
                std::string version;
489
5.62k
                char const* p = v.c_str();
490
5.62k
                if (objects.validatePDFVersion(p, version) && *p == '\0') {
491
1.22k
                    pdf.m->pdf_version = version;
492
1.22k
                    return true;
493
1.22k
                }
494
5.62k
            }
495
10.0k
            error(value.getStart(), "invalid PDF version (must be \"x.y\")");
496
10.0k
            return true;
497
11.2k
        }
498
14.7k
        if (key == "jsonversion") {
499
4.27k
            saw_json_version = true;
500
4.27k
            std::string v;
501
4.27k
            if (value.getNumber(v)) {
502
3.87k
                std::string version;
503
3.87k
                if (QUtil::string_to_int(v.c_str()) == 2) {
504
487
                    return true;
505
487
                }
506
3.87k
            }
507
3.78k
            error(value.getStart(), "invalid JSON version (must be numeric value 2)");
508
3.78k
            return true;
509
4.27k
        }
510
10.5k
        if (key == "pushedinheritedpageresources") {
511
946
            bool v;
512
946
            if (value.getBool(v)) {
513
441
                if (!must_be_complete && v) {
514
0
                    pdf.pushInheritedAttributesToPage();
515
0
                }
516
441
                return true;
517
441
            }
518
505
            error(value.getStart(), "pushedinheritedpageresources must be a boolean");
519
505
            return true;
520
946
        }
521
9.57k
        if (key == "calledgetallpages") {
522
1.94k
            bool v;
523
1.94k
            if (value.getBool(v)) {
524
444
                if (!must_be_complete && v) {
525
0
                    (void)pdf.doc().pages().all();
526
0
                }
527
444
                return true;
528
444
            }
529
1.50k
            error(value.getStart(), "calledgetallpages must be a boolean");
530
1.50k
            return true;
531
1.94k
        }
532
        // ignore unknown keys for forward compatibility and to skip keys we don't care about
533
        // like "maxobjectid".
534
7.62k
        return true;
535
9.57k
    }
536
537
341k
    if (state == st_objects) {
538
65.6k
        if (key == "trailer") {
539
5.87k
            saw_trailer = true;
540
5.87k
            cur_object = "trailer";
541
5.87k
            setNextStateIfDictionary(key, value, st_trailer);
542
5.87k
            return true;
543
5.87k
        }
544
545
59.7k
        int obj = 0;
546
59.7k
        int gen = 0;
547
59.7k
        if (is_obj_key(key, obj, gen)) {
548
29.2k
            cur_object = key;
549
29.2k
            if (setNextStateIfDictionary(key, value, st_object_top)) {
550
26.8k
                next_obj = objects.getObjectForJSON(obj, gen);
551
26.8k
            }
552
29.2k
            return true;
553
29.2k
        }
554
30.5k
        error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
555
30.5k
        return true;
556
59.7k
    }
557
558
276k
    if (state == st_object_top) {
559
46.9k
        util::assertion(!stack.empty(), "QPDF_json: stack empty in st_object_top");
560
46.9k
        auto& tos = stack.back();
561
46.9k
        util::assertion(!!tos.object, "current object uninitialized in st_object_top");
562
46.9k
        if (key == "value") {
563
            // Don't use setNextStateIfDictionary since this can have any type.
564
31.6k
            saw_value = true;
565
31.6k
            replaceObject(makeObject(value), value);
566
31.6k
            next_state = st_object;
567
31.6k
            return true;
568
31.6k
        }
569
15.3k
        if (key == "stream") {
570
11.3k
            saw_stream = true;
571
11.3k
            if (setNextStateIfDictionary(key, value, st_stream)) {
572
10.3k
                this_stream_needs_data = false;
573
10.3k
                if (tos.object.isStream()) {
574
2.93k
                    QTC::TC("qpdf", "QPDF_json updating existing stream");
575
7.43k
                } else {
576
7.43k
                    this_stream_needs_data = true;
577
7.43k
                    replaceObject(
578
7.43k
                        qpdf::Stream(
579
7.43k
                            pdf, tos.object.getObjGen(), QPDFObjectHandle::newDictionary(), 0, 0),
580
7.43k
                        value);
581
7.43k
                }
582
10.3k
                next_obj = tos.object;
583
10.3k
                return true;
584
10.3k
            }
585
950
            return true; // Error message already given above
586
11.3k
        }
587
4.04k
        return true; // Ignore unknown keys for forward compatibility
588
15.3k
    }
589
590
229k
    if (state == st_trailer) {
591
10.1k
        if (key == "value") {
592
5.50k
            saw_value = true;
593
            // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
594
5.50k
            if (setNextStateIfDictionary("trailer.value", value, st_object)) {
595
2.60k
                pdf.m->trailer = makeObject(value);
596
2.60k
                setObjectDescription(pdf.m->trailer, value);
597
2.60k
            }
598
5.50k
            return true;
599
5.50k
        }
600
4.62k
        if (key == "stream") {
601
            // Don't need to set saw_stream here since there's already an error.
602
1.40k
            error(value.getStart(), "the trailer may not be a stream");
603
1.40k
            return true;
604
1.40k
        }
605
3.22k
        return true; // Ignore unknown keys for forward compatibility
606
4.62k
    }
607
608
219k
    if (state == st_stream) {
609
24.1k
        util::assertion(!stack.empty(), "stack empty in st_stream");
610
24.1k
        auto& tos = stack.back();
611
24.1k
        util::assertion(tos.object.isStream(), "current object is not stream in st_stream");
612
24.1k
        if (key == "dict") {
613
6.16k
            saw_dict = true;
614
6.16k
            if (setNextStateIfDictionary("stream.dict", value, st_object)) {
615
4.48k
                tos.object.replaceDict(makeObject(value));
616
4.48k
                return true;
617
4.48k
            }
618
1.67k
            return true; // An error had already been given by setNextStateIfDictionary
619
6.16k
        }
620
17.9k
        if (key == "data") {
621
12.5k
            saw_data = true;
622
12.5k
            std::string v;
623
12.5k
            if (!value.getString(v)) {
624
4.34k
                error(value.getStart(), "\"stream.data\" must be a string");
625
4.34k
                tos.object.replaceStreamData("", {}, {});
626
4.34k
                return true;
627
4.34k
            }
628
            // The range includes the quotes.
629
8.19k
            auto start = value.getStart() + 1;
630
8.19k
            auto end = value.getEnd() - 1;
631
8.19k
            util::assertion(end >= start, "QPDF_json: JSON string length < 0");
632
8.19k
            tos.object.replaceStreamData(provide_data(is, start, end), {}, {});
633
8.19k
            return true;
634
12.5k
        }
635
5.44k
        if (key == "datafile") {
636
3.59k
            saw_datafile = true;
637
3.59k
            std::string filename;
638
3.59k
            if (!value.getString(filename)) {
639
647
                error(
640
647
                    value.getStart(),
641
647
                    "\"stream.datafile\" must be a string containing a file name");
642
647
                tos.object.replaceStreamData("", {}, {});
643
647
                return true;
644
647
            }
645
2.94k
            tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {});
646
2.94k
            return true;
647
3.59k
        }
648
1.85k
        return true; // Ignore unknown keys for forward compatibility.
649
5.44k
    }
650
651
194k
    util::assertion(state == st_object, "QPDF_json: unknown state " + std::to_string(state));
652
194k
    util::assertion(!stack.empty(), "stack empty in st_object");
653
194k
    auto& tos = stack.back();
654
194k
    auto dict = tos.object;
655
194k
    if (dict.isStream()) {
656
0
        dict = dict.getDict();
657
0
    }
658
194k
    util::assertion(
659
194k
        dict.isDictionary(),
660
194k
        "current object is not stream or dictionary in st_object dictionary item");
661
194k
    dict.replaceKey(
662
194k
        is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
663
194k
        makeObject(value));
664
194k
    return true;
665
219k
}
666
667
bool
668
QPDF::JSONReactor::arrayItem(JSON const& value)
669
3.49M
{
670
3.49M
    if (stack.empty()) {
671
0
        throw std::logic_error("stack is empty in arrayItem");
672
0
    }
673
3.49M
    next_state = st_ignore;
674
3.49M
    auto state = stack.back().state;
675
3.49M
    if (state == st_qpdf) {
676
40.5k
        if (!this->saw_qpdf_meta) {
677
11.1k
            this->saw_qpdf_meta = true;
678
11.1k
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
679
29.3k
        } else if (!this->saw_objects) {
680
10.0k
            this->saw_objects = true;
681
10.0k
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
682
19.3k
        } else {
683
19.3k
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
684
19.3k
            error(value.getStart(), "\"qpdf\" must have two elements");
685
19.3k
        }
686
3.45M
    } else if (state == st_object) {
687
3.44M
        stack.back().object.appendItem(makeObject(value));
688
3.44M
    }
689
3.49M
    return true;
690
3.49M
}
691
692
void
693
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
694
2.38M
{
695
2.38M
    auto j_descr = std::get<QPDFObject::JSON_Descr>(*descr);
696
2.38M
    if (j_descr.object != cur_object) {
697
22.1k
        descr = std::make_shared<QPDFObject::Description>(
698
22.1k
            QPDFObject::JSON_Descr(j_descr.input, cur_object));
699
22.1k
    }
700
701
2.38M
    oh.obj_sp()->setDescription(&pdf, descr, value.getStart());
702
2.38M
}
703
704
// Convert a JSON value into a PDF object. Dictionaries and arrays are created empty and
// registered as next_obj with next_state st_object, so that they can be filled in as their
// container is parsed. Strings are interpreted by their form (indirect reference, "u:" string,
// "b:" string, name, "n:/" name); an unrecognized string is reported as an error and becomes
// null. The result is given a description unless it already has one.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    // Numbers that fit a long long become integers; everything else becomes a real.
    auto from_number = [](std::string number) -> QPDFObjectHandle {
        if (QUtil::is_long_long(number.c_str())) {
            return QPDFObjectHandle::newInteger(QUtil::string_to_ll(number.c_str()));
        }
        // JSON allows scientific notation, but PDF does not.
        if (number.find_first_of("eE") != std::string::npos) {
            try {
                number = QUtil::double_to_string(std::stod(number));
            } catch (std::exception&) {
                // Keep it as it was
            }
        }
        return QPDFObjectHandle::newReal(number);
    };

    // The order of the tests matters: each form is tried only if the previous ones did not match.
    auto from_string = [this, &value](std::string const& text) -> QPDFObjectHandle {
        int obj = 0;
        int gen = 0;
        std::string payload;
        if (is_indirect_object(text, obj, gen)) {
            return objects.getObjectForJSON(obj, gen);
        }
        if (is_unicode_string(text, payload)) {
            return QPDFObjectHandle::newUnicodeString(payload);
        }
        if (is_binary_string(text, payload)) {
            return QPDFObjectHandle::newString(QUtil::hex_decode(payload));
        }
        if (is_name(text)) {
            return QPDFObjectHandle::newName(text);
        }
        if (is_pdf_name(text)) {
            return QPDFObjectHandle::parse(text.substr(2));
        }
        QTC::TC("qpdf", "QPDF_json unrecognized string value");
        error(value.getStart(), "unrecognized string value");
        return QPDFObjectHandle::newNull();
    };

    QPDFObjectHandle result;
    std::string scalar;
    bool flag = false;
    bool const is_dict = value.isDictionary();
    if (is_dict || value.isArray()) {
        result = is_dict ? QPDFObjectHandle::newDictionary() : QPDFObjectHandle::newArray();
        next_obj = result;
        next_state = st_object;
    } else if (value.isNull()) {
        result = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        result = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(scalar)) {
        result = from_number(scalar);
    } else if (value.getString(scalar)) {
        result = from_string(scalar);
    }

    if (!result) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }
    if (!result.hasObjectDescription()) {
        setObjectDescription(result, value);
    }
    return result;
}
766
767
void
768
QPDF::createFromJSON(std::string const& json_file)
769
0
{
770
0
    createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
771
0
}
772
773
void
774
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
775
15.7k
{
776
15.7k
    processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
777
15.7k
    importJSON(is, true);
778
15.7k
}
779
780
void
781
QPDF::updateFromJSON(std::string const& json_file)
782
0
{
783
0
    updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
784
0
}
785
786
void
787
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
788
0
{
789
0
    importJSON(is, false);
790
0
}
791
792
void
793
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
794
15.7k
{
795
15.7k
    JSONReactor reactor(*this, is, must_be_complete);
796
15.7k
    try {
797
15.7k
        JSON::parse(*is, &reactor);
798
15.7k
    } catch (std::runtime_error& e) {
799
15.6k
        throw std::runtime_error(is->getName() + ": " + e.what());
800
15.6k
    }
801
119
    if (reactor.anyErrors()) {
802
108
        throw std::runtime_error(is->getName() + ": errors found in JSON");
803
108
    }
804
119
}
805
806
void
807
writeJSONStreamFile(
808
    int version,
809
    JSON::Writer& jw,
810
    qpdf::Stream& stream,
811
    int id,
812
    qpdf_stream_decode_level_e decode_level,
813
    std::string const& file_prefix)
814
0
{
815
0
    auto filename = file_prefix + "-" + std::to_string(id);
816
0
    auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
817
0
    Pl_StdioFile f_pl{"stream data", f};
818
0
    stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
819
0
    f_pl.finish();
820
0
    fclose(f);
821
0
}
822
823
void
824
QPDF::writeJSON(
825
    int version,
826
    Pipeline* p,
827
    qpdf_stream_decode_level_e decode_level,
828
    qpdf_json_stream_data_e json_stream_data,
829
    std::string const& file_prefix,
830
    std::set<std::string> wanted_objects)
831
0
{
832
0
    bool first = true;
833
0
    writeJSON(version, p, true, first, decode_level, json_stream_data, file_prefix, wanted_objects);
834
0
}
835
836
void
837
QPDF::writeJSON(
838
    int version,
839
    Pipeline* p,
840
    bool complete,
841
    bool& first_key,
842
    qpdf_stream_decode_level_e decode_level,
843
    qpdf_json_stream_data_e json_stream_data,
844
    std::string const& file_prefix,
845
    std::set<std::string> wanted_objects)
846
0
{
847
0
    if (version != 2) {
848
0
        throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
849
0
    }
850
0
    JSON::Writer jw{p, 4};
851
0
    if (complete) {
852
0
        jw << "{";
853
0
    } else if (!first_key) {
854
0
        jw << ",";
855
0
    }
856
0
    first_key = false;
857
858
    /* clang-format off */
859
0
    jw << "\n"
860
0
          "  \"qpdf\": [\n"
861
0
          "    {\n"
862
0
          "      \"jsonversion\": " << std::to_string(version) << ",\n"
863
0
          "      \"pdfversion\": \"" << getPDFVersion() << "\",\n"
864
0
          "      \"pushedinheritedpageresources\": " <<  (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
865
0
          "      \"calledgetallpages\": " <<  (everCalledGetAllPages() ? "true" : "false") << ",\n"
866
0
          "      \"maxobjectid\": " <<  std::to_string(getObjectCount()) << "\n"
867
0
          "    },\n"
868
0
          "    {";
869
    /* clang-format on */
870
871
0
    bool all_objects = wanted_objects.empty();
872
0
    bool first = true;
873
0
    for (auto& obj: getAllObjects()) {
874
0
        auto const og = obj.getObjGen();
875
0
        std::string key = "obj:" + og.unparse(' ') + " R";
876
0
        if (all_objects || wanted_objects.contains(key)) {
877
0
            if (first) {
878
0
                jw << "\n      \"" << key;
879
0
                first = false;
880
0
            } else {
881
0
                jw << "\n      },\n      \"" << key;
882
0
            }
883
0
            if (Stream stream = obj) {
884
0
                jw << "\": {\n        \"stream\": ";
885
0
                if (json_stream_data == qpdf_sj_file) {
886
0
                    writeJSONStreamFile(
887
0
                        version, jw, stream, og.getObj(), decode_level, file_prefix);
888
0
                } else {
889
0
                    stream.writeStreamJSON(
890
0
                        version, jw, json_stream_data, decode_level, nullptr, "");
891
0
                }
892
0
            } else {
893
0
                jw << "\": {\n        \"value\": ";
894
0
                obj.writeJSON(version, jw, true);
895
0
            }
896
0
        }
897
0
    }
898
0
    if (all_objects || wanted_objects.contains("trailer")) {
899
0
        if (!first) {
900
0
            jw << "\n      },";
901
0
        }
902
0
        jw << "\n      \"trailer\": {\n        \"value\": ";
903
0
        getTrailer().writeJSON(version, jw, true);
904
0
        first = false;
905
0
    }
906
0
    if (!first) {
907
0
        jw << "\n      }";
908
0
    }
909
    /* clang-format off */
910
0
    jw << "\n"
911
0
          "    }\n"
912
0
          "  ]";
913
    /* clang-format on */
914
0
    if (complete) {
915
0
        jw << "\n}\n";
916
0
        p->finish();
917
0
    }
918
0
}