Coverage Report

Created: 2025-08-29 06:53

/src/qpdf/libqpdf/QPDF_json.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDF.hh>
2
3
#include <qpdf/FileInputSource.hh>
4
#include <qpdf/InputSource_private.hh>
5
#include <qpdf/JSON_writer.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_StdioFile.hh>
8
#include <qpdf/QIntC.hh>
9
#include <qpdf/QPDFObjectHandle_private.hh>
10
#include <qpdf/QPDFObject_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/Util.hh>
14
15
#include <algorithm>
16
#include <cstring>
17
18
using namespace qpdf;
19
20
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
21
22
//                                |
23
// {                              |   -> st_top
24
//   "qpdf": [                    |   -> st_qpdf
25
//     {                          |   -> st_qpdf_meta
26
//       ...                      |   ...
27
//     },                         |   ...
28
//     {                          |   -> st_objects
29
//       "obj:1 0 R": {           |   -> st_object_top
30
//         "value": {             |   -> st_object
31
//           "/Pages": "2 0 R",   |   ...
32
//           "/Type": "/Catalog"  |   ...
33
//         }                      |   <- st_object_top
34
//       },                       |   <- st_objects
35
//       "obj:2 0 R": {           |   -> st_object_top
36
//         "value": 12            |   -> st_object
37
//         }                      |   <- st_object_top
38
//       },                       |   <- st_objects
39
//       "obj:4 0 R": {           |   -> st_object_top
40
//         "stream": {            |   -> st_stream
41
//           "data": "cG90YXRv",  |   ...
42
//           "dict": {            |   -> st_object
43
//             "/K": true         |   ...
44
//           }                    |   <- st_stream
45
//         }                      |   <- st_object_top
46
//       },                       |   <- st_objects
47
//       "trailer": {             |   -> st_trailer
48
//         "value": {             |   -> st_object
49
//           "/Root": "1 0 R",    |   ...
50
//           "/Size": 7           |   ...
51
//         }                      |   <- st_trailer
52
//       }                        |   <- st_objects
53
//     }                          |   <- st_qpdf
54
//   ]                            |   <- st_top
55
// }                              |
56
57
// Minimal, empty PDF that createFromJSON() loads before importing the JSON input: a header line,
// a one-entry cross-reference table, and a trailer. The "startxref" value of 9 is the byte offset
// of "xref", which directly follows the 9-byte header line.
static char const* JSON_PDF =
    "%PDF-1.3\n"
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    "trailer << /Size 1 >>\n"
    "startxref\n"
    "9\n"
    "%%EOF\n";
67
68
// Validator methods -- these are much more performant than std::regex.
69
static bool
70
is_indirect_object(std::string const& v, int& obj, int& gen)
71
272k
{
72
272k
    char const* p = v.c_str();
73
272k
    std::string o_str;
74
272k
    std::string g_str;
75
272k
    if (!util::is_digit(*p)) {
76
52.5k
        return false;
77
52.5k
    }
78
474k
    while (util::is_digit(*p)) {
79
253k
        o_str.append(1, *p++);
80
253k
    }
81
220k
    if (*p != ' ') {
82
3.87k
        return false;
83
3.87k
    }
84
434k
    while (*p == ' ') {
85
218k
        ++p;
86
218k
    }
87
216k
    if (!util::is_digit(*p)) {
88
886
        return false;
89
886
    }
90
4.76M
    while (util::is_digit(*p)) {
91
4.54M
        g_str.append(1, *p++);
92
4.54M
    }
93
215k
    if (*p != ' ') {
94
1.93k
        return false;
95
1.93k
    }
96
429k
    while (*p == ' ') {
97
216k
        ++p;
98
216k
    }
99
213k
    if (*p++ != 'R') {
100
761
        return false;
101
761
    }
102
212k
    if (*p) {
103
680
        return false;
104
680
    }
105
211k
    obj = QUtil::string_to_int(o_str.c_str());
106
211k
    gen = QUtil::string_to_int(g_str.c_str());
107
211k
    return obj > 0;
108
212k
}
109
110
static bool
111
is_obj_key(std::string const& v, int& obj, int& gen)
112
37.5k
{
113
37.5k
    if (v.substr(0, 4) != "obj:") {
114
15.1k
        return false;
115
15.1k
    }
116
22.3k
    return is_indirect_object(v.substr(4), obj, gen);
117
37.5k
}
118
119
// Recognize a string carrying the "u:" prefix. On a match, str receives everything after the
// prefix (possibly empty); otherwise str is left unchanged.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "u:") != 0) {
        return false;
    }
    str = v.substr(2);
    return true;
}
128
129
static bool
130
is_binary_string(std::string const& v, std::string& str)
131
49.9k
{
132
49.9k
    if (v.substr(0, 2) == "b:") {
133
3.10k
        str = v.substr(2);
134
3.10k
        int count = 0;
135
41.0k
        for (char c: str) {
136
41.0k
            if (!util::is_hex_digit(c)) {
137
1.42k
                return false;
138
1.42k
            }
139
39.6k
            ++count;
140
39.6k
        }
141
1.68k
        return (count % 2 == 0);
142
3.10k
    }
143
46.8k
    return false;
144
49.9k
}
145
146
// A name is a '/' followed by at least one more character.
static bool
is_name(std::string const& v)
{
    return v.size() >= 2 && v.front() == '/';
}
151
152
// A PDF-syntax name is the prefix "n:/" followed by at least one more character.
static bool
is_pdf_name(std::string const& v)
{
    return v.size() >= 4 && v.compare(0, 3, "n:/") == 0;
}
157
158
bool
159
QPDF::test_json_validators()
160
0
{
161
0
    bool passed = true;
162
0
    auto check_fn = [&passed](char const* msg, bool expr) {
163
0
        if (!expr) {
164
0
            passed = false;
165
0
            std::cerr << msg << '\n';
166
0
        }
167
0
    };
168
0
#define check(expr) check_fn(#expr, expr)
169
170
0
    int obj = 0;
171
0
    int gen = 0;
172
0
    check(!is_indirect_object("", obj, gen));
173
0
    check(!is_indirect_object("12", obj, gen));
174
0
    check(!is_indirect_object("x12 0 R", obj, gen));
175
0
    check(!is_indirect_object("12 0 Rx", obj, gen));
176
0
    check(!is_indirect_object("12 0R", obj, gen));
177
0
    check(is_indirect_object("52 1 R", obj, gen));
178
0
    check(obj == 52);
179
0
    check(gen == 1);
180
0
    check(is_indirect_object("53  20  R", obj, gen));
181
0
    check(obj == 53);
182
0
    check(gen == 20);
183
0
    check(!is_obj_key("", obj, gen));
184
0
    check(!is_obj_key("obj:x", obj, gen));
185
0
    check(!is_obj_key("obj:x", obj, gen));
186
0
    check(is_obj_key("obj:12 13 R", obj, gen));
187
0
    check(obj == 12);
188
0
    check(gen == 13);
189
0
    std::string str;
190
0
    check(!is_unicode_string("", str));
191
0
    check(!is_unicode_string("xyz", str));
192
0
    check(!is_unicode_string("x:", str));
193
0
    check(is_unicode_string("u:potato", str));
194
0
    check(str == "potato");
195
0
    check(is_unicode_string("u:", str));
196
0
    check(str.empty());
197
0
    check(!is_binary_string("", str));
198
0
    check(!is_binary_string("x:", str));
199
0
    check(!is_binary_string("b:1", str));
200
0
    check(!is_binary_string("b:123", str));
201
0
    check(!is_binary_string("b:gh", str));
202
0
    check(is_binary_string("b:", str));
203
0
    check(is_binary_string("b:12", str));
204
0
    check(is_binary_string("b:123aBC", str));
205
0
    check(!is_name(""));
206
0
    check(!is_name("/"));
207
0
    check(!is_name("xyz"));
208
0
    check(is_name("/Potato"));
209
0
    check(is_name("/Potato Salad"));
210
211
0
    return passed;
212
0
#undef check_arg
213
0
}
214
215
static std::function<void(Pipeline*)>
216
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
217
4.50k
{
218
4.50k
    return [is, start, end](Pipeline* p) {
219
0
        auto data = is->read(QIntC::to_size(end - start), start);
220
0
        data = Pl_Base64::decode(data);
221
0
        p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size());
222
0
        p->finish();
223
0
    };
224
4.50k
}
225
226
// JSON parser reactor that builds or updates the objects of a QPDF from qpdf JSON input. The
// parser reports container starts/ends and items; the reactor tracks where it is in the document
// with a stack of states (see the state transition chart at the top of this file).
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // pdf: the QPDF being populated. is: the JSON input, also used later to read stream data.
    // must_be_complete: when true, "pdfversion" and "trailer" are required.
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        is(is),
        must_be_complete(must_be_complete),
        descr(
            std::make_shared<QPDFObject::Description>(
                QPDFObject::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
    }
    ~JSONReactor() override = default;
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True once error() has been called at least once.
    bool anyErrors() const;

  private:
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container: the state it is parsed in and, when applicable, the PDF
    // object being filled from it.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            // A named rvalue reference is an lvalue; move explicitly so the handle is
            // transferred rather than copied.
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    std::shared_ptr<InputSource> is;
    bool must_be_complete{true};
    // Description shared by objects created from the current top-level JSON object; replaced when
    // cur_object changes.
    std::shared_ptr<QPDFObject::Description> descr;
    bool errors{false};
    // Document-level "seen" flags, checked when the outermost container ends.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};
    // Key of the object currently being processed ("trailer" or "obj:n n R"); used in messages.
    std::string cur_object;
    // Per-object "seen" flags, reset whenever parsing returns to st_objects.
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    // Set when the current "stream" key creates a new stream rather than updating an existing one.
    bool this_stream_needs_data{false};
    std::vector<StackFrame> stack;
    // Object and state to attach to the next container that starts.
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
301
302
void
303
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
304
81.6k
{
305
81.6k
    errors = true;
306
81.6k
    std::string object = this->cur_object;
307
81.6k
    if (is->getName() != pdf.getFilename()) {
308
0
        object += " from " + is->getName();
309
0
    }
310
81.6k
    pdf.warn(qpdf_e_json, object, offset, msg);
311
81.6k
}
312
313
bool
314
QPDF::JSONReactor::anyErrors() const
315
69
{
316
69
    return errors;
317
69
}
318
319
void
320
QPDF::JSONReactor::containerStart()
321
89.1k
{
322
89.1k
    if (next_obj) {
323
56.6k
        stack.emplace_back(next_state, std::move(next_obj));
324
56.6k
        next_obj = QPDFObjectHandle();
325
56.6k
    } else {
326
32.5k
        stack.emplace_back(next_state);
327
32.5k
    }
328
89.1k
}
329
330
void
331
QPDF::JSONReactor::dictionaryStart()
332
63.8k
{
333
63.8k
    containerStart();
334
63.8k
}
335
336
void
337
QPDF::JSONReactor::arrayStart()
338
25.6k
{
339
25.6k
    if (stack.empty()) {
340
324
        QTC::TC("qpdf", "QPDF_json top-level array");
341
324
        throw std::runtime_error("QPDF JSON must be a dictionary");
342
324
    }
343
25.3k
    containerStart();
344
25.3k
}
345
346
void
347
QPDF::JSONReactor::containerEnd(JSON const& value)
348
36.4k
{
349
36.4k
    auto from_state = stack.back().state;
350
36.4k
    stack.pop_back();
351
36.4k
    if (stack.empty()) {
352
80
        if (!this->saw_qpdf) {
353
18
            QTC::TC("qpdf", "QPDF_json missing qpdf");
354
18
            error(0, "\"qpdf\" object was not seen");
355
62
        } else {
356
62
            if (!this->saw_json_version) {
357
46
                QTC::TC("qpdf", "QPDF_json missing json version");
358
46
                error(0, "\"qpdf[0].jsonversion\" was not seen");
359
46
            }
360
62
            if (must_be_complete && !this->saw_pdf_version) {
361
45
                QTC::TC("qpdf", "QPDF_json missing pdf version");
362
45
                error(0, "\"qpdf[0].pdfversion\" was not seen");
363
45
            }
364
62
            if (!this->saw_objects) {
365
9
                QTC::TC("qpdf", "QPDF_json missing objects");
366
9
                error(0, "\"qpdf[1]\" was not seen");
367
53
            } else {
368
53
                if (must_be_complete && !this->saw_trailer) {
369
35
                    QTC::TC("qpdf", "QPDF_json missing trailer");
370
35
                    error(0, "\"qpdf[1].trailer\" was not seen");
371
35
                }
372
53
            }
373
62
        }
374
36.4k
    } else if (from_state == st_trailer) {
375
641
        if (!saw_value) {
376
420
            QTC::TC("qpdf", "QPDF_json trailer no value");
377
420
            error(value.getStart(), "\"trailer\" is missing \"value\"");
378
420
        }
379
35.7k
    } else if (from_state == st_object_top) {
380
12.4k
        if (saw_value == saw_stream) {
381
1.16k
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
382
1.16k
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
383
1.16k
        }
384
12.4k
        if (saw_stream) {
385
5.50k
            if (!saw_dict) {
386
2.58k
                QTC::TC("qpdf", "QPDF_json stream no dict");
387
2.58k
                error(value.getStart(), "\"stream\" is missing \"dict\"");
388
2.58k
            }
389
5.50k
            if (saw_data == saw_datafile) {
390
2.30k
                if (this_stream_needs_data) {
391
1.21k
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
392
1.21k
                    error(
393
1.21k
                        value.getStart(),
394
1.21k
                        "new \"stream\" must have exactly one of \"data\" or \"datafile\"");
395
1.21k
                } else if (saw_datafile) {
396
227
                    QTC::TC("qpdf", "QPDF_json data and datafile");
397
227
                    error(
398
227
                        value.getStart(),
399
227
                        "existing \"stream\" may at most one of \"data\" or \"datafile\"");
400
870
                } else {
401
870
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
402
870
                }
403
2.30k
            }
404
5.50k
        }
405
12.4k
    }
406
36.4k
    if (!stack.empty()) {
407
36.3k
        auto state = stack.back().state;
408
36.3k
        if (state == st_objects) {
409
15.9k
            this->cur_object = "";
410
15.9k
            this->saw_dict = false;
411
15.9k
            this->saw_data = false;
412
15.9k
            this->saw_datafile = false;
413
15.9k
            this->saw_value = false;
414
15.9k
            this->saw_stream = false;
415
15.9k
        }
416
36.3k
    }
417
36.4k
}
418
419
void
420
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
421
20.8k
{
422
20.8k
    auto& tos = stack.back();
423
20.8k
    auto og = tos.object.getObjGen();
424
20.8k
    if (replacement.isIndirect() && !(replacement.isStream() && replacement.getObjGen() == og)) {
425
867
        error(
426
867
            replacement.getParsedOffset(),
427
867
            "the value of an object may not be an indirect object reference");
428
867
        return;
429
867
    }
430
19.9k
    pdf.replaceObject(og, replacement);
431
19.9k
    next_obj = pdf.getObject(og);
432
19.9k
    setObjectDescription(tos.object, value);
433
19.9k
}
434
435
void
436
QPDF::JSONReactor::topLevelScalar()
437
60
{
438
60
    QTC::TC("qpdf", "QPDF_json top-level scalar");
439
60
    throw std::runtime_error("QPDF JSON must be a dictionary");
440
60
}
441
442
bool
443
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
444
39.4k
{
445
    // Use this method when the next state is for processing a nested dictionary.
446
39.4k
    if (value.isDictionary()) {
447
34.0k
        this->next_state = next;
448
34.0k
        return true;
449
34.0k
    }
450
5.38k
    error(value.getStart(), "\"" + key + "\" must be a dictionary");
451
5.38k
    return false;
452
39.4k
}
453
454
bool
455
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
456
225k
{
457
225k
    if (stack.empty()) {
458
0
        throw std::logic_error("stack is empty in dictionaryItem");
459
0
    }
460
225k
    next_state = st_ignore;
461
225k
    auto state = stack.back().state;
462
225k
    if (state == st_ignore) {
463
6.94k
        QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
464
        // ignore
465
218k
    } else if (state == st_top) {
466
13.6k
        if (key == "qpdf") {
467
8.70k
            this->saw_qpdf = true;
468
8.70k
            if (!value.isArray()) {
469
3.19k
                QTC::TC("qpdf", "QPDF_json qpdf not array");
470
3.19k
                error(value.getStart(), "\"qpdf\" must be an array");
471
5.50k
            } else {
472
5.50k
                next_state = st_qpdf;
473
5.50k
            }
474
8.70k
        } else {
475
            // Ignore all other fields.
476
4.93k
            QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key");
477
4.93k
        }
478
204k
    } else if (state == st_qpdf_meta) {
479
13.6k
        if (key == "pdfversion") {
480
7.01k
            this->saw_pdf_version = true;
481
7.01k
            std::string v;
482
7.01k
            bool okay = false;
483
7.01k
            if (value.getString(v)) {
484
2.76k
                std::string version;
485
2.76k
                char const* p = v.c_str();
486
2.76k
                if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
487
357
                    this->pdf.m->pdf_version = version;
488
357
                    okay = true;
489
357
                }
490
2.76k
            }
491
7.01k
            if (!okay) {
492
6.65k
                QTC::TC("qpdf", "QPDF_json bad pdf version");
493
6.65k
                error(value.getStart(), "invalid PDF version (must be \"x.y\")");
494
6.65k
            }
495
7.01k
        } else if (key == "jsonversion") {
496
2.11k
            this->saw_json_version = true;
497
2.11k
            std::string v;
498
2.11k
            bool okay = false;
499
2.11k
            if (value.getNumber(v)) {
500
1.91k
                std::string version;
501
1.91k
                if (QUtil::string_to_int(v.c_str()) == 2) {
502
258
                    okay = true;
503
258
                }
504
1.91k
            }
505
2.11k
            if (!okay) {
506
1.78k
                QTC::TC("qpdf", "QPDF_json bad json version");
507
1.78k
                error(value.getStart(), "invalid JSON version (must be numeric value 2)");
508
1.78k
            }
509
4.54k
        } else if (key == "pushedinheritedpageresources") {
510
491
            bool v;
511
491
            if (value.getBool(v)) {
512
227
                if (!this->must_be_complete && v) {
513
0
                    this->pdf.pushInheritedAttributesToPage();
514
0
                }
515
264
            } else {
516
264
                QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources");
517
264
                error(value.getStart(), "pushedinheritedpageresources must be a boolean");
518
264
            }
519
4.05k
        } else if (key == "calledgetallpages") {
520
457
            bool v;
521
457
            if (value.getBool(v)) {
522
230
                if (!this->must_be_complete && v) {
523
0
                    this->pdf.getAllPages();
524
0
                }
525
230
            } else {
526
227
                QTC::TC("qpdf", "QPDF_json bad calledgetallpages");
527
227
                error(value.getStart(), "calledgetallpages must be a boolean");
528
227
            }
529
3.60k
        } else {
530
            // ignore unknown keys for forward compatibility and to skip keys we don't care about
531
            // like "maxobjectid".
532
3.60k
            QTC::TC("qpdf", "QPDF_json ignore second-level key");
533
3.60k
        }
534
191k
    } else if (state == st_objects) {
535
39.4k
        int obj = 0;
536
39.4k
        int gen = 0;
537
39.4k
        if (key == "trailer") {
538
1.93k
            this->saw_trailer = true;
539
1.93k
            this->cur_object = "trailer";
540
1.93k
            setNextStateIfDictionary(key, value, st_trailer);
541
37.5k
        } else if (is_obj_key(key, obj, gen)) {
542
16.7k
            this->cur_object = key;
543
16.7k
            if (setNextStateIfDictionary(key, value, st_object_top)) {
544
16.1k
                next_obj = pdf.getObjectForJSON(obj, gen);
545
16.1k
            }
546
20.7k
        } else {
547
20.7k
            QTC::TC("qpdf", "QPDF_json bad object key");
548
20.7k
            error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
549
20.7k
        }
550
151k
    } else if (state == st_object_top) {
551
25.2k
        if (stack.empty()) {
552
0
            throw std::logic_error("stack empty in st_object_top");
553
0
        }
554
25.2k
        auto& tos = stack.back();
555
25.2k
        if (!tos.object) {
556
0
            throw std::logic_error("current object uninitialized in st_object_top");
557
0
        }
558
25.2k
        if (key == "value") {
559
            // Don't use setNextStateIfDictionary since this can have any type.
560
16.7k
            this->saw_value = true;
561
16.7k
            replaceObject(makeObject(value), value);
562
16.7k
            next_state = st_object;
563
16.7k
        } else if (key == "stream") {
564
6.38k
            this->saw_stream = true;
565
6.38k
            if (setNextStateIfDictionary(key, value, st_stream)) {
566
6.00k
                this->this_stream_needs_data = false;
567
6.00k
                if (tos.object.isStream()) {
568
1.72k
                    QTC::TC("qpdf", "QPDF_json updating existing stream");
569
4.28k
                } else {
570
4.28k
                    this_stream_needs_data = true;
571
4.28k
                    replaceObject(
572
4.28k
                        qpdf::Stream(
573
4.28k
                            pdf, tos.object.getObjGen(), QPDFObjectHandle::newDictionary(), 0, 0),
574
4.28k
                        value);
575
4.28k
                }
576
6.00k
                next_obj = tos.object;
577
6.00k
            } else {
578
                // Error message already given above
579
374
                QTC::TC("qpdf", "QPDF_json stream not a dictionary");
580
374
            }
581
6.38k
        } else {
582
            // Ignore unknown keys for forward compatibility
583
2.09k
            QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top");
584
2.09k
        }
585
126k
    } else if (state == st_trailer) {
586
1.87k
        if (key == "value") {
587
1.33k
            this->saw_value = true;
588
            // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
589
1.33k
            if (setNextStateIfDictionary("trailer.value", value, st_object)) {
590
731
                this->pdf.m->trailer = makeObject(value);
591
731
                setObjectDescription(this->pdf.m->trailer, value);
592
731
            }
593
1.33k
        } else if (key == "stream") {
594
            // Don't need to set saw_stream here since there's already an error.
595
200
            QTC::TC("qpdf", "QPDF_json trailer stream");
596
200
            error(value.getStart(), "the trailer may not be a stream");
597
338
        } else {
598
            // Ignore unknown keys for forward compatibility
599
338
            QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer");
600
338
        }
601
124k
    } else if (state == st_stream) {
602
11.3k
        if (stack.empty()) {
603
0
            throw std::logic_error("stack empty in st_stream");
604
0
        }
605
11.3k
        auto& tos = stack.back();
606
11.3k
        if (!tos.object.isStream()) {
607
0
            throw std::logic_error("current object is not stream in st_stream");
608
0
        }
609
11.3k
        if (key == "dict") {
610
2.95k
            this->saw_dict = true;
611
2.95k
            if (setNextStateIfDictionary("stream.dict", value, st_object)) {
612
2.72k
                tos.object.replaceDict(makeObject(value));
613
2.72k
            } else {
614
                // An error had already been given by setNextStateIfDictionary
615
226
                QTC::TC("qpdf", "QPDF_json stream dict not dict");
616
226
            }
617
8.39k
        } else if (key == "data") {
618
5.89k
            this->saw_data = true;
619
5.89k
            std::string v;
620
5.89k
            if (!value.getString(v)) {
621
1.38k
                QTC::TC("qpdf", "QPDF_json stream data not string");
622
1.38k
                error(value.getStart(), "\"stream.data\" must be a string");
623
1.38k
                tos.object.replaceStreamData("", {}, {});
624
4.50k
            } else {
625
                // The range includes the quotes.
626
4.50k
                auto start = value.getStart() + 1;
627
4.50k
                auto end = value.getEnd() - 1;
628
4.50k
                if (end < start) {
629
0
                    throw std::logic_error("QPDF_json: JSON string length < 0");
630
0
                }
631
4.50k
                tos.object.replaceStreamData(provide_data(is, start, end), {}, {});
632
4.50k
            }
633
5.89k
        } else if (key == "datafile") {
634
1.64k
            this->saw_datafile = true;
635
1.64k
            std::string filename;
636
1.64k
            if (!value.getString(filename)) {
637
478
                QTC::TC("qpdf", "QPDF_json stream datafile not string");
638
478
                error(
639
478
                    value.getStart(),
640
478
                    "\"stream.datafile\" must be a string containing a file name");
641
478
                tos.object.replaceStreamData("", {}, {});
642
1.16k
            } else {
643
1.16k
                tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {});
644
1.16k
            }
645
1.64k
        } else {
646
            // Ignore unknown keys for forward compatibility.
647
856
            QTC::TC("qpdf", "QPDF_json ignore unknown key in stream");
648
856
        }
649
113k
    } else if (state == st_object) {
650
113k
        if (stack.empty()) {
651
0
            throw std::logic_error("stack empty in st_object");
652
0
        }
653
113k
        auto& tos = stack.back();
654
113k
        auto dict = tos.object;
655
113k
        if (dict.isStream()) {
656
0
            dict = dict.getDict();
657
0
        }
658
113k
        if (!dict.isDictionary()) {
659
0
            throw std::logic_error(
660
0
                "current object is not stream or dictionary in st_object dictionary item");
661
0
        }
662
113k
        dict.replaceKey(
663
113k
            is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
664
113k
            makeObject(value));
665
113k
    } else {
666
0
        throw std::logic_error("QPDF_json: unknown state " + std::to_string(state));
667
0
    }
668
225k
    return true;
669
225k
}
670
671
bool
672
QPDF::JSONReactor::arrayItem(JSON const& value)
673
1.54M
{
674
1.54M
    if (stack.empty()) {
675
0
        throw std::logic_error("stack is empty in arrayItem");
676
0
    }
677
1.54M
    next_state = st_ignore;
678
1.54M
    auto state = stack.back().state;
679
1.54M
    if (state == st_qpdf) {
680
18.9k
        if (!this->saw_qpdf_meta) {
681
5.30k
            this->saw_qpdf_meta = true;
682
5.30k
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
683
13.6k
        } else if (!this->saw_objects) {
684
4.78k
            this->saw_objects = true;
685
4.78k
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
686
8.83k
        } else {
687
8.83k
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
688
8.83k
            error(value.getStart(), "\"qpdf\" must have two elements");
689
8.83k
        }
690
1.52M
    } else if (state == st_object) {
691
1.51M
        stack.back().object.appendItem(makeObject(value));
692
1.51M
    }
693
1.54M
    return true;
694
1.54M
}
695
696
void
697
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
698
1.46M
{
699
1.46M
    auto j_descr = std::get<QPDFObject::JSON_Descr>(*descr);
700
1.46M
    if (j_descr.object != cur_object) {
701
13.0k
        descr = std::make_shared<QPDFObject::Description>(
702
13.0k
            QPDFObject::JSON_Descr(j_descr.input, cur_object));
703
13.0k
    }
704
705
1.46M
    oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart());
706
1.46M
}
707
708
// Convert a JSON value into a QPDFObjectHandle. Containers become an empty dictionary or array
// that is queued (next_obj/next_state) to be filled as the container's items arrive. Numbers
// become integers when they fit a long long and reals otherwise. Strings are classified, in
// order, as indirect reference, "u:" string, "b:" hex string, "/name", or "n:/name"; anything else
// is reported as an error and turned into null. Objects without a description get one.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle result;
    std::string text;
    bool flag = false;
    bool const is_dict = value.isDictionary();
    if (is_dict || value.isArray()) {
        result = is_dict ? QPDFObjectHandle::newDictionary() : QPDFObjectHandle::newArray();
        next_obj = result;
        next_state = st_object;
    } else if (value.isNull()) {
        result = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        result = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(text)) {
        if (QUtil::is_long_long(text.c_str())) {
            result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(text.c_str()));
        } else {
            // JSON allows scientific notation, but PDF does not.
            if (text.find_first_of("eE") != std::string::npos) {
                try {
                    text = QUtil::double_to_string(std::stod(text));
                } catch (std::exception&) {
                    // Keep it as it was
                }
            }
            result = QPDFObjectHandle::newReal(text);
        }
    } else if (value.getString(text)) {
        int obj = 0;
        int gen = 0;
        std::string payload;
        if (is_indirect_object(text, obj, gen)) {
            result = pdf.getObjectForJSON(obj, gen);
        } else if (is_unicode_string(text, payload)) {
            result = QPDFObjectHandle::newUnicodeString(payload);
        } else if (is_binary_string(text, payload)) {
            result = QPDFObjectHandle::newString(QUtil::hex_decode(payload));
        } else if (is_name(text)) {
            result = QPDFObjectHandle::newName(text);
        } else if (is_pdf_name(text)) {
            // Drop the "n:" prefix and parse the rest as PDF syntax.
            result = QPDFObjectHandle::parse(text.substr(2));
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            result = QPDFObjectHandle::newNull();
        }
    }
    if (!result) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }

    if (!result.hasObjectDescription()) {
        setObjectDescription(result, value);
    }
    return result;
}
770
771
void
772
QPDF::createFromJSON(std::string const& json_file)
773
0
{
774
0
    createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
775
0
}
776
777
void
778
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
779
7.52k
{
780
7.52k
    processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
781
7.52k
    importJSON(is, true);
782
7.52k
}
783
784
void
785
QPDF::updateFromJSON(std::string const& json_file)
786
0
{
787
0
    updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
788
0
}
789
790
void
791
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
792
0
{
793
0
    importJSON(is, false);
794
0
}
795
796
void
797
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
798
7.52k
{
799
7.52k
    JSONReactor reactor(*this, is, must_be_complete);
800
7.52k
    try {
801
7.52k
        JSON::parse(*is, &reactor);
802
7.52k
    } catch (std::runtime_error& e) {
803
7.45k
        throw std::runtime_error(is->getName() + ": " + e.what());
804
7.45k
    }
805
69
    if (reactor.anyErrors()) {
806
56
        throw std::runtime_error(is->getName() + ": errors found in JSON");
807
56
    }
808
69
}
809
810
void
811
writeJSONStreamFile(
812
    int version,
813
    JSON::Writer& jw,
814
    qpdf::Stream& stream,
815
    int id,
816
    qpdf_stream_decode_level_e decode_level,
817
    std::string const& file_prefix)
818
0
{
819
0
    auto filename = file_prefix + "-" + std::to_string(id);
820
0
    auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
821
0
    Pl_StdioFile f_pl{"stream data", f};
822
0
    stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
823
0
    f_pl.finish();
824
0
    fclose(f);
825
0
}
826
827
void
828
QPDF::writeJSON(
829
    int version,
830
    Pipeline* p,
831
    qpdf_stream_decode_level_e decode_level,
832
    qpdf_json_stream_data_e json_stream_data,
833
    std::string const& file_prefix,
834
    std::set<std::string> wanted_objects)
835
0
{
836
0
    bool first = true;
837
0
    writeJSON(version, p, true, first, decode_level, json_stream_data, file_prefix, wanted_objects);
838
0
}
839
840
void
841
QPDF::writeJSON(
842
    int version,
843
    Pipeline* p,
844
    bool complete,
845
    bool& first_key,
846
    qpdf_stream_decode_level_e decode_level,
847
    qpdf_json_stream_data_e json_stream_data,
848
    std::string const& file_prefix,
849
    std::set<std::string> wanted_objects)
850
0
{
851
0
    if (version != 2) {
852
0
        throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
853
0
    }
854
0
    JSON::Writer jw{p, 4};
855
0
    if (complete) {
856
0
        jw << "{";
857
0
    } else if (!first_key) {
858
0
        jw << ",";
859
0
    }
860
0
    first_key = false;
861
862
    /* clang-format off */
863
0
    jw << "\n"
864
0
          "  \"qpdf\": [\n"
865
0
          "    {\n"
866
0
          "      \"jsonversion\": " << std::to_string(version) << ",\n"
867
0
          "      \"pdfversion\": \"" << getPDFVersion() << "\",\n"
868
0
          "      \"pushedinheritedpageresources\": " <<  (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
869
0
          "      \"calledgetallpages\": " <<  (everCalledGetAllPages() ? "true" : "false") << ",\n"
870
0
          "      \"maxobjectid\": " <<  std::to_string(getObjectCount()) << "\n"
871
0
          "    },\n"
872
0
          "    {";
873
    /* clang-format on */
874
875
0
    bool all_objects = wanted_objects.empty();
876
0
    bool first = true;
877
0
    for (auto& obj: getAllObjects()) {
878
0
        auto const og = obj.getObjGen();
879
0
        std::string key = "obj:" + og.unparse(' ') + " R";
880
0
        if (all_objects || wanted_objects.contains(key)) {
881
0
            if (first) {
882
0
                jw << "\n      \"" << key;
883
0
                first = false;
884
0
            } else {
885
0
                jw << "\n      },\n      \"" << key;
886
0
            }
887
0
            if (auto stream = obj.as_stream()) {
888
0
                jw << "\": {\n        \"stream\": ";
889
0
                if (json_stream_data == qpdf_sj_file) {
890
0
                    writeJSONStreamFile(
891
0
                        version, jw, stream, og.getObj(), decode_level, file_prefix);
892
0
                } else {
893
0
                    stream.writeStreamJSON(
894
0
                        version, jw, json_stream_data, decode_level, nullptr, "");
895
0
                }
896
0
            } else {
897
0
                jw << "\": {\n        \"value\": ";
898
0
                obj.writeJSON(version, jw, true);
899
0
            }
900
0
        }
901
0
    }
902
0
    if (all_objects || wanted_objects.contains("trailer")) {
903
0
        if (!first) {
904
0
            jw << "\n      },";
905
0
        }
906
0
        jw << "\n      \"trailer\": {\n        \"value\": ";
907
0
        getTrailer().writeJSON(version, jw, true);
908
0
        first = false;
909
0
    }
910
0
    if (!first) {
911
0
        jw << "\n      }";
912
0
    }
913
    /* clang-format off */
914
0
    jw << "\n"
915
0
          "    }\n"
916
0
          "  ]";
917
    /* clang-format on */
918
0
    if (complete) {
919
0
        jw << "\n}\n";
920
0
        p->finish();
921
0
    }
922
0
}