Coverage Report

Created: 2024-09-08 06:06

/src/qpdf/libqpdf/QPDF_json.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDF.hh>
2
3
#include <qpdf/FileInputSource.hh>
4
#include <qpdf/JSON_writer.hh>
5
#include <qpdf/Pl_Base64.hh>
6
#include <qpdf/Pl_StdioFile.hh>
7
#include <qpdf/QIntC.hh>
8
#include <qpdf/QPDFObject_private.hh>
9
#include <qpdf/QPDFValue.hh>
10
#include <qpdf/QPDF_Null.hh>
11
#include <qpdf/QPDF_Stream.hh>
12
#include <qpdf/QTC.hh>
13
#include <qpdf/QUtil.hh>
14
#include <algorithm>
15
#include <cstring>
16
17
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
18
19
//                                |
20
// {                              |   -> st_top
21
//   "qpdf": [                    |   -> st_qpdf
22
//     {                          |   -> st_qpdf_meta
23
//       ...                      |   ...
24
//     },                         |   ...
25
//     {                          |   -> st_objects
26
//       "obj:1 0 R": {           |   -> st_object_top
27
//         "value": {             |   -> st_object
28
//           "/Pages": "2 0 R",   |   ...
29
//           "/Type": "/Catalog"  |   ...
30
//         }                      |   <- st_object_top
31
//       },                       |   <- st_objects
32
//       "obj:2 0 R": {           |   -> st_object_top
33
//         "value": 12            |   -> st_object
34
//         }                      |   <- st_object_top
35
//       },                       |   <- st_objects
36
//       "obj:4 0 R": {           |   -> st_object_top
37
//         "stream": {            |   -> st_stream
38
//           "data": "cG90YXRv",  |   ...
39
//           "dict": {            |   -> st_object
40
//             "/K": true         |   ...
41
//           }                    |   <- st_stream
42
//         }                      |   <- st_object_top
43
//       },                       |   <- st_objects
44
//       "trailer": {             |   -> st_trailer
45
//         "value": {             |   -> st_object
46
//           "/Root": "1 0 R",    |   ...
47
//           "/Size": 7           |   ...
48
//         }                      |   <- st_trailer
49
//       }                        |   <- st_objects
50
//     }                          |   <- st_qpdf
51
//   ]                            |   <- st_top
52
// }                              |
53
54
// Minimal PDF text: a header, a one-entry xref table, a trailer with /Size 1, and the startxref
// marker. createFromJSON() loads it with processMemoryFile() before importing the JSON.
static char const* JSON_PDF =
    "%PDF-1.3\n"
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    "trailer << /Size 1 >>\n"
    "startxref\n"
    "9\n"
    "%%EOF\n";
64
65
// Validator methods -- these are much more performant than std::regex.
66
static bool
67
is_indirect_object(std::string const& v, int& obj, int& gen)
68
782k
{
69
782k
    char const* p = v.c_str();
70
782k
    std::string o_str;
71
782k
    std::string g_str;
72
782k
    if (!QUtil::is_digit(*p)) {
73
104k
        return false;
74
104k
    }
75
4.09M
    while (QUtil::is_digit(*p)) {
76
3.42M
        o_str.append(1, *p++);
77
3.42M
    }
78
677k
    if (*p != ' ') {
79
12.8k
        return false;
80
12.8k
    }
81
16.1M
    while (*p == ' ') {
82
15.5M
        ++p;
83
15.5M
    }
84
664k
    if (!QUtil::is_digit(*p)) {
85
6.80k
        return false;
86
6.80k
    }
87
17.1M
    while (QUtil::is_digit(*p)) {
88
16.4M
        g_str.append(1, *p++);
89
16.4M
    }
90
657k
    if (*p != ' ') {
91
1.48k
        return false;
92
1.48k
    }
93
2.78M
    while (*p == ' ') {
94
2.12M
        ++p;
95
2.12M
    }
96
656k
    if (*p++ != 'R') {
97
1.68k
        return false;
98
1.68k
    }
99
654k
    if (*p) {
100
1.05k
        return false;
101
1.05k
    }
102
653k
    obj = QUtil::string_to_int(o_str.c_str());
103
653k
    gen = QUtil::string_to_int(g_str.c_str());
104
653k
    return obj > 0;
105
654k
}
106
107
static bool
108
is_obj_key(std::string const& v, int& obj, int& gen)
109
26.5k
{
110
26.5k
    if (v.substr(0, 4) != "obj:") {
111
2.00k
        return false;
112
2.00k
    }
113
24.5k
    return is_indirect_object(v.substr(4), obj, gen);
114
26.5k
}
115
116
// Return true if v starts with "u:"; in that case str receives everything after the prefix.
// str is left unchanged when the prefix is absent.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    bool const has_prefix = (v.compare(0, 2, "u:") == 0);
    if (has_prefix) {
        str = v.substr(2);
    }
    return has_prefix;
}
125
126
static bool
127
is_binary_string(std::string const& v, std::string& str)
128
117k
{
129
117k
    if (v.substr(0, 2) == "b:") {
130
7.98k
        str = v.substr(2);
131
7.98k
        int count = 0;
132
2.99M
        for (char c: str) {
133
2.99M
            if (!QUtil::is_hex_digit(c)) {
134
4.27k
                return false;
135
4.27k
            }
136
2.99M
            ++count;
137
2.99M
        }
138
3.71k
        return (count % 2 == 0);
139
7.98k
    }
140
109k
    return false;
141
117k
}
142
143
// Return true if v is at least two characters long and starts with '/'.
static bool
is_name(std::string const& v)
{
    if (v.size() < 2) {
        return false;
    }
    return v.front() == '/';
}
148
149
// Return true if v starts with "n:/" and has at least one more character after that prefix.
static bool
is_pdf_name(std::string const& v)
{
    if (v.size() < 4) {
        return false;
    }
    return v.compare(0, 3, "n:/") == 0;
}
154
155
bool
156
QPDF::test_json_validators()
157
0
{
158
0
    bool passed = true;
159
0
    auto check_fn = [&passed](char const* msg, bool expr) {
160
0
        if (!expr) {
161
0
            passed = false;
162
0
            std::cerr << msg << std::endl;
163
0
        }
164
0
    };
165
0
#define check(expr) check_fn(#expr, expr)
166
167
0
    int obj = 0;
168
0
    int gen = 0;
169
0
    check(!is_indirect_object("", obj, gen));
170
0
    check(!is_indirect_object("12", obj, gen));
171
0
    check(!is_indirect_object("x12 0 R", obj, gen));
172
0
    check(!is_indirect_object("12 0 Rx", obj, gen));
173
0
    check(!is_indirect_object("12 0R", obj, gen));
174
0
    check(is_indirect_object("52 1 R", obj, gen));
175
0
    check(obj == 52);
176
0
    check(gen == 1);
177
0
    check(is_indirect_object("53  20  R", obj, gen));
178
0
    check(obj == 53);
179
0
    check(gen == 20);
180
0
    check(!is_obj_key("", obj, gen));
181
0
    check(!is_obj_key("obj:x", obj, gen));
182
0
    check(!is_obj_key("obj:x", obj, gen));
183
0
    check(is_obj_key("obj:12 13 R", obj, gen));
184
0
    check(obj == 12);
185
0
    check(gen == 13);
186
0
    std::string str;
187
0
    check(!is_unicode_string("", str));
188
0
    check(!is_unicode_string("xyz", str));
189
0
    check(!is_unicode_string("x:", str));
190
0
    check(is_unicode_string("u:potato", str));
191
0
    check(str == "potato");
192
0
    check(is_unicode_string("u:", str));
193
0
    check(str == "");
194
0
    check(!is_binary_string("", str));
195
0
    check(!is_binary_string("x:", str));
196
0
    check(!is_binary_string("b:1", str));
197
0
    check(!is_binary_string("b:123", str));
198
0
    check(!is_binary_string("b:gh", str));
199
0
    check(is_binary_string("b:", str));
200
0
    check(is_binary_string("b:12", str));
201
0
    check(is_binary_string("b:123aBC", str));
202
0
    check(!is_name(""));
203
0
    check(!is_name("/"));
204
0
    check(!is_name("xyz"));
205
0
    check(is_name("/Potato"));
206
0
    check(is_name("/Potato Salad"));
207
208
0
    return passed;
209
0
#undef check_arg
210
0
}
211
212
static std::function<void(Pipeline*)>
213
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
214
6.50k
{
215
6.50k
    return [is, start, end](Pipeline* p) {
216
0
        Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
217
0
        p = &decode;
218
0
        size_t bytes = QIntC::to_size(end - start);
219
0
        char buf[8192];
220
0
        is->seek(start, SEEK_SET);
221
0
        size_t len = 0;
222
0
        while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
223
0
            p->write(buf, len);
224
0
            bytes -= len;
225
0
            if (bytes == 0) {
226
0
                break;
227
0
            }
228
0
        }
229
0
        decode.finish();
230
0
    };
231
6.50k
}
232
233
// JSON::Reactor implementation that receives parser callbacks for qpdf JSON input and applies the
// content to a QPDF object. It tracks where it is in the document with a stack of states (see the
// state transition chart near the top of this file) and records problems through error().
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // pdf: the QPDF object being populated.
    // is: the JSON input; its name is used in the object description and in error messages.
    // must_be_complete: when true, containerEnd() also reports a missing "pdfversion" or
    //   "trailer".
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        is(is),
        must_be_complete(must_be_complete),
        descr(std::make_shared<QPDFValue::Description>(
            QPDFValue::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
    }
    ~JSONReactor() override = default;
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True if error() has been called at least once.
    bool anyErrors() const;

  private:
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container: the state that applies inside it and, optionally, the
    // PDF object that the container's items are being added to.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            // A named rvalue reference is an lvalue; move explicitly so the handle is not copied.
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    std::shared_ptr<InputSource> is;
    bool must_be_complete{true};
    // Description shared by the objects created for the current top-level JSON object.
    std::shared_ptr<QPDFValue::Description> descr;
    bool errors{false};
    // Document-level flags, checked when the outermost container closes.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};
    // Key of the object currently being read ("trailer" or "obj:n n R"); used in messages.
    std::string cur_object;
    // Per-object flags, reset by containerEnd() on returning to st_objects.
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    bool this_stream_needs_data{false};
    std::vector<StackFrame> stack;
    // Object and state to attach to the next container that starts.
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
307
308
void
309
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
310
90.5k
{
311
90.5k
    errors = true;
312
90.5k
    std::string object = this->cur_object;
313
90.5k
    if (is->getName() != pdf.getFilename()) {
314
0
        object += " from " + is->getName();
315
0
    }
316
90.5k
    pdf.warn(qpdf_e_json, object, offset, msg);
317
90.5k
}
318
319
bool
320
QPDF::JSONReactor::anyErrors() const
321
88
{
322
88
    return errors;
323
88
}
324
325
void
326
QPDF::JSONReactor::containerStart()
327
174k
{
328
174k
    if (next_obj.isInitialized()) {
329
123k
        stack.emplace_back(next_state, std::move(next_obj));
330
123k
        next_obj = QPDFObjectHandle();
331
123k
    } else {
332
50.8k
        stack.emplace_back(next_state);
333
50.8k
    }
334
174k
}
335
336
void
337
QPDF::JSONReactor::dictionaryStart()
338
120k
{
339
120k
    containerStart();
340
120k
}
341
342
void
343
QPDF::JSONReactor::arrayStart()
344
55.0k
{
345
55.0k
    if (stack.empty()) {
346
829
        QTC::TC("qpdf", "QPDF_json top-level array");
347
829
        throw std::runtime_error("QPDF JSON must be a dictionary");
348
829
    }
349
54.1k
    containerStart();
350
54.1k
}
351
352
void
353
QPDF::JSONReactor::containerEnd(JSON const& value)
354
59.7k
{
355
59.7k
    auto from_state = stack.back().state;
356
59.7k
    stack.pop_back();
357
59.7k
    if (stack.empty()) {
358
103
        if (!this->saw_qpdf) {
359
36
            QTC::TC("qpdf", "QPDF_json missing qpdf");
360
36
            error(0, "\"qpdf\" object was not seen");
361
67
        } else {
362
67
            if (!this->saw_json_version) {
363
45
                QTC::TC("qpdf", "QPDF_json missing json version");
364
45
                error(0, "\"qpdf[0].jsonversion\" was not seen");
365
45
            }
366
67
            if (must_be_complete && !this->saw_pdf_version) {
367
44
                QTC::TC("qpdf", "QPDF_json missing pdf version");
368
44
                error(0, "\"qpdf[0].pdfversion\" was not seen");
369
44
            }
370
67
            if (!this->saw_objects) {
371
8
                QTC::TC("qpdf", "QPDF_json missing objects");
372
8
                error(0, "\"qpdf[1]\" was not seen");
373
59
            } else {
374
59
                if (must_be_complete && !this->saw_trailer) {
375
33
                    QTC::TC("qpdf", "QPDF_json missing trailer");
376
33
                    error(0, "\"qpdf[1].trailer\" was not seen");
377
33
                }
378
59
            }
379
67
        }
380
59.6k
    } else if (from_state == st_trailer) {
381
42
        if (!saw_value) {
382
5
            QTC::TC("qpdf", "QPDF_json trailer no value");
383
5
            error(value.getStart(), "\"trailer\" is missing \"value\"");
384
5
        }
385
59.5k
    } else if (from_state == st_object_top) {
386
17.4k
        if (saw_value == saw_stream) {
387
803
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
388
803
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
389
803
        }
390
17.4k
        if (saw_stream) {
391
7.47k
            if (!saw_dict) {
392
1.27k
                QTC::TC("qpdf", "QPDF_json stream no dict");
393
1.27k
                error(value.getStart(), "\"stream\" is missing \"dict\"");
394
1.27k
            }
395
7.47k
            if (saw_data == saw_datafile) {
396
969
                if (this_stream_needs_data) {
397
855
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
398
855
                    error(
399
855
                        value.getStart(),
400
855
                        "new \"stream\" must have exactly one of \"data\" or "
401
855
                        "\"datafile\"");
402
855
                } else if (saw_datafile) {
403
60
                    QTC::TC("qpdf", "QPDF_json data and datafile");
404
60
                    error(
405
60
                        value.getStart(),
406
60
                        "existing \"stream\" may at most one of \"data\" or "
407
60
                        "\"datafile\"");
408
60
                } else {
409
54
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
410
54
                }
411
969
            }
412
7.47k
        }
413
17.4k
    }
414
59.7k
    if (!stack.empty()) {
415
59.6k
        auto state = stack.back().state;
416
59.6k
        if (state == st_objects) {
417
18.0k
            this->cur_object = "";
418
18.0k
            this->saw_dict = false;
419
18.0k
            this->saw_data = false;
420
18.0k
            this->saw_datafile = false;
421
18.0k
            this->saw_value = false;
422
18.0k
            this->saw_stream = false;
423
18.0k
        }
424
59.6k
    }
425
59.7k
}
426
427
void
428
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
429
21.9k
{
430
21.9k
    if (replacement.isIndirect()) {
431
201
        error(
432
201
            replacement.getParsedOffset(),
433
201
            "the value of an object may not be an indirect object reference");
434
201
        return;
435
201
    }
436
21.7k
    auto& tos = stack.back();
437
21.7k
    auto og = tos.object.getObjGen();
438
21.7k
    this->pdf.replaceObject(og, replacement);
439
21.7k
    next_obj = pdf.getObject(og);
440
21.7k
    setObjectDescription(tos.object, value);
441
21.7k
}
442
443
void
444
QPDF::JSONReactor::topLevelScalar()
445
496
{
446
496
    QTC::TC("qpdf", "QPDF_json top-level scalar");
447
496
    throw std::runtime_error("QPDF JSON must be a dictionary");
448
496
}
449
450
bool
451
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
452
58.9k
{
453
    // Use this method when the next state is for processing a nested dictionary.
454
58.9k
    if (value.isDictionary()) {
455
54.4k
        this->next_state = next;
456
54.4k
        return true;
457
54.4k
    }
458
4.50k
    error(value.getStart(), "\"" + key + "\" must be a dictionary");
459
4.50k
    return false;
460
58.9k
}
461
462
bool
463
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
464
338k
{
465
338k
    if (stack.empty()) {
466
0
        throw std::logic_error("stack is empty in dictionaryItem");
467
0
    }
468
338k
    next_state = st_ignore;
469
338k
    auto state = stack.back().state;
470
338k
    if (state == st_ignore) {
471
12.4k
        QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
472
        // ignore
473
326k
    } else if (state == st_top) {
474
14.1k
        if (key == "qpdf") {
475
9.39k
            this->saw_qpdf = true;
476
9.39k
            if (!value.isArray()) {
477
11
                QTC::TC("qpdf", "QPDF_json qpdf not array");
478
11
                error(value.getStart(), "\"qpdf\" must be an array");
479
9.38k
            } else {
480
9.38k
                next_state = st_qpdf;
481
9.38k
            }
482
9.39k
        } else {
483
            // Ignore all other fields.
484
4.74k
            QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key");
485
4.74k
        }
486
311k
    } else if (state == st_qpdf_meta) {
487
3.49k
        if (key == "pdfversion") {
488
299
            this->saw_pdf_version = true;
489
299
            std::string v;
490
299
            bool okay = false;
491
299
            if (value.getString(v)) {
492
290
                std::string version;
493
290
                char const* p = v.c_str();
494
290
                if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
495
180
                    this->pdf.m->pdf_version = version;
496
180
                    okay = true;
497
180
                }
498
290
            }
499
299
            if (!okay) {
500
119
                QTC::TC("qpdf", "QPDF_json bad pdf version");
501
119
                error(value.getStart(), "invalid PDF version (must be \"x.y\")");
502
119
            }
503
3.19k
        } else if (key == "jsonversion") {
504
422
            this->saw_json_version = true;
505
422
            std::string v;
506
422
            bool okay = false;
507
422
            if (value.getNumber(v)) {
508
414
                std::string version;
509
414
                if (QUtil::string_to_int(v.c_str()) == 2) {
510
127
                    okay = true;
511
127
                }
512
414
            }
513
422
            if (!okay) {
514
173
                QTC::TC("qpdf", "QPDF_json bad json version");
515
173
                error(value.getStart(), "invalid JSON version (must be numeric value 2)");
516
173
            }
517
2.76k
        } else if (key == "pushedinheritedpageresources") {
518
66
            bool v;
519
66
            if (value.getBool(v)) {
520
64
                if (!this->must_be_complete && v) {
521
0
                    this->pdf.pushInheritedAttributesToPage();
522
0
                }
523
64
            } else {
524
2
                QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources");
525
2
                error(value.getStart(), "pushedinheritedpageresources must be a boolean");
526
2
            }
527
2.70k
        } else if (key == "calledgetallpages") {
528
80
            bool v;
529
80
            if (value.getBool(v)) {
530
71
                if (!this->must_be_complete && v) {
531
0
                    this->pdf.getAllPages();
532
0
                }
533
71
            } else {
534
9
                QTC::TC("qpdf", "QPDF_json bad calledgetallpages");
535
9
                error(value.getStart(), "calledgetallpages must be a boolean");
536
9
            }
537
2.62k
        } else {
538
            // ignore unknown keys for forward compatibility and to skip keys we don't care about
539
            // like "maxobjectid".
540
2.62k
            QTC::TC("qpdf", "QPDF_json ignore second-level key");
541
2.62k
        }
542
308k
    } else if (state == st_objects) {
543
28.1k
        int obj = 0;
544
28.1k
        int gen = 0;
545
28.1k
        if (key == "trailer") {
546
1.60k
            this->saw_trailer = true;
547
1.60k
            this->cur_object = "trailer";
548
1.60k
            setNextStateIfDictionary(key, value, st_trailer);
549
26.5k
        } else if (is_obj_key(key, obj, gen)) {
550
23.5k
            this->cur_object = key;
551
23.5k
            if (setNextStateIfDictionary(key, value, st_object_top)) {
552
23.2k
                next_obj = pdf.getObjectForJSON(obj, gen);
553
23.2k
            }
554
23.5k
        } else {
555
2.95k
            QTC::TC("qpdf", "QPDF_json bad object key");
556
2.95k
            error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
557
2.95k
        }
558
280k
    } else if (state == st_object_top) {
559
24.6k
        if (stack.empty()) {
560
0
            throw std::logic_error("stack empty in st_object_top");
561
0
        }
562
24.6k
        auto& tos = stack.back();
563
24.6k
        if (!tos.object.isInitialized()) {
564
0
            throw std::logic_error("current object uninitialized in st_object_top");
565
0
        }
566
24.6k
        if (key == "value") {
567
            // Don't use setNextStateIfDictionary since this can have any type.
568
14.5k
            this->saw_value = true;
569
14.5k
            replaceObject(makeObject(value), value);
570
14.5k
            next_state = st_object;
571
14.5k
        } else if (key == "stream") {
572
8.02k
            this->saw_stream = true;
573
8.02k
            if (setNextStateIfDictionary(key, value, st_stream)) {
574
7.99k
                this->this_stream_needs_data = false;
575
7.99k
                if (tos.object.isStream()) {
576
186
                    QTC::TC("qpdf", "QPDF_json updating existing stream");
577
7.80k
                } else {
578
7.80k
                    this->this_stream_needs_data = true;
579
7.80k
                    replaceObject(pdf.reserveStream(tos.object.getObjGen()), value);
580
7.80k
                }
581
7.99k
                next_obj = tos.object;
582
7.99k
            } else {
583
                // Error message already given above
584
31
                QTC::TC("qpdf", "QPDF_json stream not a dictionary");
585
31
            }
586
8.02k
        } else {
587
            // Ignore unknown keys for forward compatibility
588
2.03k
            QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top");
589
2.03k
        }
590
255k
    } else if (state == st_trailer) {
591
2.22k
        if (key == "value") {
592
1.55k
            this->saw_value = true;
593
            // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
594
1.55k
            if (setNextStateIfDictionary("trailer.value", value, st_object)) {
595
1.55k
                this->pdf.m->trailer = makeObject(value);
596
1.55k
                setObjectDescription(this->pdf.m->trailer, value);
597
1.55k
            }
598
1.55k
        } else if (key == "stream") {
599
            // Don't need to set saw_stream here since there's already an error.
600
2
            QTC::TC("qpdf", "QPDF_json trailer stream");
601
2
            error(value.getStart(), "the trailer may not be a stream");
602
667
        } else {
603
            // Ignore unknown keys for forward compatibility
604
667
            QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer");
605
667
        }
606
253k
    } else if (state == st_stream) {
607
14.5k
        if (stack.empty()) {
608
0
            throw std::logic_error("stack empty in st_stream");
609
0
        }
610
14.5k
        auto& tos = stack.back();
611
14.5k
        if (!tos.object.isStream()) {
612
0
            throw std::logic_error("current object is not stream in st_stream");
613
0
        }
614
14.5k
        auto uninitialized = QPDFObjectHandle();
615
14.5k
        if (key == "dict") {
616
6.24k
            this->saw_dict = true;
617
6.24k
            if (setNextStateIfDictionary("stream.dict", value, st_object)) {
618
6.19k
                tos.object.replaceDict(makeObject(value));
619
6.19k
            } else {
620
                // An error had already been given by setNextStateIfDictionary
621
46
                QTC::TC("qpdf", "QPDF_json stream dict not dict");
622
46
            }
623
8.31k
        } else if (key == "data") {
624
6.65k
            this->saw_data = true;
625
6.65k
            std::string v;
626
6.65k
            if (!value.getString(v)) {
627
151
                QTC::TC("qpdf", "QPDF_json stream data not string");
628
151
                error(value.getStart(), "\"stream.data\" must be a string");
629
151
                tos.object.replaceStreamData("", uninitialized, uninitialized);
630
6.50k
            } else {
631
                // The range includes the quotes.
632
6.50k
                auto start = value.getStart() + 1;
633
6.50k
                auto end = value.getEnd() - 1;
634
6.50k
                if (end < start) {
635
0
                    throw std::logic_error("QPDF_json: JSON string length < 0");
636
0
                }
637
6.50k
                tos.object.replaceStreamData(
638
6.50k
                    provide_data(is, start, end), uninitialized, uninitialized);
639
6.50k
            }
640
6.65k
        } else if (key == "datafile") {
641
429
            this->saw_datafile = true;
642
429
            std::string filename;
643
429
            if (!value.getString(filename)) {
644
100
                QTC::TC("qpdf", "QPDF_json stream datafile not string");
645
100
                error(
646
100
                    value.getStart(),
647
100
                    "\"stream.datafile\" must be a string containing a file name");
648
100
                tos.object.replaceStreamData("", uninitialized, uninitialized);
649
329
            } else {
650
329
                tos.object.replaceStreamData(
651
329
                    QUtil::file_provider(filename), uninitialized, uninitialized);
652
329
            }
653
1.23k
        } else {
654
            // Ignore unknown keys for forward compatibility.
655
1.23k
            QTC::TC("qpdf", "QPDF_json ignore unknown key in stream");
656
1.23k
        }
657
238k
    } else if (state == st_object) {
658
238k
        if (stack.empty()) {
659
0
            throw std::logic_error("stack empty in st_object");
660
0
        }
661
238k
        auto& tos = stack.back();
662
238k
        auto dict = tos.object;
663
238k
        if (dict.isStream()) {
664
0
            dict = dict.getDict();
665
0
        }
666
238k
        if (!dict.isDictionary()) {
667
0
            throw std::logic_error(
668
0
                "current object is not stream or dictionary in st_object dictionary item");
669
0
        }
670
238k
        dict.replaceKey(
671
238k
            is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
672
238k
            makeObject(value));
673
238k
    } else {
674
0
        throw std::logic_error("QPDF_json: unknown state " + std::to_string(state));
675
0
    }
676
338k
    return true;
677
338k
}
678
679
bool
680
QPDF::JSONReactor::arrayItem(JSON const& value)
681
6.12M
{
682
6.12M
    if (stack.empty()) {
683
0
        throw std::logic_error("stack is empty in arrayItem");
684
0
    }
685
6.12M
    next_state = st_ignore;
686
6.12M
    auto state = stack.back().state;
687
6.12M
    if (state == st_qpdf) {
688
42.8k
        if (!this->saw_qpdf_meta) {
689
9.37k
            this->saw_qpdf_meta = true;
690
9.37k
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
691
33.4k
        } else if (!this->saw_objects) {
692
8.53k
            this->saw_objects = true;
693
8.53k
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
694
24.9k
        } else {
695
24.9k
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
696
24.9k
            error(value.getStart(), "\"qpdf\" must have two elements");
697
24.9k
        }
698
6.08M
    } else if (state == st_object) {
699
5.54M
        stack.back().object.appendItem(makeObject(value));
700
5.54M
    }
701
6.12M
    return true;
702
6.12M
}
703
704
void
705
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
706
5.18M
{
707
5.18M
    auto j_descr = std::get<QPDFValue::JSON_Descr>(*descr);
708
5.18M
    if (j_descr.object != cur_object) {
709
23.1k
        descr = std::make_shared<QPDFValue::Description>(
710
23.1k
            QPDFValue::JSON_Descr(j_descr.input, cur_object));
711
23.1k
    }
712
713
5.18M
    oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart());
714
5.18M
}
715
716
// Create the PDF object that corresponds to a JSON value.
//  - dictionary / array: an empty PDF dictionary / array, which also becomes the pending
//    next_obj with next_state st_object so that the container's items are added to it;
//  - null, boolean: the matching PDF object;
//  - number: an integer if the text is a valid long long, otherwise a real (numbers containing
//    'e' or 'E' are first converted through a double where that conversion succeeds);
//  - string: an indirect reference, unicode string, binary string, or name depending on its
//    form; an unrecognized string is reported as an error and yields a PDF null.
// An object description is set on the result if it does not already have one.
// Throws std::logic_error if no branch produced an object.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle result;
    std::string text;
    bool flag = false;

    bool const is_dict = value.isDictionary();
    if (is_dict || value.isArray()) {
        result = is_dict ? QPDFObjectHandle::newDictionary() : QPDFObjectHandle::newArray();
        next_obj = result;
        next_state = st_object;
    } else if (value.isNull()) {
        result = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        result = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(text)) {
        if (QUtil::is_long_long(text.c_str())) {
            result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(text.c_str()));
        } else {
            // JSON allows scientific notation, but PDF does not.
            if (text.find_first_of("eE") != std::string::npos) {
                try {
                    text = QUtil::double_to_string(std::stod(text));
                } catch (std::exception const&) {
                    // Keep it as it was
                }
            }
            result = QPDFObjectHandle::newReal(text);
        }
    } else if (value.getString(text)) {
        int obj = 0;
        int gen = 0;
        std::string payload;
        // The order of these tests matters: each form is tried only if the earlier ones fail.
        if (is_indirect_object(text, obj, gen)) {
            result = pdf.getObjectForJSON(obj, gen);
        } else if (is_unicode_string(text, payload)) {
            result = QPDFObjectHandle::newUnicodeString(payload);
        } else if (is_binary_string(text, payload)) {
            result = QPDFObjectHandle::newString(QUtil::hex_decode(payload));
        } else if (is_name(text)) {
            result = QPDFObjectHandle::newName(text);
        } else if (is_pdf_name(text)) {
            result = QPDFObjectHandle::parse(text.substr(2));
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            result = QPDFObjectHandle::newNull();
        }
    }

    if (!result.isInitialized()) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }
    if (!result.hasObjectDescription()) {
        setObjectDescription(result, value);
    }
    return result;
}
778
779
void
780
QPDF::createFromJSON(std::string const& json_file)
781
0
{
782
0
    createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
783
0
}
784
785
void
786
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
787
13.2k
{
788
13.2k
    processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
789
13.2k
    importJSON(is, true);
790
13.2k
}
791
792
void
793
QPDF::updateFromJSON(std::string const& json_file)
794
0
{
795
0
    updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
796
0
}
797
798
void
799
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
800
0
{
801
0
    importJSON(is, false);
802
0
}
803
804
void
805
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
806
13.2k
{
807
13.2k
    JSONReactor reactor(*this, is, must_be_complete);
808
13.2k
    try {
809
13.2k
        JSON::parse(*is, &reactor);
810
13.2k
    } catch (std::runtime_error& e) {
811
13.1k
        throw std::runtime_error(is->getName() + ": " + e.what());
812
13.1k
    }
813
88
    if (reactor.anyErrors()) {
814
71
        throw std::runtime_error(is->getName() + ": errors found in JSON");
815
71
    }
816
88
}
817
818
void
819
writeJSONStreamFile(
820
    int version,
821
    JSON::Writer& jw,
822
    QPDF_Stream& stream,
823
    int id,
824
    qpdf_stream_decode_level_e decode_level,
825
    std::string const& file_prefix)
826
0
{
827
0
    auto filename = file_prefix + "-" + std::to_string(id);
828
0
    auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
829
0
    Pl_StdioFile f_pl{"stream data", f};
830
0
    stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
831
0
    f_pl.finish();
832
0
    fclose(f);
833
0
}
834
835
void
836
QPDF::writeJSON(
837
    int version,
838
    Pipeline* p,
839
    qpdf_stream_decode_level_e decode_level,
840
    qpdf_json_stream_data_e json_stream_data,
841
    std::string const& file_prefix,
842
    std::set<std::string> wanted_objects)
843
0
{
844
0
    bool first = true;
845
0
    writeJSON(version, p, true, first, decode_level, json_stream_data, file_prefix, wanted_objects);
846
0
}
847
848
void
849
QPDF::writeJSON(
850
    int version,
851
    Pipeline* p,
852
    bool complete,
853
    bool& first_key,
854
    qpdf_stream_decode_level_e decode_level,
855
    qpdf_json_stream_data_e json_stream_data,
856
    std::string const& file_prefix,
857
    std::set<std::string> wanted_objects)
858
0
{
859
0
    if (version != 2) {
860
0
        throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
861
0
    }
862
0
    JSON::Writer jw{p, 4};
863
0
    if (complete) {
864
0
        jw << "{";
865
0
    } else if (!first_key) {
866
0
        jw << ",";
867
0
    }
868
0
    first_key = false;
869
870
    /* clang-format off */
871
0
    jw << "\n"
872
0
          "  \"qpdf\": [\n"
873
0
          "    {\n"
874
0
          "      \"jsonversion\": " << std::to_string(version) << ",\n"
875
0
          "      \"pdfversion\": \"" << getPDFVersion() << "\",\n"
876
0
          "      \"pushedinheritedpageresources\": " <<  (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
877
0
          "      \"calledgetallpages\": " <<  (everCalledGetAllPages() ? "true" : "false") << ",\n"
878
0
          "      \"maxobjectid\": " <<  std::to_string(getObjectCount()) << "\n"
879
0
          "    },\n"
880
0
          "    {";
881
    /* clang-format on */
882
883
0
    bool all_objects = wanted_objects.empty();
884
0
    bool first = true;
885
0
    for (auto& obj: getAllObjects()) {
886
0
        auto const og = obj.getObjGen();
887
0
        std::string key = "obj:" + og.unparse(' ') + " R";
888
0
        if (all_objects || wanted_objects.count(key)) {
889
0
            if (first) {
890
0
                jw << "\n      \"" << key;
891
0
                first = false;
892
0
            } else {
893
0
                jw << "\n      },\n      \"" << key;
894
0
            }
895
0
            if (auto* stream = obj.getObjectPtr()->as<QPDF_Stream>()) {
896
0
                jw << "\": {\n        \"stream\": ";
897
0
                if (json_stream_data == qpdf_sj_file) {
898
0
                    writeJSONStreamFile(
899
0
                        version, jw, *stream, og.getObj(), decode_level, file_prefix);
900
0
                } else {
901
0
                    stream->writeStreamJSON(
902
0
                        version, jw, json_stream_data, decode_level, nullptr, "");
903
0
                }
904
0
            } else {
905
0
                jw << "\": {\n        \"value\": ";
906
0
                obj.writeJSON(version, jw, true);
907
0
            }
908
0
        }
909
0
    }
910
0
    if (all_objects || wanted_objects.count("trailer")) {
911
0
        if (!first) {
912
0
            jw << "\n      },";
913
0
        }
914
0
        jw << "\n      \"trailer\": {\n        \"value\": ";
915
0
        getTrailer().writeJSON(version, jw, true);
916
0
        first = false;
917
0
    }
918
0
    if (!first) {
919
0
        jw << "\n      }";
920
0
    }
921
    /* clang-format off */
922
0
    jw << "\n"
923
0
          "    }\n"
924
0
          "  ]";
925
    /* clang-format on */
926
0
    if (complete) {
927
0
        jw << "\n}\n";
928
0
        p->finish();
929
0
    }
930
0
}