Coverage Report

Created: 2025-08-29 06:56

/src/qpdf/include/qpdf/JSON.hh
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
7
// in compliance with the License. You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software distributed under the License
12
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
// or implied. See the License for the specific language governing permissions and limitations under
14
// the License.
15
//
16
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
17
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
18
// Please see the manual for additional information.
19
20
#ifndef JSON_HH
21
#define JSON_HH
22
23
#include <qpdf/DLL.h>
24
#include <qpdf/Types.h>
25
26
#include <functional>
27
#include <list>
28
#include <map>
29
#include <memory>
30
#include <set>
31
#include <string>
32
#include <vector>
33
34
class Pipeline;
35
class InputSource;
36
37
// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as
38
// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options.
39
// JSON objects contain their data as smart pointers. When one JSON object is added to another, this
40
// pointer is copied. This means you can create temporary JSON objects on the stack, add them to
41
// other objects, and let them go out of scope safely. It also means that if a JSON object is added
42
// in more than one place, all copies share the underlying data. This makes them similar in
43
// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it
44
// is also a good reason not to use this as a general-purpose JSON package.
45
class JSON
46
{
47
  public:
48
    static int constexpr LATEST = 2;
49
50
0
    JSON() = default;
51
52
    QPDF_DLL
53
    std::string unparse() const;
54
55
    // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested
56
    // this is in another JSON structure, which makes it possible to write clean-looking JSON
57
    // incrementally.
58
    QPDF_DLL
59
    void write(Pipeline*, size_t depth = 0) const;
60
61
    // Helper methods for writing JSON incrementally.
62
    //
63
    // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to
64
    // true, and the methods to output items always set it to false. This way, the item and close
65
    // methods can always know whether or not a first item is being written. The intended mode of
66
    // operation is to start with a new `bool first = true` each time a new container is opened and
67
    // to pass that `first` through to all the methods that are called to add top-level items to the
68
    // container as well as to close the container. This lets the JSON object use it to keep track
69
    // of when it's writing a first object and when it's not. If incrementally writing multiple
70
    // levels of depth, a new `first` should be used for each new container that is opened.
71
    //
72
    // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing
73
    // incrementally, whenever you call a method to add an item to a container, the value of `depth`
74
    // should be one more than whatever value is passed to the container open and close methods.
75
76
    // Open methods ignore the incoming value of first and set it to true.
77
    QPDF_DLL
78
    static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
79
    QPDF_DLL
80
    static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
81
    // Close methods don't modify first. A true value indicates that we are closing an empty object.
82
    QPDF_DLL
83
    static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
84
    QPDF_DLL
85
    static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
86
    // The item methods use the value of first to determine if this is the first item and always set
87
    // it to false.
88
    QPDF_DLL
89
    static void writeDictionaryItem(
90
        Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0);
91
    // Write just the key of a new dictionary item, useful if writing nested structures. Calls
92
    // writeNext.
93
    QPDF_DLL
94
    static void
95
    writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
96
    QPDF_DLL
97
    static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0);
98
    // If writing nested structures incrementally, call writeNext before opening a new array or
99
    // dictionary in the midst of an existing one. The `first` you pass to writeNext should be the
100
    // one for the parent object. The depth should be the one for the child object. Then start a new
101
    // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext
102
    // for you, so this is most important when writing subsequent items or container openers to an
103
    // array.
104
    QPDF_DLL
105
    static void writeNext(Pipeline* p, bool& first, size_t depth = 0);
106
107
    // The JSON spec calls dictionaries "objects", but that creates too much confusion when
108
    // referring to instances of the JSON class.
109
    QPDF_DLL
110
    static JSON makeDictionary();
111
    // addDictionaryMember returns the newly added item.
112
    QPDF_DLL
113
    JSON addDictionaryMember(std::string const& key, JSON const&);
114
    QPDF_DLL
115
    static JSON makeArray();
116
    // addArrayElement returns the newly added item.
117
    QPDF_DLL
118
    JSON addArrayElement(JSON const&);
119
    QPDF_DLL
120
    static JSON makeString(std::string const& utf8);
121
    QPDF_DLL
122
    static JSON makeInt(long long int value);
123
    QPDF_DLL
124
    static JSON makeReal(double value);
125
    QPDF_DLL
126
    static JSON makeNumber(std::string const& encoded);
127
    QPDF_DLL
128
    static JSON makeBool(bool value);
129
    QPDF_DLL
130
    static JSON makeNull();
131
132
    // A blob serializes as a string. The function will be called by JSON with a pipeline and should
133
    // write binary data to the pipeline but not call finish(). JSON will call finish() at the right
134
    // time.
135
    QPDF_DLL
136
    static JSON makeBlob(std::function<void(Pipeline*)>);
137
138
    QPDF_DLL
139
    bool isArray() const;
140
141
    QPDF_DLL
142
    bool isDictionary() const;
143
144
    // Accessors. Accessor behavior:
145
    //
146
    // - If argument is wrong type, including null, return false
147
    // - If argument is right type, return true and initialize the value
148
149
    QPDF_DLL
150
    bool getString(std::string& utf8) const;
151
    QPDF_DLL
152
    bool getNumber(std::string& value) const;
153
    QPDF_DLL
154
    bool getBool(bool& value) const;
155
    QPDF_DLL
156
    bool isNull() const;
157
    QPDF_DLL
158
    JSON getDictItem(std::string const& key) const;
159
    QPDF_DLL
160
    bool forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const;
161
    QPDF_DLL
162
    bool forEachArrayItem(std::function<void(JSON value)> fn) const;
163
164
    // Check this JSON object against a "schema". This is not a schema according to any standard.
165
    // It's just a template of what the JSON is supposed to contain. The checking does the
166
    // following:
167
    //
168
    //   * The schema is a nested structure containing dictionaries, single-element arrays, and
169
    //     strings only.
170
    //   * Recursively walk the schema. In the items below, "schema object" refers to an object in
171
    //     the schema, and "checked object" refers to the corresponding part of the object being
172
    //     checked.
173
    //   * If the schema object is a dictionary, the checked object must have a dictionary in the
174
    //     same place with the same keys. If flags contains f_optional, a key in the schema does not
175
    //     have to be present in the object. Otherwise, all keys have to be present. Any key in the
176
    //     object must be present in the schema.
177
    //   * If the schema object is an array of length 1, the checked object may either be a single
178
    //     item or an array of items. The single item or each element of the checked object's
179
    //     array is validated against the single element of the schema's array. The rationale behind
180
    //     this logic is that a single element may appear wherever the schema allows a
181
    //     variable-length array. This makes it possible to start allowing an array in the future
182
    //     where a single element was previously required without breaking backward compatibility.
183
    //   * If the schema object is an array of length > 1, the checked object must be an array of
184
    //     the same length. In this case, each element of the checked object array is validated
185
    //     against the corresponding element of the schema array.
186
    //   * Otherwise, the value must be a string whose value is a description of the object's
187
    //     corresponding value, which may have any type.
188
    //
189
    // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual.
190
    // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help
191
    // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a
192
    // bug in qpdf.
193
    //
194
    // Flags is a bitwise or of values from check_flags_e.
195
    enum check_flags_e {
196
        f_none = 0,
197
        f_optional = 1 << 0,
198
    };
199
    QPDF_DLL
200
    bool checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors);
201
202
    // Same as passing 0 for flags
203
    QPDF_DLL
204
    bool checkSchema(JSON schema, std::list<std::string>& errors);
205
206
    // A pointer to a Reactor class can be passed to parse, which will enable the caller to react
207
    // to incremental events in the construction of the JSON object. This makes it possible to
208
    // implement SAX-like handling of very large JSON objects.
209
    class QPDF_DLL_CLASS Reactor
210
    {
211
      public:
212
        virtual ~Reactor() = default;
213
214
        // The start/end methods are called when parsing of a dictionary or array is started or
215
        // ended. The item methods are called when an item is added to a dictionary or array. When
216
        // adding a container to another container, the item method is called with an empty
217
        // container before the lower container's start method is called. See important notes in
218
        // "Item methods" below.
219
220
        // During parsing of a JSON string, the parser is operating on a single object at a time.
221
        // When a dictionary or array is started, a new context begins, and when that dictionary or
222
        // array is ended, the previous context is resumed. So, for
223
        // example, if you have `{"a": [1]}`, you will receive the
224
        // following method calls
225
        //
226
        // dictionaryStart -- current object is the top-level dictionary
227
        // dictionaryItem  -- called with "a" and an empty array
228
        // arrayStart      -- current object is the array
229
        // arrayItem       -- called with the "1" object
230
        // containerEnd    -- now current object is the dictionary again
231
        // containerEnd    -- current object is undefined
232
        //
233
        // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be
234
        // called. No argument is passed since the object is the same as what is returned by
235
        // parse().
236
237
        QPDF_DLL
238
        virtual void dictionaryStart() = 0;
239
        QPDF_DLL
240
        virtual void arrayStart() = 0;
241
        QPDF_DLL
242
        virtual void containerEnd(JSON const& value) = 0;
243
        QPDF_DLL
244
        virtual void topLevelScalar() = 0;
245
246
        // Item methods:
247
        //
248
        // The return value of the item methods indicates whether the item has been "consumed". If
249
        // the item method returns true, then the item will not be added to the containing JSON
250
        // object. This is what allows arbitrarily large JSON objects
251
        // to be parsed and not have to be kept in memory.
252
        //
253
        // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or
254
        // arrayItem method is called when the child item's start delimiter is encountered, so the
255
        // JSON object passed in at that time will always be in its initial, empty state.
256
        // Additionally, the child item's start method is not called until after the parent item's
257
        // item method is called. This makes it possible to keep track of the current depth level by
258
        // incrementing level on start methods and decrementing on end methods.
259
260
        QPDF_DLL
261
        virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0;
262
        QPDF_DLL
263
        virtual bool arrayItem(JSON const& value) = 0;
264
    };
265
266
    // Create a JSON object from a string.
267
    QPDF_DLL
268
    static JSON parse(std::string const&);
269
    // Create a JSON object from an input source. See above for information about how to use the
270
    // Reactor.
271
    QPDF_DLL
272
    static JSON parse(InputSource&, Reactor* reactor = nullptr);
273
274
    // parse calls setStart and setEnd to set the inclusive start and non-inclusive end offsets of
275
    // an object relative to its input string. Otherwise, both values are 0.
276
    QPDF_DLL
277
    void setStart(qpdf_offset_t);
278
    QPDF_DLL
279
    void setEnd(qpdf_offset_t);
280
    QPDF_DLL
281
    qpdf_offset_t getStart() const;
282
    QPDF_DLL
283
    qpdf_offset_t getEnd() const;
284
285
    // The following class does not form part of the public API and is for internal use only.
286
287
    class Writer;
288
289
  private:
290
    static void writeClose(Pipeline* p, bool first, size_t depth, char const* delimeter);
291
292
    enum value_type_e {
293
        vt_none,
294
        vt_dictionary,
295
        vt_array,
296
        vt_string,
297
        vt_number,
298
        vt_bool,
299
        vt_null,
300
        vt_blob,
301
    };
302
303
    struct JSON_value
304
    {
305
        JSON_value(value_type_e type_code) :
306
0
            type_code(type_code)
307
0
        {
308
0
        }
309
0
        virtual ~JSON_value() = default;
310
        virtual void write(Pipeline*, size_t depth) const = 0;
311
        const value_type_e type_code{vt_none};
312
    };
313
    struct JSON_dictionary: public JSON_value
314
    {
315
        JSON_dictionary() :
316
0
            JSON_value(vt_dictionary)
317
0
        {
318
0
        }
319
0
        ~JSON_dictionary() override = default;
320
        void write(Pipeline*, size_t depth) const override;
321
        std::map<std::string, JSON> members;
322
    };
323
    struct JSON_array;
324
    struct JSON_string: public JSON_value
325
    {
326
        JSON_string(std::string const& utf8);
327
0
        ~JSON_string() override = default;
328
        void write(Pipeline*, size_t depth) const override;
329
        std::string utf8;
330
    };
331
    struct JSON_number: public JSON_value
332
    {
333
        JSON_number(long long val);
334
        JSON_number(double val);
335
        JSON_number(std::string const& val);
336
0
        ~JSON_number() override = default;
337
        void write(Pipeline*, size_t depth) const override;
338
        std::string encoded;
339
    };
340
    struct JSON_bool: public JSON_value
341
    {
342
        JSON_bool(bool val);
343
        ~JSON_bool() override = default;
344
        void write(Pipeline*, size_t depth) const override;
345
        bool value;
346
    };
347
    struct JSON_null: public JSON_value
348
    {
349
        JSON_null() :
350
0
            JSON_value(vt_null)
351
0
        {
352
0
        }
353
        ~JSON_null() override = default;
354
        void write(Pipeline*, size_t depth) const override;
355
    };
356
    struct JSON_blob: public JSON_value
357
    {
358
        JSON_blob(std::function<void(Pipeline*)> fn);
359
0
        ~JSON_blob() override = default;
360
        void write(Pipeline*, size_t depth) const override;
361
        std::function<void(Pipeline*)> fn;
362
    };
363
364
    JSON(std::unique_ptr<JSON_value>);
365
366
    static bool checkSchemaInternal(
367
        JSON_value* this_v,
368
        JSON_value* sch_v,
369
        unsigned long flags,
370
        std::list<std::string>& errors,
371
        std::string prefix);
372
373
    class Members
374
    {
375
        friend class JSON;
376
377
      public:
378
0
        ~Members() = default;
379
380
      private:
381
        Members(std::unique_ptr<JSON_value>);
382
        Members(Members const&) = delete;
383
384
        std::unique_ptr<JSON_value> value;
385
        // start and end are only populated for objects created by parse
386
        qpdf_offset_t start{0};
387
        qpdf_offset_t end{0};
388
    };
389
390
    std::shared_ptr<Members> m;
391
};
392
393
struct JSON::JSON_array: public JSON_value
394
{
395
    JSON_array() :
396
0
        JSON_value(vt_array)
397
0
    {
398
0
    }
399
0
    ~JSON_array() override = default;
400
    void write(Pipeline*, size_t depth) const override;
401
    std::vector<JSON> elements;
402
};
403
404
#endif // JSON_HH