/src/qpdf/include/qpdf/JSON.hh
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2005-2021 Jay Berkenbilt |
2 | | // Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger |
3 | | // |
4 | | // This file is part of qpdf. |
5 | | // |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
7 | | // in compliance with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
12 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
13 | | // or implied. See the License for the specific language governing permissions and limitations under |
14 | | // the License. |
15 | | // |
16 | | // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
17 | | // License. At your option, you may continue to consider qpdf to be licensed under those terms. |
18 | | // Please see the manual for additional information. |
19 | | |
20 | | #ifndef JSON_HH |
21 | | #define JSON_HH |
22 | | |
23 | | #include <qpdf/DLL.h> |
24 | | #include <qpdf/Types.h> |
25 | | |
26 | | #include <functional> |
27 | | #include <list> |
28 | | #include <map> |
29 | | #include <memory> |
30 | | #include <set> |
31 | | #include <string> |
32 | | #include <vector> |
33 | | |
34 | | class Pipeline; |
35 | | class InputSource; |
36 | | |
37 | | // This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as |
38 | | // JSON. While it may work as a general-purpose JSON parser/serializer, there are better options. |
39 | | // JSON objects contain their data as smart pointers. When one JSON object is added to another, this |
40 | | // pointer is copied. This means you can create temporary JSON objects on the stack, add them to |
41 | | // other objects, and let them go out of scope safely. It also means that if a JSON object is added |
42 | | // in more than one place, all copies share the underlying data. This makes them similar in |
43 | | // structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it |
44 | | // is also a good reason not to use this as a general-purpose JSON package. |
45 | | class JSON |
46 | | { |
47 | | public: |
48 | | static int constexpr LATEST = 2; |
49 | | |
50 | 1.44M | JSON() = default; |
51 | | |
52 | | QPDF_DLL |
53 | | std::string unparse() const; |
54 | | |
55 | | // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested |
56 | | // this is in another JSON structure, which makes it possible to write clean-looking JSON |
57 | | // incrementally. |
58 | | QPDF_DLL |
59 | | void write(Pipeline*, size_t depth = 0) const; |
60 | | |
61 | | // Helper methods for writing JSON incrementally. |
62 | | // |
63 | | // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to |
64 | | // true, and the methods to output items always set it to false. This way, the item and close |
65 | | // methods can always know whether or not a first item is being written. The intended mode of |
66 | | // operation is to start with a new `bool first = true` each time a new container is opened and |
67 | | // to pass that `first` through to all the methods that are called to add top-level items to the |
68 | | // container as well as to close the container. This lets the JSON object use it to keep track |
69 | | // of when it's writing a first object and when it's not. If incrementally writing multiple |
70 | | // levels of depth, a new `first` should be used for each new container that is opened. |
71 | | // |
72 | | // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing |
73 | | // incrementally, whenever you call a method to add an item to a container, the value of `depth` |
74 | | // should be one more than whatever value is passed to the container open and close methods. |
75 | | |
76 | | // Open methods ignore the incoming value of first and set it to true |
77 | | QPDF_DLL |
78 | | static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); |
79 | | QPDF_DLL |
80 | | static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); |
81 | | // Close methods don't modify first. A true value indicates that we are closing an empty object. |
82 | | QPDF_DLL |
83 | | static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); |
84 | | QPDF_DLL |
85 | | static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); |
86 | | // The item methods use the value of first to determine if this is the first item and always set |
87 | | // it to false. |
88 | | QPDF_DLL |
89 | | static void writeDictionaryItem( |
90 | | Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); |
91 | | // Write just the key of a new dictionary item, useful if writing nested structures. Calls |
92 | | // writeNext. |
93 | | QPDF_DLL |
94 | | static void |
95 | | writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); |
96 | | QPDF_DLL |
97 | | static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); |
98 | | // If writing nested structures incrementally, call writeNext before opening a new array or |
99 | | // dictionary in the midst of an existing one. The `first` you pass to writeNext should be the |
100 | | // one for the parent object. The depth should be the one for the child object. Then start a new |
101 | | // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext |
102 | | // for you, so this is most important when writing subsequent items or container openers to an |
103 | | // array. |
104 | | QPDF_DLL |
105 | | static void writeNext(Pipeline* p, bool& first, size_t depth = 0); |
106 | | |
107 | | // The JSON spec calls dictionaries "objects", but that creates too much confusion when |
108 | | // referring to instances of the JSON class. |
109 | | QPDF_DLL |
110 | | static JSON makeDictionary(); |
111 | | // addDictionaryMember returns the newly added item. |
112 | | QPDF_DLL |
113 | | JSON addDictionaryMember(std::string const& key, JSON const&); |
114 | | QPDF_DLL |
115 | | static JSON makeArray(); |
116 | | // addArrayElement returns the newly added item. |
117 | | QPDF_DLL |
118 | | JSON addArrayElement(JSON const&); |
119 | | QPDF_DLL |
120 | | static JSON makeString(std::string const& utf8); |
121 | | QPDF_DLL |
122 | | static JSON makeInt(long long int value); |
123 | | QPDF_DLL |
124 | | static JSON makeReal(double value); |
125 | | QPDF_DLL |
126 | | static JSON makeNumber(std::string const& encoded); |
127 | | QPDF_DLL |
128 | | static JSON makeBool(bool value); |
129 | | QPDF_DLL |
130 | | static JSON makeNull(); |
131 | | |
132 | | // A blob serializes as a string. The function will be called by JSON with a pipeline and should |
133 | | // write binary data to the pipeline but not call finish(). JSON will call finish() at the right |
134 | | // time. |
135 | | QPDF_DLL |
136 | | static JSON makeBlob(std::function<void(Pipeline*)>); |
137 | | |
138 | | QPDF_DLL |
139 | | bool isArray() const; |
140 | | |
141 | | QPDF_DLL |
142 | | bool isDictionary() const; |
143 | | |
144 | | // Accessors. Accessor behavior: |
145 | | // |
146 | | // - If argument is wrong type, including null, return false |
147 | | // - If argument is right type, return true and initialize the value |
148 | | |
149 | | QPDF_DLL |
150 | | bool getString(std::string& utf8) const; |
151 | | QPDF_DLL |
152 | | bool getNumber(std::string& value) const; |
153 | | QPDF_DLL |
154 | | bool getBool(bool& value) const; |
155 | | QPDF_DLL |
156 | | bool isNull() const; |
157 | | QPDF_DLL |
158 | | JSON getDictItem(std::string const& key) const; |
159 | | QPDF_DLL |
160 | | bool forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const; |
161 | | QPDF_DLL |
162 | | bool forEachArrayItem(std::function<void(JSON value)> fn) const; |
163 | | |
164 | | // Check this JSON object against a "schema". This is not a schema according to any standard. |
165 | | // It's just a template of what the JSON is supposed to contain. The checking does the |
166 | | // following: |
167 | | // |
168 | | // * The schema is a nested structure containing dictionaries, single-element arrays, and |
169 | | // strings only. |
170 | | // * Recursively walk the schema. In the items below, "schema object" refers to an object in |
171 | | // the schema, and "checked object" refers to the corresponding part of the object being |
172 | | // checked. |
173 | | // * If the schema object is a dictionary, the checked object must have a dictionary in the |
174 | | // same place with the same keys. If flags contains f_optional, a key in the schema does not |
175 | | // have to be present in the object. Otherwise, all keys have to be present. Any key in the |
176 | | // object must be present in the schema. |
177 | | // * If the schema object is an array of length 1, the checked object may either be a single |
178 | | // item or an array of items. The single item or each element of the checked object's |
179 | | // array is validated against the single element of the schema's array. The rationale behind |
180 | | // this logic is that a single element may appear wherever the schema allows a |
181 | | // variable-length array. This makes it possible to start allowing an array in the future |
182 | | // where a single element was previously required without breaking backward compatibility. |
183 | | // * If the schema object is an array of length > 1, the checked object must be an array of |
184 | | // the same length. In this case, each element of the checked object array is validated |
185 | | // against the corresponding element of the schema array. |
186 | | // * Otherwise, the value must be a string whose value is a description of the object's |
187 | | // corresponding value, which may have any type. |
188 | | // |
189 | | // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual. |
190 | | // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help |
191 | | // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a |
192 | | // bug in qpdf. |
193 | | // |
194 | | // Flags is a bitwise or of values from check_flags_e. |
195 | | enum check_flags_e { |
196 | | f_none = 0, |
197 | | f_optional = 1 << 0, |
198 | | }; |
199 | | QPDF_DLL |
200 | | bool checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors); |
201 | | |
202 | | // Same as passing 0 for flags |
203 | | QPDF_DLL |
204 | | bool checkSchema(JSON schema, std::list<std::string>& errors); |
205 | | |
206 | | // A pointer to a Reactor class can be passed to parse, which will enable the caller to react |
207 | | // to incremental events in the construction of the JSON object. This makes it possible to |
208 | | // implement SAX-like handling of very large JSON objects. |
209 | | class QPDF_DLL_CLASS Reactor |
210 | | { |
211 | | public: |
212 | | virtual ~Reactor() = default; |
213 | | |
214 | | // The start/end methods are called when parsing of a dictionary or array is started or |
215 | | // ended. The item methods are called when an item is added to a dictionary or array. When |
216 | | // adding a container to another container, the item method is called with an empty |
217 | | // container before the lower container's start method is called. See important notes in |
218 | | // "Item methods" below. |
219 | | |
220 | | // During parsing of a JSON string, the parser is operating on a single object at a time. |
221 | | // When a dictionary or array is started, a new context begins, and when that dictionary or |
222 | | // array is ended, the previous context is resumed. So, for |
223 | | // example, if you have `{"a": [1]}`, you will receive the |
224 | | // following method calls: |
225 | | // |
226 | | // dictionaryStart -- current object is the top-level dictionary |
227 | | // dictionaryItem -- called with "a" and an empty array |
228 | | // arrayStart -- current object is the array |
229 | | // arrayItem -- called with the "1" object |
230 | | // containerEnd -- now current object is the dictionary again |
231 | | // containerEnd -- current object is undefined |
232 | | // |
233 | | // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be |
234 | | // called. No argument is passed since the object is the same as what is returned by |
235 | | // parse(). |
236 | | |
237 | | QPDF_DLL |
238 | | virtual void dictionaryStart() = 0; |
239 | | QPDF_DLL |
240 | | virtual void arrayStart() = 0; |
241 | | QPDF_DLL |
242 | | virtual void containerEnd(JSON const& value) = 0; |
243 | | QPDF_DLL |
244 | | virtual void topLevelScalar() = 0; |
245 | | |
246 | | // Item methods: |
247 | | // |
248 | | // The return value of the item methods indicates whether the item has been "consumed". If |
249 | | // the item method returns true, then the item will not be added to the containing JSON |
250 | | // object. This is what allows arbitrarily large JSON objects |
251 | | // to be parsed and not have to be kept in memory. |
252 | | // |
253 | | // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or |
254 | | // arrayItem method is called when the child item's start delimiter is encountered, so the |
255 | | // JSON object passed in at that time will always be in its initial, empty state. |
256 | | // Additionally, the child item's start method is not called until after the parent item's |
257 | | // item method is called. This makes it possible to keep track of the current depth level by |
258 | | // incrementing level on start methods and decrementing on end methods. |
259 | | |
260 | | QPDF_DLL |
261 | | virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; |
262 | | QPDF_DLL |
263 | | virtual bool arrayItem(JSON const& value) = 0; |
264 | | }; |
265 | | |
266 | | // Create a JSON object from a string. |
267 | | QPDF_DLL |
268 | | static JSON parse(std::string const&); |
269 | | // Create a JSON object from an input source. See above for information about how to use the |
270 | | // Reactor. |
271 | | QPDF_DLL |
272 | | static JSON parse(InputSource&, Reactor* reactor = nullptr); |
273 | | |
274 | | // parse calls setStart and setEnd to set the inclusive start and non-inclusive end offsets of |
275 | | // an object relative to its input string. Otherwise, both values are 0. |
276 | | QPDF_DLL |
277 | | void setStart(qpdf_offset_t); |
278 | | QPDF_DLL |
279 | | void setEnd(qpdf_offset_t); |
280 | | QPDF_DLL |
281 | | qpdf_offset_t getStart() const; |
282 | | QPDF_DLL |
283 | | qpdf_offset_t getEnd() const; |
284 | | |
285 | | // The following class does not form part of the public API and is for internal use only. |
286 | | |
287 | | class Writer; |
288 | | |
289 | | private: |
290 | | static void writeClose(Pipeline* p, bool first, size_t depth, char const* delimeter); |
291 | | |
292 | | enum value_type_e { |
293 | | vt_none, |
294 | | vt_dictionary, |
295 | | vt_array, |
296 | | vt_string, |
297 | | vt_number, |
298 | | vt_bool, |
299 | | vt_null, |
300 | | vt_blob, |
301 | | }; |
302 | | |
303 | | struct JSON_value |
304 | | { |
305 | | JSON_value(value_type_e type_code) : |
306 | 448k | type_code(type_code) |
307 | 448k | { |
308 | 448k | } |
309 | 448k | virtual ~JSON_value() = default; |
310 | | virtual void write(Pipeline*, size_t depth) const = 0; |
311 | | const value_type_e type_code{vt_none}; |
312 | | }; |
313 | | struct JSON_dictionary: public JSON_value |
314 | | { |
315 | | JSON_dictionary() : |
316 | 55.9k | JSON_value(vt_dictionary) |
317 | 55.9k | { |
318 | 55.9k | } |
319 | 55.9k | ~JSON_dictionary() override = default; |
320 | | void write(Pipeline*, size_t depth) const override; |
321 | | std::map<std::string, JSON> members; |
322 | | }; |
323 | | struct JSON_array; |
324 | | struct JSON_string: public JSON_value |
325 | | { |
326 | | JSON_string(std::string const& utf8); |
327 | 218k | ~JSON_string() override = default; |
328 | | void write(Pipeline*, size_t depth) const override; |
329 | | std::string utf8; |
330 | | }; |
331 | | struct JSON_number: public JSON_value |
332 | | { |
333 | | JSON_number(long long val); |
334 | | JSON_number(double val); |
335 | | JSON_number(std::string const& val); |
336 | 104k | ~JSON_number() override = default; |
337 | | void write(Pipeline*, size_t depth) const override; |
338 | | std::string encoded; |
339 | | }; |
340 | | struct JSON_bool: public JSON_value |
341 | | { |
342 | | JSON_bool(bool val); |
343 | | ~JSON_bool() override = default; |
344 | | void write(Pipeline*, size_t depth) const override; |
345 | | bool value; |
346 | | }; |
347 | | struct JSON_null: public JSON_value |
348 | | { |
349 | | JSON_null() : |
350 | 29.7k | JSON_value(vt_null) |
351 | 29.7k | { |
352 | 29.7k | } |
353 | | ~JSON_null() override = default; |
354 | | void write(Pipeline*, size_t depth) const override; |
355 | | }; |
356 | | struct JSON_blob: public JSON_value |
357 | | { |
358 | | JSON_blob(std::function<void(Pipeline*)> fn); |
359 | 0 | ~JSON_blob() override = default; |
360 | | void write(Pipeline*, size_t depth) const override; |
361 | | std::function<void(Pipeline*)> fn; |
362 | | }; |
363 | | |
364 | | JSON(std::unique_ptr<JSON_value>); |
365 | | |
366 | | static bool checkSchemaInternal( |
367 | | JSON_value* this_v, |
368 | | JSON_value* sch_v, |
369 | | unsigned long flags, |
370 | | std::list<std::string>& errors, |
371 | | std::string prefix); |
372 | | |
373 | | class Members |
374 | | { |
375 | | friend class JSON; |
376 | | |
377 | | public: |
378 | 448k | ~Members() = default; |
379 | | |
380 | | private: |
381 | | Members(std::unique_ptr<JSON_value>); |
382 | | Members(Members const&) = delete; |
383 | | |
384 | | std::unique_ptr<JSON_value> value; |
385 | | // start and end are only populated for objects created by parse |
386 | | qpdf_offset_t start{0}; |
387 | | qpdf_offset_t end{0}; |
388 | | }; |
389 | | |
390 | | std::shared_ptr<Members> m; |
391 | | }; |
392 | | |
393 | | struct JSON::JSON_array: public JSON_value |
394 | | { |
395 | | JSON_array() : |
396 | 31.8k | JSON_value(vt_array) |
397 | 31.8k | { |
398 | 31.8k | } |
399 | 31.8k | ~JSON_array() override = default; |
400 | | void write(Pipeline*, size_t depth) const override; |
401 | | std::vector<JSON> elements; |
402 | | }; |
403 | | |
404 | | #endif // JSON_HH |