Line | Count | Source |
1 | | /* |
2 | | * This file is part of mpv. |
3 | | * |
4 | | * mpv is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * mpv is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with mpv. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | |
18 | | /* JSON parser: |
19 | | * |
20 | | * Unlike standard JSON, \u escapes don't allow you to specify UTF-16 surrogate |
21 | | * pairs. There may be some differences in how numbers are parsed (this parser |
22 | | * doesn't verify what's passed to strtod(), and also prefers parsing numbers |
23 | | * as integers with strtoll() if possible). |
24 | | * |
25 | | * It has some non-standard extensions which shouldn't conflict with JSON: |
26 | | * - a list or object item can have a trailing "," |
27 | | * - object syntax accepts "=" in addition to ":" |
28 | | * - object keys can be unquoted, if they start with a character in [A-Za-z_] |
29 | | * and contain only characters in [A-Za-z0-9_] |
30 | | * - byte escapes with "\xAB" are allowed (with AB being a 2 digit hex number) |
31 | | * |
32 | | * Also see: http://tools.ietf.org/html/rfc8259 |
33 | | * |
34 | | * JSON writer: |
35 | | * |
36 | | * Doesn't insert whitespace. It's literally a waste of space. |
37 | | * |
38 | | * Can output invalid UTF-8, if input is invalid UTF-8. Consumers are supposed |
39 | | * to deal with this somehow: either by using byte-strings for JSON, or by running |
40 | | * a "fixup" pass on the input data. The latter could for example change |
41 | | * invalid UTF-8 sequences to replacement characters. |
42 | | */ |
43 | | |
44 | | #include <stdlib.h> |
45 | | #include <string.h> |
46 | | #include <math.h> |
47 | | #include <errno.h> |
48 | | #include <inttypes.h> |
49 | | #include <assert.h> |
50 | | |
51 | | #include <mpv/client.h> |
52 | | |
53 | | #include "common/common.h" |
54 | | #include "misc/bstr.h" |
55 | | #include "misc/ctype.h" |
56 | | |
57 | | #include "json.h" |
58 | | |
// Consume one character: if the byte at *s equals c, advance *s past it.
// Returns true when the character was consumed, false (and *s untouched)
// otherwise.
static bool eat_c(char **s, char c)
{
    const bool matched = (**s == c);
    if (matched)
        ++*s;
    return matched;
}
67 | | |
// Advance *src past any run of JSON whitespace (space, tab, LF, CR).
// Stops at the first other byte, including the terminating NUL.
static void eat_ws(char **src)
{
    char *p = *src;
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
        p++;
    *src = p;
}
77 | | |
// Public entry point for whitespace skipping: moves *src forward to the first
// byte that is not a space, tab, line feed or carriage return.
void json_skip_whitespace(char **src)
{
    eat_ws(src);
}
82 | | |
83 | | static int read_id(void *ta_parent, struct mpv_node *dst, char **src) |
84 | 49.6k | { |
85 | 49.6k | char *start = *src; |
86 | 49.6k | if (!mp_isalpha(**src) && **src != '_') |
87 | 747 | return -1; |
88 | 361k | while (mp_isalnum(**src) || **src == '_') |
89 | 312k | *src += 1; |
90 | 48.9k | if (**src == ' ') { |
91 | 380 | **src = '\0'; // we're allowed to mutate it => can avoid the strndup |
92 | 380 | *src += 1; |
93 | 48.5k | } else { |
94 | 48.5k | start = talloc_strndup(ta_parent, start, *src - start); |
95 | 48.5k | } |
96 | 48.9k | dst->format = MPV_FORMAT_STRING; |
97 | 48.9k | dst->u.string = start; |
98 | 48.9k | return 0; |
99 | 49.6k | } |
100 | | |
101 | | static int read_str(void *ta_parent, struct mpv_node *dst, char **src) |
102 | 2.44k | { |
103 | 2.44k | if (!eat_c(src, '"')) |
104 | 78 | return -1; // not a string |
105 | 2.37k | char *str = *src; |
106 | 2.37k | char *cur = str; |
107 | 2.37k | bool has_escapes = false; |
108 | 2.64M | while (cur[0] && cur[0] != '"') { |
109 | 2.64M | if (cur[0] == '\\') { |
110 | 23.1k | has_escapes = true; |
111 | | // skip >\"< and >\\< (latter to handle >\\"< correctly) |
112 | 23.1k | if (cur[1] == '"' || cur[1] == '\\') |
113 | 2.48k | cur++; |
114 | 23.1k | } |
115 | 2.64M | cur++; |
116 | 2.64M | } |
117 | 2.37k | if (cur[0] != '"') |
118 | 143 | return -1; // invalid termination |
119 | | // Mutate input string so we have a null-terminated string to the literal. |
120 | | // This is a stupid micro-optimization, so we can avoid allocation. |
121 | 2.22k | cur[0] = '\0'; |
122 | 2.22k | *src = cur + 1; |
123 | 2.22k | if (has_escapes) { |
124 | 1.43k | bstr unescaped = {0}; |
125 | 1.43k | bstr r = bstr0(str); |
126 | 1.43k | if (!mp_append_escaped_string(ta_parent, &unescaped, &r)) |
127 | 236 | return -1; // broken escapes |
128 | 1.19k | str = unescaped.start; // the function guarantees null-termination |
129 | 1.19k | } |
130 | 1.99k | dst->format = MPV_FORMAT_STRING; |
131 | 1.99k | dst->u.string = str; |
132 | 1.99k | return 0; |
133 | 2.22k | } |
134 | | |
135 | | static int read_sub(void *ta_parent, struct mpv_node *dst, char **src, |
136 | | int max_depth) |
137 | 6.66k | { |
138 | 6.66k | bool is_arr = eat_c(src, '['); |
139 | 6.66k | bool is_obj = !is_arr && eat_c(src, '{'); |
140 | 6.66k | if (!is_arr && !is_obj) |
141 | 0 | return -1; // not an array or object |
142 | 6.66k | char term = is_obj ? '}' : ']'; |
143 | 6.66k | struct mpv_node_list *list = talloc_zero(ta_parent, struct mpv_node_list); |
144 | 504k | while (1) { |
145 | 504k | eat_ws(src); |
146 | 504k | if (eat_c(src, term)) |
147 | 2.43k | break; |
148 | 502k | if (list->num > 0 && !eat_c(src, ',')) |
149 | 103 | return -1; // missing ',' |
150 | 502k | eat_ws(src); |
151 | | // non-standard extension: allow a trailing "," |
152 | 502k | if (eat_c(src, term)) |
153 | 288 | break; |
154 | 501k | if (is_obj) { |
155 | 49.6k | struct mpv_node keynode; |
156 | | // non-standard extension: allow unquoted strings as keys |
157 | 49.6k | if (read_id(list, &keynode, src) < 0 && |
158 | 747 | read_str(list, &keynode, src) < 0) |
159 | 156 | return -1; // key is not a string |
160 | 49.5k | eat_ws(src); |
161 | | // non-standard extension: allow "=" instead of ":" |
162 | 49.5k | if (!eat_c(src, ':') && !eat_c(src, '=')) |
163 | 206 | return -1; // ':' missing |
164 | 49.3k | eat_ws(src); |
165 | 49.3k | MP_TARRAY_GROW(list, list->keys, list->num); |
166 | 49.3k | list->keys[list->num] = keynode.u.string; |
167 | 49.3k | } |
168 | 501k | MP_TARRAY_GROW(list, list->values, list->num); |
169 | 501k | if (json_parse(ta_parent, &list->values[list->num], src, max_depth) < 0) |
170 | 3.48k | return -1; |
171 | 498k | list->num++; |
172 | 498k | } |
173 | 2.72k | dst->format = is_obj ? MPV_FORMAT_NODE_MAP : MPV_FORMAT_NODE_ARRAY; |
174 | 2.72k | dst->u.list = list; |
175 | 2.72k | return 0; |
176 | 6.66k | } |
177 | | |
178 | | /* Parse the string in *src as JSON, and write the result into *dst. |
179 | | * max_depth limits the recursion and JSON tree depth. |
180 | | * Warning: this overwrites the input string (what *src points to)! |
181 | | * Returns: |
182 | | * 0: success, *dst is valid, *src points to the end (the caller must check |
183 | | * whether *src really terminates) |
184 | | * -1: failure, *dst is invalid, there may be dead allocs under ta_parent |
185 | | * (ta_free_children(ta_parent) is the only way to free them) |
186 | | * The input string can be mutated in both cases. *dst might contain string |
187 | | * elements, which point into the (mutated) input string. |
188 | | */ |
189 | | int json_parse(void *ta_parent, struct mpv_node *dst, char **src, int max_depth) |
190 | 503k | { |
191 | 503k | max_depth -= 1; |
192 | 503k | if (max_depth < 0) |
193 | 18 | return -1; |
194 | | |
195 | 503k | eat_ws(src); |
196 | | |
197 | 503k | char c = **src; |
198 | 503k | if (!c) |
199 | 229 | return -1; // early EOF |
200 | 503k | if (c == 'n' && strncmp(*src, "null", 4) == 0) { |
201 | 288 | *src += 4; |
202 | 288 | dst->format = MPV_FORMAT_NONE; |
203 | 288 | return 0; |
204 | 502k | } else if (c == 't' && strncmp(*src, "true", 4) == 0) { |
205 | 289 | *src += 4; |
206 | 289 | dst->format = MPV_FORMAT_FLAG; |
207 | 289 | dst->u.flag = 1; |
208 | 289 | return 0; |
209 | 502k | } else if (c == 'f' && strncmp(*src, "false", 5) == 0) { |
210 | 194 | *src += 5; |
211 | 194 | dst->format = MPV_FORMAT_FLAG; |
212 | 194 | dst->u.flag = 0; |
213 | 194 | return 0; |
214 | 502k | } else if (c == '"') { |
215 | 1.70k | return read_str(ta_parent, dst, src); |
216 | 500k | } else if (c == '[' || c == '{') { |
217 | 6.66k | return read_sub(ta_parent, dst, src, max_depth); |
218 | 494k | } else if (c == '-' || (c >= '0' && c <= '9')) { |
219 | | // The number could be either a float or an int. JSON doesn't make a |
220 | | // difference, but the client API does. |
221 | 493k | char *nsrci = *src, *nsrcf = *src; |
222 | 493k | errno = 0; |
223 | 493k | long long int numi = strtoll(*src, &nsrci, 0); |
224 | 493k | if (errno) |
225 | 194 | nsrci = *src; |
226 | 493k | errno = 0; |
227 | 493k | double numf = strtod(*src, &nsrcf); |
228 | 493k | if (errno) |
229 | 1 | nsrcf = *src; |
230 | 493k | if (nsrci >= nsrcf) { |
231 | 347k | *src = nsrci; |
232 | 347k | dst->format = MPV_FORMAT_INT64; // long long is usually 64 bits |
233 | 347k | dst->u.int64 = numi; |
234 | 347k | return 0; |
235 | 347k | } |
236 | 146k | if (nsrcf > *src && isfinite(numf)) { |
237 | 146k | *src = nsrcf; |
238 | 146k | dst->format = MPV_FORMAT_DOUBLE; |
239 | 146k | dst->u.double_ = numf; |
240 | 146k | return 0; |
241 | 146k | } |
242 | 1 | return -1; |
243 | 146k | } |
244 | 188 | return -1; // character doesn't start a valid token |
245 | 503k | } |
246 | | |
247 | | |
// Append the NUL-terminated C string cstr to the bstr that buf points to.
#define APPEND(buf, cstr) bstr_xappend(NULL, (buf), bstr0(cstr))
249 | | |
// Letter used for the short backslash escape of a control character, indexed
// by the character's byte value. Entries that are 0 have no short escape.
static const char special_escape[] = {
    ['\t'] = 't',
    ['\n'] = 'n',
    ['\r'] = 'r',
    ['\b'] = 'b',
    ['\f'] = 'f',
};
257 | | |
258 | | static void write_json_str(bstr *b, unsigned char *str) |
259 | 8.92M | { |
260 | 8.92M | mp_assert(str); |
261 | | |
262 | 8.92M | APPEND(b, "\""); |
263 | 112M | while (1) { |
264 | 112M | unsigned char *cur = str; |
265 | 503M | while (cur[0] >= 32 && cur[0] != '"' && cur[0] != '\\') |
266 | 390M | cur++; |
267 | 112M | if (!cur[0]) |
268 | 8.92M | break; |
269 | 103M | bstr_xappend(NULL, b, (bstr){str, cur - str}); |
270 | 103M | if (cur[0] == '\"') { |
271 | 1.45M | bstr_xappend(NULL, b, (bstr){"\\\"", 2}); |
272 | 102M | } else if (cur[0] == '\\') { |
273 | 717k | bstr_xappend(NULL, b, (bstr){"\\\\", 2}); |
274 | 101M | } else if (cur[0] < sizeof(special_escape) && special_escape[cur[0]]) { |
275 | 39.5M | bstr_xappend_asprintf(NULL, b, "\\%c", special_escape[cur[0]]); |
276 | 62.0M | } else { |
277 | 62.0M | bstr_xappend_asprintf(NULL, b, "\\u%04x", (unsigned char)cur[0]); |
278 | 62.0M | } |
279 | 103M | str = cur + 1; |
280 | 103M | } |
281 | 8.92M | APPEND(b, str); |
282 | 8.92M | APPEND(b, "\""); |
283 | 8.92M | } |
284 | | |
285 | | static void add_indent(bstr *b, int indent) |
286 | 7.61M | { |
287 | 7.61M | if (indent < 0) |
288 | 5.06M | return; |
289 | 2.55M | bstr_xappend(NULL, b, bstr0("\n")); |
290 | 6.93M | for (int n = 0; n < indent; n++) |
291 | 4.38M | bstr_xappend(NULL, b, bstr0(" ")); |
292 | 2.55M | } |
293 | | |
294 | | int json_append(bstr *b, const struct mpv_node *src, int indent) |
295 | 6.88M | { |
296 | 6.88M | switch (src->format) { |
297 | 190 | case MPV_FORMAT_NONE: |
298 | 190 | APPEND(b, "null"); |
299 | 190 | return 0; |
300 | 96.5k | case MPV_FORMAT_FLAG: |
301 | 96.5k | APPEND(b, src->u.flag ? "true" : "false"); |
302 | 96.5k | return 0; |
303 | 319k | case MPV_FORMAT_INT64: |
304 | 319k | bstr_xappend_asprintf(NULL, b, "%"PRId64, src->u.int64); |
305 | 319k | return 0; |
306 | 298k | case MPV_FORMAT_DOUBLE: { |
307 | 298k | const char *px = (isfinite(src->u.double_) || indent == 0) ? "" : "\""; |
308 | 298k | bstr_xappend_asprintf(NULL, b, "%s%f%s", px, src->u.double_, px); |
309 | 298k | return 0; |
310 | 0 | } |
311 | 4.71M | case MPV_FORMAT_STRING: |
312 | 4.71M | if (indent == 0) |
313 | 400 | APPEND(b, src->u.string); |
314 | 4.71M | else |
315 | 4.71M | write_json_str(b, src->u.string); |
316 | 4.71M | return 0; |
317 | 71.3k | case MPV_FORMAT_NODE_ARRAY: |
318 | 1.44M | case MPV_FORMAT_NODE_MAP: { |
319 | 1.44M | struct mpv_node_list *list = src->u.list; |
320 | 1.44M | bool is_obj = src->format == MPV_FORMAT_NODE_MAP; |
321 | 1.44M | APPEND(b, is_obj ? "{" : "["); |
322 | 1.44M | int next_indent = indent >= 0 ? indent + 1 : -1; |
323 | 7.61M | for (int n = 0; n < list->num; n++) { |
324 | 6.16M | if (n) |
325 | 4.75M | APPEND(b, ","); |
326 | 6.16M | add_indent(b, next_indent); |
327 | 6.16M | if (is_obj) { |
328 | 4.20M | write_json_str(b, list->keys[n]); |
329 | 4.20M | APPEND(b, ":"); |
330 | 4.20M | } |
331 | 6.16M | json_append(b, &list->values[n], next_indent); |
332 | 6.16M | } |
333 | 1.44M | add_indent(b, indent); |
334 | 1.44M | APPEND(b, is_obj ? "}" : "]"); |
335 | 1.44M | return 0; |
336 | 71.3k | } |
337 | 6.88M | } |
338 | 0 | return -1; // unknown format |
339 | 6.88M | } |
340 | | |
341 | | static int json_append_str(char **dst, struct mpv_node *src, int indent) |
342 | 715k | { |
343 | 715k | bstr buffer = bstr0(*dst); |
344 | 715k | int r = json_append(&buffer, src, indent); |
345 | 715k | *dst = buffer.start; |
346 | 715k | return r; |
347 | 715k | } |
348 | | |
/* Serialize *src as compact JSON (no added whitespace) and append the text
 * to the string *dst.
 * This will use strlen() to determine the start offset, and ta_get_size()
 * and ta_realloc() to extend the memory allocation of *dst.
 * Returns: 0 on success, <0 on failure.
 */
int json_write(char **dst, struct mpv_node *src)
{
    const int compact = -1; // a negative indent disables line breaks
    return json_append_str(dst, src, compact);
}
358 | | |
// Like json_write(), but inserts line breaks and indentation so the output is
// readable. Serialization starts at indentation level 0.
int json_write_pretty(char **dst, struct mpv_node *src)
{
    const int top_level = 0;
    return json_append_str(dst, src, top_level);
}