/src/samba/third_party/heimdal/lib/base/json.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 Kungliga Tekniska Högskolan |
3 | | * (Royal Institute of Technology, Stockholm, Sweden). |
4 | | * All rights reserved. |
5 | | * |
6 | | * Portions Copyright (c) 2010 Apple Inc. All rights reserved. |
7 | | * |
8 | | * Redistribution and use in source and binary forms, with or without |
9 | | * modification, are permitted provided that the following conditions |
10 | | * are met: |
11 | | * |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * |
15 | | * 2. Redistributions in binary form must reproduce the above copyright |
16 | | * notice, this list of conditions and the following disclaimer in the |
17 | | * documentation and/or other materials provided with the distribution. |
18 | | * |
19 | | * 3. Neither the name of the Institute nor the names of its contributors |
20 | | * may be used to endorse or promote products derived from this software |
21 | | * without specific prior written permission. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND |
24 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE |
27 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | | * SUCH DAMAGE. |
34 | | */ |
35 | | |
#include "baselocl.h"
#include <ctype.h>
#include <limits.h>
#include <base64.h>

#ifndef WIN32
#include <langinfo.h>
#endif
43 | | |
44 | | static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT; |
45 | | static heim_string_t heim_tid_data_uuid_key = NULL; |
46 | | |
47 | | static void |
48 | | json_init_once(void *arg) |
49 | 0 | { |
50 | 0 | heim_tid_data_uuid_key = __heim_string_constant("heimdal-type-data-76d7fca2-d0da-4b20-a126-1a10f8a0eae6"); |
51 | 0 | } |
52 | | |
/* Serializer state threaded through base2json() and its iterator callbacks. */
struct twojson {
    void *ctx;                          /* opaque cookie handed back as first argument of out() */
    void (*out)(void *, const char *);  /* output sink; the callbacks also call it with NULL before ",\n" */
    size_t indent;                      /* current nesting depth, consumed by indent() */
    heim_json_flags_t flags;            /* HEIM_JSON_F_* flags controlling the output */
    int ret;                            /* result of the latest base2json() call made by a callback;
                                         * callbacks do nothing once it is non-zero */
    int first;                          /* non-zero until the first element of the current container is emitted */
};
61 | | |
/*
 * String buffer state.  No user of this struct is visible next to its
 * definition, so the field notes below are inferred from the names only
 * and should be confirmed against the code that uses it.
 */
struct heim_strbuf {
    char *str;                /* presumably the accumulated text -- TODO confirm */
    size_t len;               /* presumably the number of bytes used in str -- TODO confirm */
    size_t alloced;           /* presumably the number of bytes allocated for str -- TODO confirm */
    int enomem;               /* presumably set once an allocation has failed -- TODO confirm */
    heim_json_flags_t flags;  /* HEIM_JSON_F_* flags */
};
69 | | |
/*
 * Forward declaration: the array/dict iterator callbacks call base2json(),
 * which in turn hands those callbacks to the container iterators.
 */
static int
base2json(heim_object_t, struct twojson *, int);
72 | | |
73 | | static void |
74 | | indent(struct twojson *j) |
75 | 0 | { |
76 | 0 | size_t i = j->indent; |
77 | 0 | if (j->flags & HEIM_JSON_F_ONE_LINE) |
78 | 0 | return; |
79 | 0 | if (j->flags & HEIM_JSON_F_INDENT2) |
80 | 0 | while (i--) |
81 | 0 | j->out(j->ctx, " "); |
82 | 0 | else if (j->flags & HEIM_JSON_F_INDENT4) |
83 | 0 | while (i--) |
84 | 0 | j->out(j->ctx, " "); |
85 | 0 | else if (j->flags & HEIM_JSON_F_INDENT8) |
86 | 0 | while (i--) |
87 | 0 | j->out(j->ctx, " "); |
88 | 0 | else |
89 | 0 | while (i--) |
90 | 0 | j->out(j->ctx, "\t"); |
91 | 0 | } |
92 | | |
93 | | static void |
94 | | array2json(heim_object_t value, void *ctx, int *stop) |
95 | 0 | { |
96 | 0 | struct twojson *j = ctx; |
97 | 0 | if (j->ret) |
98 | 0 | return; |
99 | 0 | if (j->first) { |
100 | 0 | j->first = 0; |
101 | 0 | } else { |
102 | 0 | j->out(j->ctx, NULL); /* eat previous '\n' if possible */ |
103 | 0 | j->out(j->ctx, ",\n"); |
104 | 0 | } |
105 | 0 | j->ret = base2json(value, j, 0); |
106 | 0 | } |
107 | | |
108 | | static void |
109 | | dict2json(heim_object_t key, heim_object_t value, void *ctx) |
110 | 0 | { |
111 | 0 | struct twojson *j = ctx; |
112 | 0 | if (j->ret) |
113 | 0 | return; |
114 | 0 | if (j->first) { |
115 | 0 | j->first = 0; |
116 | 0 | } else { |
117 | 0 | j->out(j->ctx, NULL); /* eat previous '\n' if possible */ |
118 | 0 | j->out(j->ctx, ",\n"); |
119 | 0 | } |
120 | 0 | j->ret = base2json(key, j, 0); |
121 | 0 | if (j->ret) |
122 | 0 | return; |
123 | 0 | switch (heim_get_tid(value)) { |
124 | 0 | case HEIM_TID_ARRAY: |
125 | 0 | case HEIM_TID_DICT: |
126 | 0 | case HEIM_TID_DATA: |
127 | 0 | j->out(j->ctx, ":\n"); |
128 | 0 | j->indent++; |
129 | 0 | j->ret = base2json(value, j, 0); |
130 | 0 | if (j->ret) |
131 | 0 | return; |
132 | 0 | j->indent--; |
133 | 0 | break; |
134 | 0 | default: |
135 | 0 | j->out(j->ctx, ": "); |
136 | 0 | j->ret = base2json(value, j, 1); |
137 | 0 | break; |
138 | 0 | } |
139 | 0 | } |
140 | | |
#ifndef WIN32
/*
 * One-time initializer used by heim_locale_is_utf8(): stores 1 in the int
 * that ptr points at when the locale's codeset name equals "utf-8"
 * (compared case-insensitively), and 0 otherwise.
 */
static void
init_is_utf8(void *ptr)
{
    int *answer = ptr;
    const char *codeset = nl_langinfo(CODESET);

    *answer = (strcasecmp("utf-8", codeset) == 0);
}
#endif
148 | | |
149 | | int |
150 | | heim_locale_is_utf8(void) |
151 | 0 | { |
152 | | #ifdef WIN32 |
153 | | return 0; /* XXX Implement */ |
154 | | #else |
155 | 0 | static int locale_is_utf8 = -1; |
156 | 0 | static heim_base_once_t once = HEIM_BASE_ONCE_INIT; |
157 | |
|
158 | 0 | heim_base_once_f(&once, &locale_is_utf8, init_is_utf8); |
159 | 0 | return locale_is_utf8; |
160 | 0 | #endif |
161 | 0 | } |
162 | | |
163 | | static void |
164 | | out_escaped_bmp(struct twojson *j, const unsigned char *p, int nbytes) |
165 | 0 | { |
166 | 0 | unsigned char e[sizeof("\\u0000")]; |
167 | 0 | unsigned codepoint; |
168 | |
|
169 | 0 | if (nbytes == 2) |
170 | 0 | codepoint = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f); |
171 | 0 | else if (nbytes == 3) |
172 | 0 | codepoint = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f); |
173 | 0 | else |
174 | 0 | abort(); |
175 | 0 | e[0] = '\\'; |
176 | 0 | e[1] = 'u'; |
177 | 0 | e[2] = codepoint >> 12; |
178 | 0 | e[2] += (e[2] < 10) ? '0' : ('A' - 10); |
179 | 0 | e[3] = (codepoint >> 8) & 0x0f; |
180 | 0 | e[3] += (e[3] < 10) ? '0' : ('A' - 10); |
181 | 0 | e[4] = (codepoint >> 4) & 0x0f; |
182 | 0 | e[4] += (e[4] < 10) ? '0' : ('A' - 10); |
183 | 0 | e[5] = codepoint & 0x0f; |
184 | 0 | e[5] += (e[5] < 10) ? '0' : ('A' - 10); |
185 | 0 | e[6] = '\0'; |
186 | 0 | j->out(j->ctx, (char *)e); |
187 | 0 | } |
188 | | |
189 | | static int |
190 | | base2json(heim_object_t obj, struct twojson *j, int skip_indent) |
191 | 0 | { |
192 | 0 | heim_tid_t type; |
193 | 0 | int first = 0; |
194 | |
|
195 | 0 | if (obj == NULL) { |
196 | 0 | if (j->flags & HEIM_JSON_F_CNULL2JSNULL) { |
197 | 0 | obj = heim_null_create(); |
198 | 0 | } else if (j->flags & HEIM_JSON_F_NO_C_NULL) { |
199 | 0 | return EINVAL; |
200 | 0 | } else { |
201 | 0 | indent(j); |
202 | 0 | j->out(j->ctx, "<NULL>\n"); /* This is NOT valid JSON! */ |
203 | 0 | return 0; |
204 | 0 | } |
205 | 0 | } |
206 | | |
207 | 0 | type = heim_get_tid(obj); |
208 | 0 | switch (type) { |
209 | 0 | case HEIM_TID_ARRAY: |
210 | 0 | indent(j); |
211 | 0 | j->out(j->ctx, "[\n"); |
212 | 0 | j->indent++; |
213 | 0 | first = j->first; |
214 | 0 | j->first = 1; |
215 | 0 | heim_array_iterate_f(obj, j, array2json); |
216 | 0 | j->indent--; |
217 | 0 | if (!j->first) |
218 | 0 | j->out(j->ctx, "\n"); |
219 | 0 | indent(j); |
220 | 0 | j->out(j->ctx, "]\n"); |
221 | 0 | j->first = first; |
222 | 0 | break; |
223 | | |
224 | 0 | case HEIM_TID_DICT: |
225 | 0 | indent(j); |
226 | 0 | j->out(j->ctx, "{\n"); |
227 | 0 | j->indent++; |
228 | 0 | first = j->first; |
229 | 0 | j->first = 1; |
230 | 0 | heim_dict_iterate_f(obj, j, dict2json); |
231 | 0 | j->indent--; |
232 | 0 | if (!j->first) |
233 | 0 | j->out(j->ctx, "\n"); |
234 | 0 | indent(j); |
235 | 0 | j->out(j->ctx, "}\n"); |
236 | 0 | j->first = first; |
237 | 0 | break; |
238 | | |
239 | 0 | case HEIM_TID_STRING: { |
240 | 0 | const unsigned char *s = (const unsigned char *)heim_string_get_utf8(obj); |
241 | 0 | const unsigned char *p; |
242 | 0 | unsigned int c, cp, ctop, cbot; |
243 | 0 | char e[sizeof("\\u0123\\u3210")]; |
244 | 0 | int good; |
245 | 0 | size_t i; |
246 | |
|
247 | 0 | if (!skip_indent) |
248 | 0 | indent(j); |
249 | 0 | j->out(j->ctx, "\""); |
250 | 0 | for (p = s; (c = *p); p++) { |
251 | 0 | switch (c) { |
252 | | /* ASCII control characters w/ C-like escapes */ |
253 | 0 | case '\b': j->out(j->ctx, "\\b"); continue; |
254 | 0 | case '\f': j->out(j->ctx, "\\f"); continue; |
255 | 0 | case '\n': j->out(j->ctx, "\\n"); continue; |
256 | 0 | case '\r': j->out(j->ctx, "\\r"); continue; |
257 | 0 | case '\t': j->out(j->ctx, "\\t"); continue; |
258 | | /* Other must-escape non-control ASCII characters */ |
259 | 0 | case '"': j->out(j->ctx, "\\\""); continue; |
260 | 0 | case '\\': j->out(j->ctx, "\\\\"); continue; |
261 | 0 | default: break; |
262 | 0 | } |
263 | | |
264 | | /* |
265 | | * JSON string encoding is... complex. |
266 | | * |
267 | | * Invalid UTF-8 w/ HEIM_JSON_F_STRICT_STRINGS set -> return 1 |
268 | | * |
269 | | * Invalid UTF-8 w/o HEIM_JSON_F_STRICT_STRINGS set -> pass |
270 | | * through, a sort of Heimdal WTF-8, but not _the_ WTF-8. |
271 | | */ |
272 | 0 | if (c < 0x20) { |
273 | | /* ASCII control character w/o C-like escape */ |
274 | 0 | e[0] = '\\'; |
275 | 0 | e[1] = 'u'; |
276 | 0 | e[2] = '0'; |
277 | 0 | e[3] = '0'; |
278 | 0 | e[4] = "0123456789ABCDEF"[c>>4]; |
279 | 0 | e[5] = "0123456789ABCDEF"[c & 0x0f]; |
280 | 0 | e[6] = '\0'; |
281 | 0 | j->out(j->ctx, e); |
282 | 0 | continue; |
283 | 0 | } |
284 | 0 | if (c < 0x80) { |
285 | | /* ASCII */ |
286 | 0 | e[0] = c; |
287 | 0 | e[1] = '\0'; |
288 | 0 | j->out(j->ctx, e); |
289 | 0 | continue; |
290 | 0 | } |
291 | 0 | if ((c & 0xc0) == 0x80) { |
292 | | /* UTF-8 bare non-leading byte */ |
293 | 0 | if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { |
294 | 0 | e[0] = c; |
295 | 0 | e[1] = '\0'; |
296 | 0 | j->out(j->ctx, e); |
297 | 0 | continue; |
298 | 0 | } |
299 | 0 | return 1; |
300 | 0 | } |
301 | 0 | if ((c & 0xe0) == 0xc0) { |
302 | | /* UTF-8 leading byte of two-byte sequence */ |
303 | 0 | good = 1; |
304 | 0 | for (i = 1; i < 2 && good && p[i]; i++) { |
305 | 0 | if ((p[i] & 0xc0) != 0x80) |
306 | 0 | good = 0; |
307 | 0 | } |
308 | 0 | if (i != 2) |
309 | 0 | good = 0; |
310 | 0 | if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { |
311 | 0 | e[0] = c; |
312 | 0 | e[1] = '\0'; |
313 | 0 | j->out(j->ctx, e); |
314 | 0 | continue; |
315 | 0 | } else if (!good) { |
316 | 0 | return 1; |
317 | 0 | } |
318 | 0 | if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) { |
319 | 0 | out_escaped_bmp(j, p, 2); |
320 | 0 | p += 1; |
321 | 0 | continue; |
322 | 0 | } |
323 | 0 | e[0] = c; |
324 | 0 | e[1] = p[1]; |
325 | 0 | e[2] = '\0'; |
326 | 0 | j->out(j->ctx, e); |
327 | 0 | p += 1; |
328 | 0 | continue; |
329 | 0 | } |
330 | 0 | if ((c & 0xf0) == 0xe0) { |
331 | | /* UTF-8 leading byte of three-byte sequence */ |
332 | 0 | good = 1; |
333 | 0 | for (i = 1; i < 3 && good && p[i]; i++) { |
334 | 0 | if ((p[i] & 0xc0) != 0x80) |
335 | 0 | good = 0; |
336 | 0 | } |
337 | 0 | if (i != 3) |
338 | 0 | good = 0; |
339 | 0 | if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { |
340 | 0 | e[0] = c; |
341 | 0 | e[1] = '\0'; |
342 | 0 | j->out(j->ctx, e); |
343 | 0 | continue; |
344 | 0 | } else if (!good) { |
345 | 0 | return 1; |
346 | 0 | } |
347 | 0 | if (j->flags & HEIM_JSON_F_ESCAPE_NON_ASCII) { |
348 | 0 | out_escaped_bmp(j, p, 3); |
349 | 0 | p += 2; |
350 | 0 | continue; |
351 | 0 | } |
352 | 0 | e[0] = c; |
353 | 0 | e[1] = p[1]; |
354 | 0 | e[2] = p[2]; |
355 | 0 | e[3] = '\0'; |
356 | 0 | j->out(j->ctx, e); |
357 | 0 | p += 2; |
358 | 0 | continue; |
359 | 0 | } |
360 | | |
361 | 0 | if (c > 0xf7) { |
362 | | /* Invalid UTF-8 leading byte */ |
363 | 0 | if (!(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { |
364 | 0 | e[0] = c; |
365 | 0 | e[1] = '\0'; |
366 | 0 | j->out(j->ctx, e); |
367 | 0 | continue; |
368 | 0 | } |
369 | 0 | return 1; |
370 | 0 | } |
371 | | |
372 | | /* |
373 | | * A codepoint > U+FFFF, needs encoding a la UTF-16 surrogate |
374 | | * pair because JSON takes after JS which uses UTF-16. Ugly. |
375 | | */ |
376 | 0 | cp = c & 0x7; |
377 | 0 | good = 1; |
378 | 0 | for (i = 1; i < 4 && good && p[i]; i++) { |
379 | 0 | if ((p[i] & 0xc0) == 0x80) |
380 | 0 | cp = (cp << 6) | (p[i] & 0x3f); |
381 | 0 | else |
382 | 0 | good = 0; |
383 | 0 | } |
384 | 0 | if (i != 4) |
385 | 0 | good = 0; |
386 | 0 | if (!good && !(j->flags & HEIM_JSON_F_STRICT_STRINGS)) { |
387 | 0 | e[0] = c; |
388 | 0 | e[1] = '\0'; |
389 | 0 | j->out(j->ctx, e); |
390 | 0 | continue; |
391 | 0 | } else if (!good) { |
392 | 0 | return 1; |
393 | 0 | } |
394 | 0 | p += 3; |
395 | |
|
396 | 0 | cp -= 0x10000; |
397 | 0 | ctop = 0xD800 + (cp >> 10); |
398 | 0 | cbot = 0xDC00 + (cp & 0x3ff); |
399 | |
|
400 | 0 | e[0 ] = '\\'; |
401 | 0 | e[1 ] = 'u'; |
402 | 0 | e[2 ] = "0123456789ABCDEF"[(ctop ) >> 12]; |
403 | 0 | e[3 ] = "0123456789ABCDEF"[(ctop & 0x0f00) >> 8]; |
404 | 0 | e[4 ] = "0123456789ABCDEF"[(ctop & 0x00f0) >> 4]; |
405 | 0 | e[5 ] = "0123456789ABCDEF"[(ctop & 0x000f) ]; |
406 | 0 | e[6 ] = '\\'; |
407 | 0 | e[7 ] = 'u'; |
408 | 0 | e[8 ] = "0123456789ABCDEF"[(cbot ) >> 12]; |
409 | 0 | e[9 ] = "0123456789ABCDEF"[(cbot & 0x0f00) >> 8]; |
410 | 0 | e[10] = "0123456789ABCDEF"[(cbot & 0x00f0) >> 4]; |
411 | 0 | e[11] = "0123456789ABCDEF"[(cbot & 0x000f) ]; |
412 | 0 | e[12] = '\0'; |
413 | 0 | j->out(j->ctx, e); |
414 | 0 | continue; |
415 | 0 | } |
416 | 0 | j->out(j->ctx, "\""); |
417 | 0 | break; |
418 | 0 | } |
419 | | |
420 | 0 | case HEIM_TID_DATA: { |
421 | 0 | heim_dict_t d; |
422 | 0 | heim_string_t v; |
423 | 0 | const heim_octet_string *data; |
424 | 0 | char *b64 = NULL; |
425 | 0 | int ret; |
426 | |
|
427 | 0 | if (j->flags & HEIM_JSON_F_NO_DATA) |
428 | 0 | return EINVAL; /* JSON doesn't do binary */ |
429 | | |
430 | 0 | data = heim_data_get_data(obj); |
431 | 0 | ret = rk_base64_encode(data->data, data->length, &b64); |
432 | 0 | if (ret < 0 || b64 == NULL) |
433 | 0 | return ENOMEM; |
434 | | |
435 | 0 | if (j->flags & HEIM_JSON_F_NO_DATA_DICT) { |
436 | 0 | indent(j); |
437 | 0 | j->out(j->ctx, "\""); |
438 | 0 | j->out(j->ctx, b64); /* base64-encode; hope there's no aliasing */ |
439 | 0 | j->out(j->ctx, "\""); |
440 | 0 | free(b64); |
441 | 0 | } else { |
442 | | /* |
443 | | * JSON has no way to represent binary data, therefore the |
444 | | * following is a Heimdal-specific convention. |
445 | | * |
446 | | * We encode binary data as a dict with a single very magic |
447 | | * key with a base64-encoded value. The magic key includes |
448 | | * a uuid, so we're not likely to alias accidentally. |
449 | | */ |
450 | 0 | d = heim_dict_create(2); |
451 | 0 | if (d == NULL) { |
452 | 0 | free(b64); |
453 | 0 | return ENOMEM; |
454 | 0 | } |
455 | 0 | v = heim_string_ref_create(b64, free); |
456 | 0 | if (v == NULL) { |
457 | 0 | free(b64); |
458 | 0 | heim_release(d); |
459 | 0 | return ENOMEM; |
460 | 0 | } |
461 | 0 | ret = heim_dict_set_value(d, heim_tid_data_uuid_key, v); |
462 | 0 | heim_release(v); |
463 | 0 | if (ret) { |
464 | 0 | heim_release(d); |
465 | 0 | return ENOMEM; |
466 | 0 | } |
467 | 0 | ret = base2json(d, j, 0); |
468 | 0 | heim_release(d); |
469 | 0 | if (ret) |
470 | 0 | return ret; |
471 | 0 | } |
472 | 0 | break; |
473 | 0 | } |
474 | | |
475 | 0 | case HEIM_TID_NUMBER: { |
476 | 0 | char num[32]; |
477 | 0 | if (!skip_indent) |
478 | 0 | indent(j); |
479 | 0 | snprintf(num, sizeof (num), "%d", heim_number_get_int(obj)); |
480 | 0 | j->out(j->ctx, num); |
481 | 0 | break; |
482 | 0 | } |
483 | 0 | case HEIM_TID_NULL: |
484 | 0 | if (!skip_indent) |
485 | 0 | indent(j); |
486 | 0 | j->out(j->ctx, "null"); |
487 | 0 | break; |
488 | 0 | case HEIM_TID_BOOL: |
489 | 0 | if (!skip_indent) |
490 | 0 | indent(j); |
491 | 0 | j->out(j->ctx, heim_bool_val(obj) ? "true" : "false"); |
492 | 0 | break; |
493 | 0 | default: |
494 | 0 | return 1; |
495 | 0 | } |
496 | 0 | return 0; |
497 | 0 | } |
498 | | |
499 | | static int |
500 | | heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags, |
501 | | void (*out)(void *, const char *)) |
502 | 0 | { |
503 | 0 | struct twojson j; |
504 | |
|
505 | 0 | heim_base_once_f(&heim_json_once, NULL, json_init_once); |
506 | |
|
507 | 0 | j.indent = 0; |
508 | 0 | j.ctx = ctx; |
509 | 0 | j.out = out; |
510 | 0 | j.flags = flags; |
511 | 0 | j.ret = 0; |
512 | 0 | j.first = 1; |
513 | |
|
514 | 0 | if (!(flags & HEIM_JSON_F_NO_ESCAPE_NON_ASCII) && |
515 | 0 | !heim_locale_is_utf8()) |
516 | 0 | j.flags |= HEIM_JSON_F_ESCAPE_NON_ASCII; |
517 | |
|
518 | 0 | return base2json(obj, &j, 0); |
519 | 0 | } |
520 | | |
521 | | |
522 | | /* |
523 | | * |
524 | | */ |
525 | | |
/* State of one JSON parse, shared by the parse_*() helpers. */
struct parse_ctx {
    unsigned long lineno;     /* incremented for each '\n' consumed; reported in error messages */
    const uint8_t *p;         /* next input byte to consume */
    const uint8_t *pstart;    /* presumably the start of the input -- not referenced nearby, TODO confirm */
    const uint8_t *pend;      /* one past the last input byte; input remains while p < pend */
    heim_error_t error;       /* set by the parsers when they fail */
    size_t depth;             /* presumably the current nesting depth -- TODO confirm against its users */
    heim_json_flags_t flags;  /* HEIM_JSON_F_* options, e.g. strict strings / strict dict keys */
};
535 | | |
536 | | |
/* Forward declaration: parse_pair() needs parse_value() for keys and values. */
static heim_object_t
parse_value(struct parse_ctx *ctx);
539 | | |
540 | | /* |
541 | | * This function eats whitespace, but, critically, it also succeeds |
542 | | * only if there's anything left to parse. |
543 | | */ |
544 | | static int |
545 | | white_spaces(struct parse_ctx *ctx) |
546 | 0 | { |
547 | 0 | while (ctx->p < ctx->pend) { |
548 | 0 | uint8_t c = *ctx->p; |
549 | 0 | if (c == ' ' || c == '\t' || c == '\r') { |
550 | |
|
551 | 0 | } else if (c == '\n') { |
552 | 0 | ctx->lineno++; |
553 | 0 | } else |
554 | 0 | return 0; |
555 | 0 | (ctx->p)++; |
556 | 0 | } |
557 | 0 | return -1; |
558 | 0 | } |
559 | | |
/* Return 1 if n is an ASCII decimal digit ('0'..'9'), else 0. */
static int
is_number(uint8_t n)
{
    if (n < '0')
        return 0;
    return n <= '9';
}
565 | | |
566 | | static heim_number_t |
567 | | parse_number(struct parse_ctx *ctx) |
568 | 0 | { |
569 | 0 | int number = 0, neg = 1; |
570 | |
|
571 | 0 | if (ctx->p >= ctx->pend) |
572 | 0 | return NULL; |
573 | | |
574 | 0 | if (*ctx->p == '-') { |
575 | 0 | if (ctx->p + 1 >= ctx->pend) |
576 | 0 | return NULL; |
577 | 0 | neg = -1; |
578 | 0 | ctx->p += 1; |
579 | 0 | } |
580 | | |
581 | 0 | while (ctx->p < ctx->pend) { |
582 | 0 | if (is_number(*ctx->p)) { |
583 | 0 | number = (number * 10) + (*ctx->p - '0'); |
584 | 0 | } else { |
585 | 0 | break; |
586 | 0 | } |
587 | 0 | ctx->p += 1; |
588 | 0 | } |
589 | |
|
590 | 0 | return heim_number_create(number * neg); |
591 | 0 | } |
592 | | |
593 | | /* |
594 | | * Read 4 hex digits from ctx->p. |
595 | | * |
596 | | * If we don't have enough, rewind ctx->p and return -1 . |
597 | | */ |
598 | | static int |
599 | | unescape_unicode(struct parse_ctx *ctx) |
600 | 0 | { |
601 | 0 | int c = 0; |
602 | 0 | int i; |
603 | |
|
604 | 0 | for (i = 0; i < 4 && ctx->p < ctx->pend; i++, ctx->p++) { |
605 | 0 | if (*ctx->p >= '0' && *ctx->p <= '9') { |
606 | 0 | c = (c << 4) + (*ctx->p - '0'); |
607 | 0 | } else if (*ctx->p >= 'A' && *ctx->p <= 'F') { |
608 | 0 | c = (c << 4) + (10 + *ctx->p - 'A'); |
609 | 0 | } else if (*ctx->p >= 'a' && *ctx->p <= 'f') { |
610 | 0 | c = (c << 4) + (10 + *ctx->p - 'a'); |
611 | 0 | } else { |
612 | 0 | ctx->p -= i; |
613 | 0 | return -1; |
614 | 0 | } |
615 | 0 | } |
616 | 0 | return c; |
617 | 0 | } |
618 | | |
/*
 * Append the UTF-8 encoding of code point c at *pp.
 *
 * pend is one past the last writable byte.  Returns 1 and advances *pp
 * past the bytes written on success.  Returns 0, leaving *pp unchanged,
 * when the buffer is too small or c is 0x110000 or greater; bytes may
 * already have been stored beyond *pp in the too-small case.  ctx is not
 * used.
 */
static int
encode_utf8(struct parse_ctx *ctx, char **pp, char *pend, int c)
{
    unsigned char seq[4];
    size_t nseq, k;
    char *out = *pp;

    if (c < 0x80) {
        /* ASCII */
        seq[0] = (unsigned char)c;
        nseq = 1;
    } else if (c < 0x800) {
        /* 2 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xc0 | (c >> 6));
        seq[1] = (unsigned char)(0x80 | (c & 0x3f));
        nseq = 2;
    } else if (c < 0x10000) {
        /* 3 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xe0 | (c >> 12));
        seq[1] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
        seq[2] = (unsigned char)(0x80 | (c & 0x3f));
        nseq = 3;
    } else if (c < 0x110000) {
        /* 4 code unit UTF-8 sequence */
        seq[0] = (unsigned char)(0xf0 | (c >> 18));
        seq[1] = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
        seq[2] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
        seq[3] = (unsigned char)(0x80 | (c & 0x3f));
        nseq = 4;
    } else {
        return 0;
    }

    /* Store byte by byte, giving up as soon as the buffer is full */
    for (k = 0; k < nseq; k++) {
        if (out >= pend)
            return 0;
        *(out++) = (char)seq[k];
    }
    *pp = out;
    return 1;
}
666 | | |
667 | | static heim_string_t |
668 | | parse_string_error(struct parse_ctx *ctx, |
669 | | char *freeme, |
670 | | const char *msg) |
671 | 0 | { |
672 | 0 | free(freeme); |
673 | 0 | ctx->error = heim_error_create(EINVAL, "%s at %lu", msg, ctx->lineno); |
674 | 0 | return NULL; |
675 | 0 | } |
676 | | |
677 | | static heim_string_t |
678 | | parse_string(struct parse_ctx *ctx) |
679 | 0 | { |
680 | 0 | const uint8_t *start; |
681 | 0 | heim_object_t o; |
682 | 0 | size_t alloc_len = 0; |
683 | 0 | size_t need = 0; |
684 | 0 | char *p0, *p, *pend; |
685 | 0 | int strict = ctx->flags & HEIM_JSON_F_STRICT_STRINGS; |
686 | 0 | int binary = 0; |
687 | |
|
688 | 0 | if (*ctx->p != '"') |
689 | 0 | return parse_string_error(ctx, NULL, |
690 | 0 | "Expected a JSON string but found " |
691 | 0 | "something else"); |
692 | 0 | start = ++(ctx->p); |
693 | | |
694 | | /* Estimate how many bytes we need to allocate */ |
695 | 0 | p0 = p = pend = NULL; |
696 | 0 | for (need = 1; ctx->p < ctx->pend; ctx->p++) { |
697 | 0 | need++; |
698 | 0 | if (*ctx->p == '\\') |
699 | 0 | ctx->p++; |
700 | 0 | else if (*ctx->p == '"') |
701 | 0 | break; |
702 | 0 | } |
703 | 0 | if (ctx->p == ctx->pend) |
704 | 0 | return parse_string_error(ctx, NULL, "Unterminated JSON string"); |
705 | | |
706 | 0 | ctx->p = start; |
707 | 0 | while (ctx->p < ctx->pend) { |
708 | 0 | const unsigned char *p_save; |
709 | 0 | int32_t ctop, cbot; |
710 | |
|
711 | 0 | if (*ctx->p == '"') { |
712 | 0 | ctx->p++; |
713 | 0 | break; |
714 | 0 | } |
715 | | |
716 | | /* Allocate or resize our output buffer if need be */ |
717 | 0 | if (need || p == pend) { |
718 | 0 | char *tmp; |
719 | | |
720 | | /* |
721 | | * Work out how far p is into p0 to re-esablish p after |
722 | | * the realloc() |
723 | | */ |
724 | 0 | size_t p0_to_p_len = (p - p0); |
725 | |
|
726 | 0 | tmp = realloc(p0, alloc_len + need + 5 /* slop? */); |
727 | |
|
728 | 0 | if (tmp == NULL) { |
729 | 0 | ctx->error = heim_error_create_enomem(); |
730 | 0 | free(p0); |
731 | 0 | return NULL; |
732 | 0 | } |
733 | 0 | alloc_len += need + 5; |
734 | | |
735 | | /* |
736 | | * We have two pointers, p and p0, we want to keep them |
737 | | * pointing into the same memory after the realloc() |
738 | | */ |
739 | 0 | p = tmp + p0_to_p_len; |
740 | 0 | p0 = tmp; |
741 | 0 | pend = p0 + alloc_len; |
742 | |
|
743 | 0 | need = 0; |
744 | 0 | } |
745 | | |
746 | 0 | if (*ctx->p != '\\') { |
747 | 0 | unsigned char c = *ctx->p; |
748 | | |
749 | | /* |
750 | | * Not backslashed -> consume now. |
751 | | * |
752 | | * NOTE: All cases in this block must continue or return w/ error. |
753 | | */ |
754 | | |
755 | | /* Check for unescaped ASCII control characters */ |
756 | 0 | if (c == '\n') { |
757 | 0 | if (strict) |
758 | 0 | return parse_string_error(ctx, p0, |
759 | 0 | "Unescaped newline in JSON string"); |
760 | | /* Count the newline but don't add it to the decoding */ |
761 | 0 | ctx->lineno++; |
762 | 0 | } else if (strict && *ctx->p <= 0x1f) { |
763 | 0 | return parse_string_error(ctx, p0, "Unescaped ASCII control character"); |
764 | 0 | } else if (c == 0) { |
765 | 0 | binary = 1; |
766 | 0 | } |
767 | 0 | if (!strict || c < 0x80) { |
768 | | /* ASCII, or not strict -> no need to validate */ |
769 | 0 | *(p++) = c; |
770 | 0 | ctx->p++; |
771 | 0 | continue; |
772 | 0 | } |
773 | | |
774 | | /* |
775 | | * Being strict for parsing means we want to detect malformed UTF-8 |
776 | | * sequences. |
777 | | * |
778 | | * If not strict then we just go on below and add to `p' whatever |
779 | | * bytes we find in `ctx->p' as we find them. |
780 | | * |
781 | | * For each two-byte sequence we need one more byte in `p[]'. For |
782 | | * each three-byte sequence we need two more bytes in `p[]'. |
783 | | * |
784 | | * Setting `need' and looping will cause `p0' to be grown. |
785 | | * |
786 | | * NOTE: All cases in this block must continue or return w/ error. |
787 | | */ |
788 | 0 | if ((c & 0xe0) == 0xc0) { |
789 | | /* Two-byte UTF-8 encoding */ |
790 | 0 | if (pend - p < 2) { |
791 | 0 | need = 2; |
792 | 0 | continue; /* realloc p0 */ |
793 | 0 | } |
794 | | |
795 | 0 | *(p++) = c; |
796 | 0 | ctx->p++; |
797 | 0 | if (ctx->p == ctx->pend) |
798 | 0 | return parse_string_error(ctx, p0, "Truncated UTF-8"); |
799 | 0 | c = *(ctx->p++); |
800 | 0 | if ((c & 0xc0) != 0x80) |
801 | 0 | return parse_string_error(ctx, p0, "Truncated UTF-8"); |
802 | 0 | *(p++) = c; |
803 | 0 | continue; |
804 | 0 | } |
805 | 0 | if ((c & 0xf0) == 0xe0) { |
806 | | /* Three-byte UTF-8 encoding */ |
807 | 0 | if (pend - p < 3) { |
808 | 0 | need = 3; |
809 | 0 | continue; /* realloc p0 */ |
810 | 0 | } |
811 | | |
812 | 0 | *(p++) = c; |
813 | 0 | ctx->p++; |
814 | 0 | if (ctx->p == ctx->pend) |
815 | 0 | return parse_string_error(ctx, p0, "Truncated UTF-8"); |
816 | 0 | c = *(ctx->p++); |
817 | 0 | if ((c & 0xc0) != 0x80) |
818 | 0 | return parse_string_error(ctx, p0, "Truncated UTF-8"); |
819 | 0 | *(p++) = c; |
820 | 0 | c = *(ctx->p++); |
821 | 0 | if ((c & 0xc0) != 0x80) |
822 | 0 | return parse_string_error(ctx, p0, "Truncated UTF-8"); |
823 | 0 | *(p++) = c; |
824 | 0 | continue; |
825 | 0 | } |
826 | 0 | if ((c & 0xf8) == 0xf0) |
827 | 0 | return parse_string_error(ctx, p0, "UTF-8 sequence not " |
828 | 0 | "encoded as escaped UTF-16"); |
829 | 0 | if ((c & 0xc0) == 0x80) |
830 | 0 | return parse_string_error(ctx, p0, |
831 | 0 | "Invalid UTF-8 " |
832 | 0 | "(bare continuation code unit)"); |
833 | | |
834 | 0 | return parse_string_error(ctx, p0, "Not UTF-8"); |
835 | 0 | } |
836 | | |
837 | | /* Backslash-quoted character */ |
838 | 0 | ctx->p++; |
839 | 0 | if (ctx->p == ctx->pend) { |
840 | 0 | ctx->error = |
841 | 0 | heim_error_create(EINVAL, |
842 | 0 | "Unterminated JSON string at line %lu", |
843 | 0 | ctx->lineno); |
844 | 0 | free(p0); |
845 | 0 | return NULL; |
846 | 0 | } |
847 | 0 | switch (*ctx->p) { |
848 | | /* Simple escapes */ |
849 | 0 | case 'b': *(p++) = '\b'; ctx->p++; continue; |
850 | 0 | case 'f': *(p++) = '\f'; ctx->p++; continue; |
851 | 0 | case 'n': *(p++) = '\n'; ctx->p++; continue; |
852 | 0 | case 'r': *(p++) = '\r'; ctx->p++; continue; |
853 | 0 | case 't': *(p++) = '\t'; ctx->p++; continue; |
854 | 0 | case '"': *(p++) = '"'; ctx->p++; continue; |
855 | 0 | case '\\': *(p++) = '\\'; ctx->p++; continue; |
856 | | /* Escaped Unicode handled below */ |
857 | 0 | case 'u': |
858 | | /* |
859 | | * Worst case for !strict we need 11 bytes for a truncated non-BMP |
860 | | * codepoint escape. Call it 12. |
861 | | */ |
862 | 0 | if (strict) |
863 | 0 | need = 4; |
864 | 0 | else |
865 | 0 | need = 12; |
866 | 0 | if (pend - p < need) { |
867 | | /* Go back to the backslash, realloc, try again */ |
868 | 0 | ctx->p--; |
869 | 0 | continue; |
870 | 0 | } |
871 | | |
872 | 0 | need = 0; |
873 | 0 | ctx->p++; |
874 | 0 | break; |
875 | 0 | default: |
876 | 0 | if (!strict) { |
877 | 0 | *(p++) = *ctx->p; |
878 | 0 | ctx->p++; |
879 | 0 | continue; |
880 | 0 | } |
881 | 0 | ctx->error = |
882 | 0 | heim_error_create(EINVAL, |
883 | 0 | "Invalid backslash escape at line %lu", |
884 | 0 | ctx->lineno); |
885 | 0 | free(p0); |
886 | 0 | return NULL; |
887 | 0 | } |
888 | | |
889 | | /* Unicode code point */ |
890 | 0 | if (pend - p < 12) { |
891 | 0 | need = 12; |
892 | 0 | ctx->p -= 2; /* for "\\u" */ |
893 | 0 | continue; /* This will cause p0 to be realloc'ed */ |
894 | 0 | } |
895 | 0 | p_save = ctx->p; |
896 | 0 | cbot = -3; |
897 | 0 | ctop = unescape_unicode(ctx); |
898 | 0 | if (ctop == -1 && strict) |
899 | 0 | return parse_string_error(ctx, p0, "Invalid escaped Unicode"); |
900 | 0 | if (ctop == -1) { |
901 | | /* |
902 | | * Not strict; tolerate bad input. |
903 | | * |
904 | | * Output "\\u" and then loop to treat what we expected to be four |
905 | | * digits as if they were not part of an escaped Unicode codepoint. |
906 | | */ |
907 | 0 | ctx->p = p_save; |
908 | 0 | if (p < pend) |
909 | 0 | *(p++) = '\\'; |
910 | 0 | if (p < pend) |
911 | 0 | *(p++) = 'u'; |
912 | 0 | continue; |
913 | 0 | } |
914 | 0 | if (ctop == 0) { |
915 | 0 | *(p++) = '\0'; |
916 | 0 | binary = 1; |
917 | 0 | continue; |
918 | 0 | } |
919 | 0 | if (ctop < 0xd800) { |
920 | 0 | if (!encode_utf8(ctx, &p, pend, ctop)) |
921 | 0 | return parse_string_error(ctx, p0, |
922 | 0 | "Internal JSON string parse error"); |
923 | 0 | continue; |
924 | 0 | } |
925 | | |
926 | | /* |
927 | | * We parsed the top escaped codepoint of a surrogate pair encoding |
928 | | * of a non-BMP Unicode codepoint. What follows must be another |
929 | | * escaped codepoint. |
930 | | */ |
931 | 0 | if (ctx->p < ctx->pend && ctx->p[0] == '\\') |
932 | 0 | ctx->p++; |
933 | 0 | else |
934 | 0 | ctop = -1; |
935 | 0 | if (ctop > -1 && ctx->p < ctx->pend && ctx->p[0] == 'u') |
936 | 0 | ctx->p++; |
937 | 0 | else |
938 | 0 | ctop = -1; |
939 | 0 | if (ctop > -1) { |
940 | | /* Parse the hex digits of the bottom half of the surrogate pair */ |
941 | 0 | cbot = unescape_unicode(ctx); |
942 | 0 | if (cbot == -1 || cbot < 0xdc00) |
943 | 0 | ctop = -1; |
944 | 0 | } |
945 | 0 | if (ctop == -1) { |
946 | 0 | if (strict) |
947 | 0 | return parse_string_error(ctx, p0, |
948 | 0 | "Invalid surrogate pair"); |
949 | | |
950 | | /* |
951 | | * Output "\\u", rewind, output the digits of `ctop'. |
952 | | * |
953 | | * When we get to what should have been the bottom half of the |
954 | | * pair we'll necessarily fail to parse it as a normal escaped |
955 | | * Unicode codepoint, and once again, rewind and output its digits. |
956 | | */ |
957 | 0 | if (p < pend) |
958 | 0 | *(p++) = '\\'; |
959 | 0 | if (p < pend) |
960 | 0 | *(p++) = 'u'; |
961 | 0 | ctx->p = p_save; |
962 | 0 | continue; |
963 | 0 | } |
964 | | |
965 | | /* Finally decode the surrogate pair then encode as UTF-8 */ |
966 | 0 | ctop -= 0xd800; |
967 | 0 | cbot -= 0xdc00; |
968 | 0 | if (!encode_utf8(ctx, &p, pend, 0x10000 + ((ctop << 10) | (cbot & 0x3ff)))) |
969 | 0 | return parse_string_error(ctx, p0, |
970 | 0 | "Internal JSON string parse error"); |
971 | 0 | } |
972 | | |
973 | 0 | if (p0 == NULL) |
974 | 0 | return heim_string_create(""); |
975 | | |
976 | | /* NUL-terminate for rk_base64_decode() and plain paranoia */ |
977 | 0 | if (p0 != NULL && p == pend) { |
978 | | /* |
979 | | * Work out how far p is into p0 to re-establish p after |
980 | | * the realloc() |
981 | | */ |
982 | 0 | size_t p0_to_pend_len = (pend - p0); |
983 | 0 | char *tmp = realloc(p0, 1 + p0_to_pend_len); |
984 | |
|
985 | 0 | if (tmp == NULL) { |
986 | 0 | ctx->error = heim_error_create_enomem(); |
987 | 0 | free(p0); |
988 | 0 | return NULL; |
989 | 0 | } |
990 | | /* |
991 | | * We have three pointers, p, pend (which are the same) |
992 | | * and p0, we want to keep them pointing into the same |
993 | | * memory after the realloc() |
994 | | */ |
995 | 0 | p = tmp + p0_to_pend_len; |
996 | |
|
997 | 0 | pend = p + 1; |
998 | 0 | p0 = tmp; |
999 | 0 | } |
1000 | 0 | *(p++) = '\0'; |
1001 | | |
1002 | | /* If there's embedded NULs, it's not a C string */ |
1003 | 0 | if (binary) { |
1004 | 0 | o = heim_data_ref_create(p0, (p - 1) - p0, free); |
1005 | 0 | return o; |
1006 | 0 | } |
1007 | | |
1008 | | /* Sadly this will copy `p0' */ |
1009 | 0 | o = heim_string_create_with_bytes(p0, p - p0); |
1010 | 0 | free(p0); |
1011 | 0 | return o; |
1012 | 0 | } |
1013 | | |
1014 | | static int |
1015 | | parse_pair(heim_dict_t dict, struct parse_ctx *ctx) |
1016 | 0 | { |
1017 | 0 | heim_string_t key; |
1018 | 0 | heim_object_t value; |
1019 | |
|
1020 | 0 | if (white_spaces(ctx)) |
1021 | 0 | return -1; |
1022 | | |
1023 | 0 | if (*ctx->p == '}') { |
1024 | 0 | ctx->p++; |
1025 | 0 | return 0; |
1026 | 0 | } |
1027 | | |
1028 | 0 | if (ctx->flags & HEIM_JSON_F_STRICT_DICT) |
1029 | | /* JSON allows only string keys */ |
1030 | 0 | key = parse_string(ctx); |
1031 | 0 | else |
1032 | | /* heim_dict_t allows any heim_object_t as key */ |
1033 | 0 | key = parse_value(ctx); |
1034 | 0 | if (key == NULL) |
1035 | | /* Even heim_dict_t does not allow C NULLs as keys though! */ |
1036 | 0 | return -1; |
1037 | | |
1038 | 0 | if (white_spaces(ctx)) { |
1039 | 0 | heim_release(key); |
1040 | 0 | return -1; |
1041 | 0 | } |
1042 | | |
1043 | 0 | if (*ctx->p != ':') { |
1044 | 0 | heim_release(key); |
1045 | 0 | return -1; |
1046 | 0 | } |
1047 | | |
1048 | 0 | ctx->p += 1; /* safe because we call white_spaces() next */ |
1049 | |
|
1050 | 0 | if (white_spaces(ctx)) { |
1051 | 0 | heim_release(key); |
1052 | 0 | return -1; |
1053 | 0 | } |
1054 | | |
1055 | 0 | value = parse_value(ctx); |
1056 | 0 | if (value == NULL && |
1057 | 0 | (ctx->error != NULL || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) { |
1058 | 0 | if (ctx->error == NULL) |
1059 | 0 | ctx->error = heim_error_create(EINVAL, "Invalid JSON encoding"); |
1060 | 0 | heim_release(key); |
1061 | 0 | return -1; |
1062 | 0 | } |
1063 | 0 | heim_dict_set_value(dict, key, value); |
1064 | 0 | heim_release(key); |
1065 | 0 | heim_release(value); |
1066 | |
|
1067 | 0 | if (white_spaces(ctx)) |
1068 | 0 | return -1; |
1069 | | |
1070 | 0 | if (*ctx->p == '}') { |
1071 | | /* |
1072 | | * Return 1 but don't consume the '}' so we can count the one |
1073 | | * pair in a one-pair dict |
1074 | | */ |
1075 | 0 | return 1; |
1076 | 0 | } else if (*ctx->p == ',') { |
1077 | 0 | ctx->p++; |
1078 | 0 | return 1; |
1079 | 0 | } |
1080 | 0 | return -1; |
1081 | 0 | } |
1082 | | |
1083 | | static heim_dict_t |
1084 | | parse_dict(struct parse_ctx *ctx) |
1085 | 0 | { |
1086 | 0 | heim_dict_t dict; |
1087 | 0 | size_t count = 0; |
1088 | 0 | int ret; |
1089 | |
|
1090 | 0 | heim_assert(*ctx->p == '{', "string doesn't start with {"); |
1091 | | |
1092 | 0 | dict = heim_dict_create(11); |
1093 | 0 | if (dict == NULL) { |
1094 | 0 | ctx->error = heim_error_create_enomem(); |
1095 | 0 | return NULL; |
1096 | 0 | } |
1097 | | |
1098 | 0 | ctx->p += 1; /* safe because parse_pair() calls white_spaces() first */ |
1099 | |
|
1100 | 0 | while ((ret = parse_pair(dict, ctx)) > 0) |
1101 | 0 | count++; |
1102 | 0 | if (ret < 0) { |
1103 | 0 | heim_release(dict); |
1104 | 0 | return NULL; |
1105 | 0 | } |
1106 | 0 | if (count == 1 && !(ctx->flags & HEIM_JSON_F_NO_DATA_DICT)) { |
1107 | 0 | heim_object_t v = heim_dict_copy_value(dict, heim_tid_data_uuid_key); |
1108 | | |
1109 | | /* |
1110 | | * Binary data encoded as a dict with a single magic key with |
1111 | | * base64-encoded value? Decode as heim_data_t. |
1112 | | */ |
1113 | 0 | if (v != NULL && heim_get_tid(v) == HEIM_TID_STRING) { |
1114 | 0 | void *buf; |
1115 | 0 | size_t len; |
1116 | |
|
1117 | 0 | buf = malloc(strlen(heim_string_get_utf8(v))); |
1118 | 0 | if (buf == NULL) { |
1119 | 0 | heim_release(dict); |
1120 | 0 | heim_release(v); |
1121 | 0 | ctx->error = heim_error_create_enomem(); |
1122 | 0 | return NULL; |
1123 | 0 | } |
1124 | 0 | len = rk_base64_decode(heim_string_get_utf8(v), buf); |
1125 | 0 | heim_release(v); |
1126 | 0 | if (len == -1) { |
1127 | 0 | free(buf); |
1128 | 0 | return dict; /* assume aliasing accident */ |
1129 | 0 | } |
1130 | 0 | heim_release(dict); |
1131 | 0 | return (heim_dict_t)heim_data_ref_create(buf, len, free); |
1132 | 0 | } |
1133 | 0 | } |
1134 | 0 | return dict; |
1135 | 0 | } |
1136 | | |
1137 | | static int |
1138 | | parse_item(heim_array_t array, struct parse_ctx *ctx) |
1139 | 0 | { |
1140 | 0 | heim_object_t value; |
1141 | |
|
1142 | 0 | if (white_spaces(ctx)) |
1143 | 0 | return -1; |
1144 | | |
1145 | 0 | if (*ctx->p == ']') { |
1146 | 0 | ctx->p++; /* safe because parse_value() calls white_spaces() first */ |
1147 | 0 | return 0; |
1148 | 0 | } |
1149 | | |
1150 | 0 | value = parse_value(ctx); |
1151 | 0 | if (value == NULL && |
1152 | 0 | (ctx->error || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) |
1153 | 0 | return -1; |
1154 | | |
1155 | 0 | heim_array_append_value(array, value); |
1156 | 0 | heim_release(value); |
1157 | |
|
1158 | 0 | if (white_spaces(ctx)) |
1159 | 0 | return -1; |
1160 | | |
1161 | 0 | if (*ctx->p == ']') { |
1162 | 0 | ctx->p++; |
1163 | 0 | return 0; |
1164 | 0 | } else if (*ctx->p == ',') { |
1165 | 0 | ctx->p++; |
1166 | 0 | return 1; |
1167 | 0 | } |
1168 | 0 | return -1; |
1169 | 0 | } |
1170 | | |
1171 | | static heim_array_t |
1172 | | parse_array(struct parse_ctx *ctx) |
1173 | 0 | { |
1174 | 0 | heim_array_t array = heim_array_create(); |
1175 | 0 | int ret; |
1176 | |
|
1177 | 0 | heim_assert(*ctx->p == '[', "array doesn't start with ["); |
1178 | 0 | ctx->p += 1; |
1179 | |
|
1180 | 0 | while ((ret = parse_item(array, ctx)) > 0) |
1181 | 0 | ; |
1182 | 0 | if (ret < 0) { |
1183 | 0 | heim_release(array); |
1184 | 0 | return NULL; |
1185 | 0 | } |
1186 | 0 | return array; |
1187 | 0 | } |
1188 | | |
1189 | | static heim_object_t |
1190 | | parse_value(struct parse_ctx *ctx) |
1191 | 0 | { |
1192 | 0 | size_t len; |
1193 | 0 | heim_object_t o; |
1194 | |
|
1195 | 0 | if (white_spaces(ctx)) |
1196 | 0 | return NULL; |
1197 | | |
1198 | 0 | if (*ctx->p == '"') { |
1199 | 0 | return parse_string(ctx); |
1200 | 0 | } else if (*ctx->p == '{') { |
1201 | 0 | if (ctx->depth-- == 1) { |
1202 | 0 | ctx->error = heim_error_create(EINVAL, "JSON object too deep"); |
1203 | 0 | return NULL; |
1204 | 0 | } |
1205 | 0 | o = parse_dict(ctx); |
1206 | 0 | ctx->depth++; |
1207 | 0 | return o; |
1208 | 0 | } else if (*ctx->p == '[') { |
1209 | 0 | if (ctx->depth-- == 1) { |
1210 | 0 | ctx->error = heim_error_create(EINVAL, "JSON object too deep"); |
1211 | 0 | return NULL; |
1212 | 0 | } |
1213 | 0 | o = parse_array(ctx); |
1214 | 0 | ctx->depth++; |
1215 | 0 | return o; |
1216 | 0 | } else if (is_number(*ctx->p) || *ctx->p == '-') { |
1217 | 0 | return parse_number(ctx); |
1218 | 0 | } |
1219 | | |
1220 | 0 | len = ctx->pend - ctx->p; |
1221 | |
|
1222 | 0 | if ((ctx->flags & HEIM_JSON_F_NO_C_NULL) == 0 && |
1223 | 0 | len >= 6 && memcmp(ctx->p, "<NULL>", 6) == 0) { |
1224 | 0 | ctx->p += 6; |
1225 | 0 | return heim_null_create(); |
1226 | 0 | } else if (len >= 4 && memcmp(ctx->p, "null", 4) == 0) { |
1227 | 0 | ctx->p += 4; |
1228 | 0 | return heim_null_create(); |
1229 | 0 | } else if (len >= 4 && strncasecmp((char *)ctx->p, "true", 4) == 0) { |
1230 | 0 | ctx->p += 4; |
1231 | 0 | return heim_bool_create(1); |
1232 | 0 | } else if (len >= 5 && strncasecmp((char *)ctx->p, "false", 5) == 0) { |
1233 | 0 | ctx->p += 5; |
1234 | 0 | return heim_bool_create(0); |
1235 | 0 | } |
1236 | | |
1237 | 0 | ctx->error = heim_error_create(EINVAL, "unknown char %c at %lu line %lu", |
1238 | 0 | (char)*ctx->p, |
1239 | 0 | (unsigned long)(ctx->p - ctx->pstart), |
1240 | 0 | ctx->lineno); |
1241 | 0 | return NULL; |
1242 | 0 | } |
1243 | | |
1244 | | |
1245 | | heim_object_t |
1246 | | heim_json_create(const char *string, size_t max_depth, heim_json_flags_t flags, |
1247 | | heim_error_t *error) |
1248 | 0 | { |
1249 | 0 | return heim_json_create_with_bytes(string, strlen(string), max_depth, flags, |
1250 | 0 | error); |
1251 | 0 | } |
1252 | | |
1253 | | heim_object_t |
1254 | | heim_json_create_with_bytes(const void *data, size_t length, size_t max_depth, |
1255 | | heim_json_flags_t flags, heim_error_t *error) |
1256 | 0 | { |
1257 | 0 | struct parse_ctx ctx; |
1258 | 0 | heim_object_t o; |
1259 | |
|
1260 | 0 | heim_base_once_f(&heim_json_once, NULL, json_init_once); |
1261 | |
|
1262 | 0 | ctx.lineno = 1; |
1263 | 0 | ctx.p = data; |
1264 | 0 | ctx.pstart = data; |
1265 | 0 | ctx.pend = ((uint8_t *)data) + length; |
1266 | 0 | ctx.error = NULL; |
1267 | 0 | ctx.flags = flags; |
1268 | 0 | ctx.depth = max_depth; |
1269 | |
|
1270 | 0 | o = parse_value(&ctx); |
1271 | |
|
1272 | 0 | if (o == NULL && error) { |
1273 | 0 | *error = ctx.error; |
1274 | 0 | } else if (ctx.error) { |
1275 | 0 | heim_release(ctx.error); |
1276 | 0 | } |
1277 | |
|
1278 | 0 | return o; |
1279 | 0 | } |
1280 | | |
1281 | | |
/*
 * Output callback used by heim_show(): `ctx' is the stdio stream to write
 * to and `str' the text to emit.  A NULL `str' is ignored.
 */
static void
show_printf(void *ctx, const char *str)
{
    FILE *stream = ctx;

    if (str != NULL)
        fputs(str, stream);
}
1289 | | |
1290 | | /** |
1291 | | * Dump a heimbase object to stderr (useful from the debugger!) |
1292 | | * |
1293 | | * @param obj object to dump using JSON or JSON-like format |
1294 | | * |
1295 | | * @addtogroup heimbase |
1296 | | */ |
1297 | | void |
1298 | | heim_show(heim_object_t obj) |
1299 | 0 | { |
1300 | 0 | heim_base2json(obj, stderr, HEIM_JSON_F_NO_DATA_DICT, show_printf); |
1301 | 0 | } |
1302 | | |
1303 | | static void |
1304 | | strbuf_add(void *ctx, const char *str) |
1305 | 0 | { |
1306 | 0 | struct heim_strbuf *strbuf = ctx; |
1307 | 0 | size_t len; |
1308 | |
|
1309 | 0 | if (strbuf->enomem) |
1310 | 0 | return; |
1311 | | |
1312 | 0 | if (str == NULL) { |
1313 | | /* |
1314 | | * Eat the last '\n'; this is used when formatting dict pairs |
1315 | | * and array items so that the ',' separating them is never |
1316 | | * preceded by a '\n'. |
1317 | | */ |
1318 | 0 | if (strbuf->len > 0 && strbuf->str[strbuf->len - 1] == '\n') |
1319 | 0 | strbuf->len--; |
1320 | 0 | return; |
1321 | 0 | } |
1322 | | |
1323 | 0 | len = strlen(str); |
1324 | 0 | if ((len + 1) > (strbuf->alloced - strbuf->len)) { |
1325 | 0 | size_t new_len = strbuf->alloced + (strbuf->alloced >> 2) + len + 1; |
1326 | 0 | char *s; |
1327 | |
|
1328 | 0 | s = realloc(strbuf->str, new_len); |
1329 | 0 | if (s == NULL) { |
1330 | 0 | strbuf->enomem = 1; |
1331 | 0 | return; |
1332 | 0 | } |
1333 | 0 | strbuf->str = s; |
1334 | 0 | strbuf->alloced = new_len; |
1335 | 0 | } |
1336 | | /* +1 so we copy the NUL */ |
1337 | 0 | (void) memcpy(strbuf->str + strbuf->len, str, len + 1); |
1338 | 0 | strbuf->len += len; |
1339 | 0 | if (strbuf->str[strbuf->len - 1] == '\n' && |
1340 | 0 | strbuf->flags & HEIM_JSON_F_ONE_LINE) |
1341 | 0 | strbuf->len--; |
1342 | 0 | } |
1343 | | |
1344 | 0 | #define STRBUF_INIT_SZ 64 |
1345 | | |
1346 | | heim_string_t |
1347 | | heim_json_copy_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_t *error) |
1348 | 0 | { |
1349 | 0 | heim_string_t str; |
1350 | 0 | struct heim_strbuf strbuf; |
1351 | 0 | int ret; |
1352 | |
|
1353 | 0 | if (error) |
1354 | 0 | *error = NULL; |
1355 | |
|
1356 | 0 | memset(&strbuf, 0, sizeof (strbuf)); |
1357 | 0 | strbuf.str = malloc(STRBUF_INIT_SZ); |
1358 | 0 | if (strbuf.str == NULL) { |
1359 | 0 | if (error) |
1360 | 0 | *error = heim_error_create_enomem(); |
1361 | 0 | return NULL; |
1362 | 0 | } |
1363 | 0 | strbuf.len = 0; |
1364 | 0 | strbuf.alloced = STRBUF_INIT_SZ; |
1365 | 0 | strbuf.str[0] = '\0'; |
1366 | 0 | strbuf.flags = flags; |
1367 | |
|
1368 | 0 | ret = heim_base2json(obj, &strbuf, flags, strbuf_add); |
1369 | 0 | if (ret || strbuf.enomem) { |
1370 | 0 | if (error) { |
1371 | 0 | if (strbuf.enomem || ret == ENOMEM) |
1372 | 0 | *error = heim_error_create_enomem(); |
1373 | 0 | else |
1374 | 0 | *error = heim_error_create(1, "Impossible to JSON-encode " |
1375 | 0 | "object"); |
1376 | 0 | } |
1377 | 0 | free(strbuf.str); |
1378 | 0 | return NULL; |
1379 | 0 | } |
1380 | 0 | if (flags & HEIM_JSON_F_ONE_LINE) { |
1381 | 0 | strbuf.flags &= ~HEIM_JSON_F_ONE_LINE; |
1382 | 0 | strbuf_add(&strbuf, "\n"); |
1383 | 0 | } |
1384 | 0 | str = heim_string_ref_create(strbuf.str, free); |
1385 | 0 | if (str == NULL) { |
1386 | 0 | if (error) |
1387 | 0 | *error = heim_error_create_enomem(); |
1388 | 0 | free(strbuf.str); |
1389 | 0 | } |
1390 | 0 | return str; |
1391 | 0 | } |
1392 | | |
1393 | | struct heim_eq_f_ctx { |
1394 | | heim_dict_t other; |
1395 | | int ret; |
1396 | | }; |
1397 | | |
1398 | | static void |
1399 | | heim_eq_dict_iter_f(heim_object_t key, heim_object_t val, void *d) |
1400 | 0 | { |
1401 | 0 | struct heim_eq_f_ctx *ctx = d; |
1402 | 0 | heim_object_t other_val; |
1403 | |
|
1404 | 0 | if (!ctx->ret) |
1405 | 0 | return; |
1406 | | |
1407 | | /* |
1408 | | * This doesn't work if the key is an array or a dict, which, anyways, |
1409 | | * isn't allowed in JSON, though we allow it. |
1410 | | */ |
1411 | 0 | other_val = heim_dict_get_value(ctx->other, key); |
1412 | 0 | ctx->ret = heim_json_eq(val, other_val); |
1413 | 0 | } |
1414 | | |
1415 | | int |
1416 | | heim_json_eq(heim_object_t a, heim_object_t b) |
1417 | 0 | { |
1418 | 0 | heim_tid_t atid, btid; |
1419 | |
|
1420 | 0 | if (a == b) |
1421 | 0 | return 1; |
1422 | 0 | if (a == NULL || b == NULL) |
1423 | 0 | return 0; |
1424 | 0 | atid = heim_get_tid(a); |
1425 | 0 | btid = heim_get_tid(b); |
1426 | 0 | if (atid != btid) |
1427 | 0 | return 0; |
1428 | 0 | switch (atid) { |
1429 | 0 | case HEIM_TID_ARRAY: { |
1430 | 0 | size_t len = heim_array_get_length(b); |
1431 | 0 | size_t i; |
1432 | |
|
1433 | 0 | if (heim_array_get_length(a) != len) |
1434 | 0 | return 0; |
1435 | 0 | for (i = 0; i < len; i++) { |
1436 | 0 | if (!heim_json_eq(heim_array_get_value(a, i), |
1437 | 0 | heim_array_get_value(b, i))) |
1438 | 0 | return 0; |
1439 | 0 | } |
1440 | 0 | return 1; |
1441 | 0 | } |
1442 | 0 | case HEIM_TID_DICT: { |
1443 | 0 | struct heim_eq_f_ctx ctx; |
1444 | |
|
1445 | 0 | ctx.other = b; |
1446 | 0 | ctx.ret = 1; |
1447 | 0 | heim_dict_iterate_f(a, &ctx, heim_eq_dict_iter_f); |
1448 | |
|
1449 | 0 | if (ctx.ret) { |
1450 | 0 | ctx.other = a; |
1451 | 0 | heim_dict_iterate_f(b, &ctx, heim_eq_dict_iter_f); |
1452 | 0 | } |
1453 | 0 | return ctx.ret; |
1454 | 0 | } |
1455 | 0 | case HEIM_TID_STRING: |
1456 | 0 | return strcmp(heim_string_get_utf8(a), heim_string_get_utf8(b)) == 0; |
1457 | 0 | case HEIM_TID_DATA: { |
1458 | 0 | return heim_data_get_length(a) == heim_data_get_length(b) && |
1459 | 0 | memcmp(heim_data_get_ptr(a), heim_data_get_ptr(b), |
1460 | 0 | heim_data_get_length(a)) == 0; |
1461 | 0 | } |
1462 | 0 | case HEIM_TID_NUMBER: |
1463 | 0 | return heim_number_get_long(a) == heim_number_get_long(b); |
1464 | 0 | case HEIM_TID_NULL: |
1465 | 0 | case HEIM_TID_BOOL: |
1466 | 0 | return heim_bool_val(a) == heim_bool_val(b); |
1467 | 0 | default: |
1468 | 0 | break; |
1469 | 0 | } |
1470 | 0 | return 0; |
1471 | 0 | } |