/src/wireshark/wsutil/wsjson.c
Line | Count | Source |
1 | | /* wsjson.c |
2 | | * JSON parsing functions. |
3 | | * |
4 | | * Copyright 2016, Dario Lombardo |
5 | | * |
6 | | * Wireshark - Network traffic analyzer |
7 | | * By Gerald Combs <gerald@wireshark.org> |
8 | | * Copyright 1998 Gerald Combs |
9 | | * |
10 | | * SPDX-License-Identifier: GPL-2.0-or-later |
11 | | */ |
12 | | |
13 | | #include "config.h" |
14 | 0 | #define WS_LOG_DOMAIN LOG_DOMAIN_MAIN |
15 | | |
16 | | #include "wsjson.h" |
17 | | |
18 | | #include <string.h> |
19 | | #include <errno.h> |
20 | | #include <wsutil/jsmn.h> |
21 | | #include <wsutil/str_util.h> |
22 | | #include <wsutil/unicode-utils.h> |
23 | | #include <wsutil/wslog.h> |
24 | | |
25 | | bool |
26 | | json_validate(const uint8_t *buf, const size_t len) |
27 | 0 | { |
28 | 0 | bool ret = true; |
29 | | /* We expect no more than 1024 tokens */ |
30 | 0 | unsigned max_tokens = 1024; |
31 | 0 | jsmntok_t* t; |
32 | 0 | jsmn_parser p; |
33 | 0 | int rcode; |
34 | | |
35 | | /* |
36 | | * Make sure the buffer isn't empty and the first octet isn't a NUL; |
37 | | * otherwise, the parser will immediately stop parsing and not validate |
38 | | * anything after that, so it'll just think it was handed an empty string. |
39 | | * |
40 | | * XXX - should we check for NULs anywhere in the buffer? |
41 | | */ |
42 | 0 | if (len == 0) { |
43 | 0 | ws_debug("JSON string is empty"); |
44 | 0 | return false; |
45 | 0 | } |
46 | 0 | if (buf[0] == '\0') { |
47 | 0 | ws_debug("invalid character inside JSON string"); |
48 | 0 | return false; |
49 | 0 | } |
50 | | |
51 | | /* |
52 | | * XXX - We create the token array and have jsmn_parse fill it in, only |
53 | | * to free it. It might make more sense to pass in NULL for tokens, and |
54 | | * for our sanity check just check that len isn't too big. |
55 | | */ |
56 | 0 | t = g_new0(jsmntok_t, max_tokens); |
57 | |
|
58 | 0 | if (!t) |
59 | 0 | return false; |
60 | | |
61 | 0 | jsmn_init(&p); |
62 | 0 | rcode = jsmn_parse(&p, (const char*)buf, len, t, max_tokens); |
63 | 0 | if (rcode < 0) { |
64 | 0 | switch (rcode) { |
65 | 0 | case JSMN_ERROR_NOMEM: |
66 | 0 | ws_debug("not enough tokens were provided"); |
67 | 0 | break; |
68 | 0 | case JSMN_ERROR_INVAL: |
69 | 0 | ws_debug("invalid character inside JSON string"); |
70 | 0 | break; |
71 | 0 | case JSMN_ERROR_PART: |
72 | 0 | ws_debug("the string is not a full JSON packet, " |
73 | 0 | "more bytes expected"); |
74 | 0 | break; |
75 | 0 | default: |
76 | 0 | ws_debug("unexpected error"); |
77 | 0 | break; |
78 | 0 | } |
79 | 0 | ret = false; |
80 | 0 | } |
81 | | |
82 | 0 | g_free(t); |
83 | 0 | return ret; |
84 | 0 | } |
85 | | |
86 | | int |
87 | | json_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens) |
88 | 0 | { |
89 | 0 | jsmn_parser p; |
90 | |
|
91 | 0 | jsmn_init(&p); |
92 | 0 | return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens); |
93 | 0 | } |
94 | | |
95 | | int |
96 | | json_parse_len(const char *buf, size_t len, jsmntok_t *tokens, unsigned int max_tokens) |
97 | 0 | { |
98 | 0 | jsmn_parser p; |
99 | |
|
100 | 0 | jsmn_init(&p); |
101 | 0 | return jsmn_parse(&p, buf, len, tokens, max_tokens); |
102 | 0 | } |
103 | | |
104 | | jsmntok_t *json_get_next_object(jsmntok_t *cur) |
105 | 0 | { |
106 | 0 | int i; |
107 | 0 | jsmntok_t *next = cur+1; |
108 | |
|
109 | 0 | for (i = 0; i < cur->size; i++) { |
110 | 0 | next = json_get_next_object(next); |
111 | 0 | } |
112 | 0 | return next; |
113 | 0 | } |
114 | | |
115 | | jsmntok_t *json_get_object(const char *buf, jsmntok_t *parent, const char *name) |
116 | 0 | { |
117 | 0 | int i; |
118 | 0 | jsmntok_t *cur = parent+1; |
119 | |
|
120 | 0 | for (i = 0; i < parent->size; i++) { |
121 | 0 | if (cur->type == JSMN_STRING && |
122 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
123 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
124 | 0 | cur->size == 1 && (cur+1)->type == JSMN_OBJECT) { |
125 | 0 | return cur+1; |
126 | 0 | } |
127 | 0 | cur = json_get_next_object(cur); |
128 | 0 | } |
129 | 0 | return NULL; |
130 | 0 | } |
131 | | |
132 | | jsmntok_t *json_get_array(const char *buf, jsmntok_t *parent, const char *name) |
133 | 0 | { |
134 | 0 | int i; |
135 | 0 | jsmntok_t *cur = parent+1; |
136 | |
|
137 | 0 | for (i = 0; i < parent->size; i++) { |
138 | 0 | if (cur->type == JSMN_STRING && |
139 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
140 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
141 | 0 | cur->size == 1 && (cur+1)->type == JSMN_ARRAY) { |
142 | 0 | return cur+1; |
143 | 0 | } |
144 | 0 | cur = json_get_next_object(cur); |
145 | 0 | } |
146 | 0 | return NULL; |
147 | 0 | } |
148 | | |
149 | | int json_get_array_len(jsmntok_t *array) |
150 | 0 | { |
151 | 0 | if (array->type != JSMN_ARRAY) |
152 | 0 | return -1; |
153 | 0 | return array->size; |
154 | 0 | } |
155 | | |
156 | | jsmntok_t *json_get_array_index(jsmntok_t *array, int idx) |
157 | 0 | { |
158 | 0 | int i; |
159 | 0 | jsmntok_t *cur = array+1; |
160 | | |
161 | |
|
162 | 0 | if (array->type != JSMN_ARRAY || idx < 0 || idx >= array->size) |
163 | 0 | return NULL; |
164 | 0 | for (i = 0; i < idx; i++) |
165 | 0 | cur = json_get_next_object(cur); |
166 | 0 | return cur; |
167 | 0 | } |
168 | | |
169 | | char *json_get_string(char *buf, jsmntok_t *parent, const char *name) |
170 | 0 | { |
171 | 0 | int i; |
172 | 0 | jsmntok_t *cur = parent+1; |
173 | |
|
174 | 0 | for (i = 0; i < parent->size; i++) { |
175 | 0 | if (cur->type == JSMN_STRING && |
176 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
177 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
178 | 0 | cur->size == 1 && (cur+1)->type == JSMN_STRING) { |
179 | 0 | buf[(cur+1)->end] = '\0'; |
180 | 0 | if (!json_decode_string_inplace(&buf[(cur+1)->start])) |
181 | 0 | return NULL; |
182 | 0 | return &buf[(cur+1)->start]; |
183 | 0 | } |
184 | 0 | cur = json_get_next_object(cur); |
185 | 0 | } |
186 | 0 | return NULL; |
187 | 0 | } |
188 | | |
189 | | bool json_get_double(char *buf, jsmntok_t *parent, const char *name, double *val) |
190 | 0 | { |
191 | 0 | int i; |
192 | 0 | jsmntok_t *cur = parent+1; |
193 | |
|
194 | 0 | for (i = 0; i < parent->size; i++) { |
195 | 0 | if (cur->type == JSMN_STRING && |
196 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
197 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
198 | 0 | cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) { |
199 | 0 | buf[(cur+1)->end] = '\0'; |
200 | 0 | *val = g_ascii_strtod(&buf[(cur+1)->start], NULL); |
201 | 0 | if (errno != 0) |
202 | 0 | return false; |
203 | 0 | return true; |
204 | 0 | } |
205 | 0 | cur = json_get_next_object(cur); |
206 | 0 | } |
207 | 0 | return false; |
208 | 0 | } |
209 | | |
210 | | bool json_get_int(char *buf, jsmntok_t *parent, const char *name, int64_t *val) |
211 | 0 | { |
212 | 0 | int i; |
213 | 0 | jsmntok_t *cur = parent+1; |
214 | |
|
215 | 0 | for (i = 0; i < parent->size; i++) { |
216 | 0 | if (cur->type == JSMN_STRING && |
217 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
218 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
219 | 0 | cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) { |
220 | 0 | buf[(cur+1)->end] = '\0'; |
221 | 0 | *val = g_ascii_strtoll(&buf[(cur+1)->start], NULL, 10); |
222 | 0 | if (errno != 0) |
223 | 0 | return false; |
224 | 0 | return true; |
225 | 0 | } |
226 | 0 | cur = json_get_next_object(cur); |
227 | 0 | } |
228 | 0 | return false; |
229 | 0 | } |
230 | | |
231 | | bool json_get_boolean(char *buf, jsmntok_t *parent, const char *name, bool *val) |
232 | 0 | { |
233 | 0 | int i; |
234 | 0 | size_t tok_len; |
235 | 0 | jsmntok_t *cur = parent+1; |
236 | |
|
237 | 0 | for (i = 0; i < parent->size; i++) { |
238 | 0 | if (cur->type == JSMN_STRING && |
239 | 0 | !strncmp(&buf[cur->start], name, cur->end - cur->start) |
240 | 0 | && strlen(name) == (size_t)(cur->end - cur->start) && |
241 | 0 | cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) { |
242 | | /* JSMN_STRICT guarantees that a primitive starts with the |
243 | | * correct character. |
244 | | */ |
245 | 0 | tok_len = (cur+1)->end - (cur+1)->start; |
246 | 0 | switch (buf[(cur+1)->start]) { |
247 | 0 | case 't': |
248 | 0 | if (tok_len == 4 && strncmp(&buf[(cur+1)->start], "true", tok_len) == 0) { |
249 | 0 | *val = true; |
250 | 0 | return true; |
251 | 0 | } |
252 | 0 | return false; |
253 | 0 | case 'f': |
254 | 0 | if (tok_len == 5 && strncmp(&buf[(cur+1)->start], "false", tok_len) == 0) { |
255 | 0 | *val = false; |
256 | 0 | return true; |
257 | 0 | } |
258 | 0 | return false; |
259 | 0 | default: |
260 | 0 | return false; |
261 | 0 | } |
262 | 0 | } |
263 | 0 | cur = json_get_next_object(cur); |
264 | 0 | } |
265 | 0 | return false; |
266 | 0 | } |
267 | | |
/*
 * Read four characters starting at *cursor, convert each with ws_xton() and
 * combine them, first character most significant, into *value.
 *
 * *cursor is advanced past every character examined. Returns false, leaving
 * *value untouched, as soon as ws_xton() returns -1 for a character.
 */
static bool
json_read_hex_quad(const char **cursor, uint32_t *value)
{
    uint32_t acc = 0;
    int digits;

    for (digits = 0; digits < 4; digits++) {
        int nibble = ws_xton(*(*cursor)++);

        if (nibble == -1)
            return false;
        acc = (acc << 4) | (uint32_t)nibble;
    }
    *value = acc;
    return true;
}

/*
 * Replace the backslash escapes in the NUL-terminated string text with the
 * characters they stand for, rewriting the string in place.
 *
 * Handled escapes are \" \\ \/ \b \f \n \r \t and \uXXXX; a \uXXXX lead
 * surrogate must be followed directly by a \uXXXX trail surrogate, and the
 * pair is combined into one code point. Code points are written as UTF-8.
 *
 * Returns true on success, with text NUL-terminated at its new length.
 * Returns false on an unknown escape, a bad hex digit, an unpaired
 * surrogate, a code point rejected by g_unichar_validate(), or \u0000; in
 * that case text may already have been partially rewritten.
 */
bool
json_decode_string_inplace(char *text)
{
    const char *src = text;
    char *dst = text;

    while (*src != '\0') {
        char c = *src++;

        /* Anything that is not an escape is copied through unchanged. */
        if (c != '\\') {
            *dst++ = c;
            continue;
        }

        c = *src++;
        if (c == '\"' || c == '\\' || c == '/') {
            *dst++ = c;
        } else if (c == 'b') {
            *dst++ = '\b';
        } else if (c == 'f') {
            *dst++ = '\f';
        } else if (c == 'n') {
            *dst++ = '\n';
        } else if (c == 'r') {
            *dst++ = '\r';
        } else if (c == 't') {
            *dst++ = '\t';
        } else if (c == 'u') {
            uint32_t code_point;

            if (!json_read_hex_quad(&src, &code_point))
                return false;

            if (IS_LEAD_SURROGATE(code_point)) {
                uint16_t lead = (uint16_t)code_point;
                uint16_t trail;
                uint32_t second;

                /* The other half of the pair must follow immediately. */
                if (src[0] != '\\' || src[1] != 'u')
                    return false;
                src += 2;

                if (!json_read_hex_quad(&src, &second))
                    return false;
                trail = (uint16_t)second;

                if (!IS_TRAIL_SURROGATE(trail))
                    return false;

                code_point = SURROGATE_VALUE(lead, trail);
            } else if (IS_TRAIL_SURROGATE(code_point)) {
                /* A trail surrogate with no lead before it is malformed. */
                return false;
            }

            if (!g_unichar_validate(code_point))
                return false;

            /* Don't allow NUL byte injection. */
            if (code_point == 0)
                return false;

            /* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
            dst += g_unichar_to_utf8(code_point, dst);
        } else {
            return false;
        }
    }

    *dst = '\0';
    return true;
}
371 | | |
372 | | /* |
373 | | * Editor modelines - https://www.wireshark.org/tools/modelines.html |
374 | | * |
375 | | * Local variables: |
376 | | * c-basic-offset: 4 |
377 | | * tab-width: 8 |
378 | | * indent-tabs-mode: nil |
379 | | * End: |
380 | | * |
381 | | * vi: set shiftwidth=4 tabstop=8 expandtab: |
382 | | * :indentSize=4:tabSize=8:noTabs=true: |
383 | | */ |