/src/wireshark/wsutil/wsjson.c

Source
/* wsjson.c
 * JSON parsing functions.
 *
 * Copyright 2016, Dario Lombardo
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_MAIN

#include "wsjson.h"

#include <string.h>
#include <errno.h>
#include <wsutil/jsmn.h>
#include <wsutil/str_util.h>
#include <wsutil/unicode-utils.h>
#include <wsutil/wslog.h>

/*
 * Check whether a byte buffer parses as JSON.
 *
 * buf - the bytes to check; the length is given explicitly by len
 * len - number of bytes in buf
 *
 * Returns false if the buffer is empty, if its first octet is a NUL, or
 * if jsmn_parse() reports an error (including needing more than 1024
 * tokens); returns true otherwise.
 */
bool
json_validate(const uint8_t *buf, const size_t len)
{
    bool ret = true;
    /* We expect no more than 1024 tokens */
    unsigned max_tokens = 1024;
    jsmntok_t *t;
    jsmn_parser p;
    int rcode;

    /*
     * Make sure the buffer isn't empty and the first octet isn't a NUL;
     * otherwise, the parser will immediately stop parsing and not validate
     * anything after that, so it'll just think it was handed an empty string.
     *
     * XXX - should we check for NULs anywhere in the buffer?
     */
    if (len == 0) {
        ws_debug("JSON string is empty");
        return false;
    }
    if (buf[0] == '\0') {
        ws_debug("invalid character inside JSON string");
        return false;
    }

    /*
     * XXX - We create the token array and have jsmn_parse fill it in, only
     * to free it. It might make more sense to pass in NULL for tokens, and
     * for our sanity check just check that len isn't too big.
     *
     * g_new0() terminates the program when the allocation fails instead of
     * returning NULL, so the result needs no NULL check.
     */
    t = g_new0(jsmntok_t, max_tokens);

    jsmn_init(&p);
    rcode = jsmn_parse(&p, (const char*)buf, len, t, max_tokens);
    if (rcode < 0) {
        /* Negative return codes are parse errors; log which one. */
        switch (rcode) {
            case JSMN_ERROR_NOMEM:
                ws_debug("not enough tokens were provided");
                break;
            case JSMN_ERROR_INVAL:
                ws_debug("invalid character inside JSON string");
                break;
            case JSMN_ERROR_PART:
                ws_debug("the string is not a full JSON packet, "
                    "more bytes expected");
                break;
            default:
                ws_debug("unexpected error");
                break;
        }
        ret = false;
    }

    g_free(t);
    return ret;
}

/*
 * Tokenize a NUL-terminated JSON string.
 *
 * buf        - NUL-terminated text to parse; its length is taken with strlen()
 * tokens     - token array handed to jsmn_parse()
 * max_tokens - capacity of the tokens array
 *
 * Returns whatever jsmn_parse() returns for a freshly initialized parser.
 */
int
json_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens)
{
    jsmn_parser parser;
    size_t text_len;

    text_len = strlen(buf);
    jsmn_init(&parser);

    return jsmn_parse(&parser, buf, text_len, tokens, max_tokens);
}

/*
 * Tokenize a JSON buffer of explicit length.
 *
 * buf        - text to parse
 * len        - number of bytes of buf to hand to the parser
 * tokens     - token array handed to jsmn_parse()
 * max_tokens - capacity of the tokens array
 *
 * Returns whatever jsmn_parse() returns for a freshly initialized parser.
 */
int
json_parse_len(const char *buf, size_t len, jsmntok_t *tokens, unsigned int max_tokens)
{
    jsmn_parser parser;
    int result;

    jsmn_init(&parser);
    result = jsmn_parse(&parser, buf, len, tokens, max_tokens);

    return result;
}

/*
 * Step over a token together with everything nested beneath it.
 *
 * cur - token to skip
 *
 * Returns a pointer to the first token that follows cur and all of its
 * descendants in the token array.  Each token's size field is used as the
 * count of its direct children.
 *
 * The walk is iterative rather than recursive so that the amount of stack
 * used does not grow with the nesting depth of the parsed document.
 */
jsmntok_t *json_get_next_object(jsmntok_t *cur)
{
    jsmntok_t *tok = cur;
    /* Number of tokens still to be stepped over; starts with cur itself. */
    size_t pending = 1;

    while (pending > 0) {
        /* Visiting a token consumes it and queues up its children. */
        pending--;
        if (tok->size > 0)
            pending += (size_t)tok->size;
        tok++;
    }
    return tok;
}

/*
 * Look up a member of a parent token by key and return its value token,
 * provided that value is a JSON object.
 *
 * buf    - the text the tokens index into
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 *
 * Returns the value token of the first matching key whose value has type
 * JSMN_OBJECT, or NULL if there is none.
 */
jsmntok_t *json_get_object(const char *buf, jsmntok_t *parent, const char *name)
{
    jsmntok_t *key = parent + 1;
    int left;

    for (left = parent->size; left > 0; left--) {
        /* Only a string token with exactly one child can be a key/value pair. */
        if (key->type == JSMN_STRING && key->size == 1) {
            const size_t key_len = (size_t)(key->end - key->start);

            if (strlen(name) == key_len &&
                strncmp(&buf[key->start], name, key_len) == 0 &&
                key[1].type == JSMN_OBJECT) {
                return &key[1];
            }
        }
        key = json_get_next_object(key);
    }
    return NULL;
}

/*
 * Look up a member of a parent token by key and return its value token,
 * provided that value is a JSON array.
 *
 * buf    - the text the tokens index into
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 *
 * Returns the value token of the first matching key whose value has type
 * JSMN_ARRAY, or NULL if there is none.
 */
jsmntok_t *json_get_array(const char *buf, jsmntok_t *parent, const char *name)
{
    jsmntok_t *key = parent + 1;
    int left;

    for (left = parent->size; left > 0; left--) {
        /* Only a string token with exactly one child can be a key/value pair. */
        if (key->type == JSMN_STRING && key->size == 1) {
            const size_t key_len = (size_t)(key->end - key->start);

            if (strlen(name) == key_len &&
                strncmp(&buf[key->start], name, key_len) == 0 &&
                key[1].type == JSMN_ARRAY) {
                return &key[1];
            }
        }
        key = json_get_next_object(key);
    }
    return NULL;
}

/*
 * Report the number of elements of an array token.
 *
 * array - token to inspect
 *
 * Returns the token's size field when the token has type JSMN_ARRAY,
 * and -1 for any other token type.
 */
int json_get_array_len(jsmntok_t *array)
{
    return (array->type == JSMN_ARRAY) ? array->size : -1;
}

/*
 * Fetch the token of the idx-th element of an array token.
 *
 * array - token expected to have type JSMN_ARRAY
 * idx   - zero-based element index
 *
 * Returns the element's token, or NULL when array is not a JSMN_ARRAY or
 * idx lies outside [0, array->size).
 */
jsmntok_t *json_get_array_index(jsmntok_t *array, int idx)
{
    jsmntok_t *elem;
    int skipped = 0;

    if (array->type != JSMN_ARRAY)
        return NULL;
    if (idx < 0 || idx >= array->size)
        return NULL;

    /* Elements start right after the array token; hop over idx of them. */
    elem = array + 1;
    while (skipped < idx) {
        elem = json_get_next_object(elem);
        skipped++;
    }
    return elem;
}

/*
 * Look up a string-valued member of a parent token by key and return the
 * decoded string.
 *
 * buf    - the text the tokens index into; it is modified: a NUL is written
 *          at the end of the value and the value is unescaped in place
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 *
 * Returns a pointer into buf at the start of the decoded value, or NULL if
 * no matching key with a JSMN_STRING value exists or if
 * json_decode_string_inplace() rejects the value.
 */
char *json_get_string(char *buf, jsmntok_t *parent, const char *name)
{
    jsmntok_t *key = parent + 1;
    int left;

    for (left = parent->size; left > 0; left--) {
        /* Only a string token with exactly one child can be a key/value pair. */
        if (key->type == JSMN_STRING && key->size == 1) {
            const size_t key_len = (size_t)(key->end - key->start);

            if (strlen(name) == key_len &&
                strncmp(&buf[key->start], name, key_len) == 0 &&
                key[1].type == JSMN_STRING) {
                jsmntok_t *value = key + 1;
                char *text = &buf[value->start];

                /* Terminate the value so it can be handled as a C string. */
                buf[value->end] = '\0';
                return json_decode_string_inplace(text) ? text : NULL;
            }
        }
        key = json_get_next_object(key);
    }
    return NULL;
}

/*
 * Look up a primitive-valued member of a parent token by key and convert
 * it to a double.
 *
 * buf    - the text the tokens index into; it is modified: a NUL is written
 *          at the end of the value so it can be converted as a C string
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 * val    - receives the result of g_ascii_strtod() for the first matching
 *          member (it is written even when false is returned for that member)
 *
 * Returns true if a matching key with a JSMN_PRIMITIVE value was found and
 * the conversion left errno at zero; false otherwise.
 */
bool json_get_double(char *buf, jsmntok_t *parent, const char *name, double *val)
{
    int i;
    jsmntok_t *cur = parent+1;

    for (i = 0; i < parent->size; i++) {
        if (cur->type == JSMN_STRING &&
            !strncmp(&buf[cur->start], name, cur->end - cur->start)
            && strlen(name) == (size_t)(cur->end - cur->start) &&
            cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
            buf[(cur+1)->end] = '\0';
            /*
             * Clear errno first: the conversion only sets it on failure,
             * so a stale value left by some earlier call would otherwise
             * be mistaken for a conversion error.
             */
            errno = 0;
            *val = g_ascii_strtod(&buf[(cur+1)->start], NULL);
            if (errno != 0)
                return false;
            return true;
        }
        cur = json_get_next_object(cur);
    }
    return false;
}

/*
 * Look up a primitive-valued member of a parent token by key and convert
 * it to a 64-bit signed integer (base 10).
 *
 * buf    - the text the tokens index into; it is modified: a NUL is written
 *          at the end of the value so it can be converted as a C string
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 * val    - receives the result of g_ascii_strtoll() for the first matching
 *          member (it is written even when false is returned for that member)
 *
 * Returns true if a matching key with a JSMN_PRIMITIVE value was found and
 * the conversion left errno at zero; false otherwise.
 */
bool json_get_int(char *buf, jsmntok_t *parent, const char *name, int64_t *val)
{
    int i;
    jsmntok_t *cur = parent+1;

    for (i = 0; i < parent->size; i++) {
        if (cur->type == JSMN_STRING &&
            !strncmp(&buf[cur->start], name, cur->end - cur->start)
            && strlen(name) == (size_t)(cur->end - cur->start) &&
            cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
            buf[(cur+1)->end] = '\0';
            /*
             * Clear errno first: the conversion only sets it on failure,
             * so a stale value left by some earlier call would otherwise
             * be mistaken for a conversion error.
             */
            errno = 0;
            *val = g_ascii_strtoll(&buf[(cur+1)->start], NULL, 10);
            if (errno != 0)
                return false;
            return true;
        }
        cur = json_get_next_object(cur);
    }
    return false;
}

/*
 * Look up a primitive-valued member of a parent token by key and interpret
 * it as a JSON boolean.
 *
 * buf    - the text the tokens index into (not modified here)
 * parent - token whose children are searched
 * name   - NUL-terminated key to look for (compared exactly, by length and
 *          content)
 * val    - receives true or false when the value is exactly "true" or
 *          "false"; left untouched otherwise
 *
 * Returns true if the first matching key with a JSMN_PRIMITIVE value holds
 * exactly "true" or "false"; false if it holds anything else or if no such
 * key exists.
 */
bool json_get_boolean(char *buf, jsmntok_t *parent, const char *name, bool *val)
{
    jsmntok_t *key = parent + 1;
    int left;

    for (left = parent->size; left > 0; left--) {
        /* Only a string token with exactly one child can be a key/value pair. */
        if (key->type == JSMN_STRING && key->size == 1) {
            const size_t key_len = (size_t)(key->end - key->start);

            if (strlen(name) == key_len &&
                strncmp(&buf[key->start], name, key_len) == 0 &&
                key[1].type == JSMN_PRIMITIVE) {
                const jsmntok_t *value = key + 1;
                const char *text = &buf[value->start];
                const size_t text_len = value->end - value->start;

                /* The first matching member decides the outcome. */
                if (text_len == 4 && strncmp(text, "true", text_len) == 0) {
                    *val = true;
                    return true;
                }
                if (text_len == 5 && strncmp(text, "false", text_len) == 0) {
                    *val = false;
                    return true;
                }
                return false;
            }
        }
        key = json_get_next_object(key);
    }
    return false;
}

/*
 * Read four hexadecimal digits starting at *cursor.
 *
 * On success the digits' value is stored in *value, *cursor is advanced
 * past them and true is returned.  Returns false as soon as ws_xton()
 * rejects a character.
 */
static bool
json_read_hex4(const char **cursor, uint32_t *value)
{
    uint32_t acc = 0;
    int n;

    for (n = 0; n < 4; n++) {
        char digit = *(*cursor)++;
        int nibble = ws_xton(digit);

        if (nibble == -1)
            return false;
        acc = (acc << 4) | (uint32_t)nibble;
    }
    *value = acc;
    return true;
}

/*
 * Replace the JSON escape sequences of a NUL-terminated string with the
 * characters they stand for, overwriting the string in place.
 *
 * text - NUL-terminated string to decode; rewritten in place
 *
 * Handles \" \\ \/ \b \f \n \r \t and \uXXXX (including lead/trail
 * surrogate pairs, which are combined and written as UTF-8).
 *
 * Returns true on success.  Returns false on an unknown escape, a
 * malformed \u sequence, an unpaired surrogate, a code point rejected by
 * g_unichar_validate(), or \u0000; in that case text may have been
 * partially rewritten.
 */
bool
json_decode_string_inplace(char *text)
{
    const char *src = text;
    char *dst = text;

    while (*src != '\0') {
        char c = *src++;

        /* Ordinary characters are copied through unchanged. */
        if (c != '\\') {
            *dst++ = c;
            continue;
        }

        c = *src++;
        switch (c) {
            case '\"':
            case '\\':
            case '/':
                *dst++ = c;
                break;

            case 'b':
                *dst++ = '\b';
                break;
            case 'f':
                *dst++ = '\f';
                break;
            case 'n':
                *dst++ = '\n';
                break;
            case 'r':
                *dst++ = '\r';
                break;
            case 't':
                *dst++ = '\t';
                break;

            case 'u':
            {
                uint32_t code_point;
                int written;

                if (!json_read_hex4(&src, &code_point))
                    return false;

                if (IS_LEAD_SURROGATE(code_point)) {
                    uint16_t lead = code_point;
                    uint16_t trail;
                    uint32_t trail_bits;

                    /* A lead surrogate must be followed by "\uXXXX". */
                    if (src[0] != '\\' || src[1] != 'u')
                        return false;
                    src += 2;

                    if (!json_read_hex4(&src, &trail_bits))
                        return false;
                    trail = (uint16_t)trail_bits;

                    if (!IS_TRAIL_SURROGATE(trail))
                        return false;

                    code_point = SURROGATE_VALUE(lead, trail);
                } else if (IS_TRAIL_SURROGATE(code_point)) {
                    /* A trail surrogate without a preceding lead one. */
                    return false;
                }

                if (!g_unichar_validate(code_point))
                    return false;

                /* Don't allow NUL byte injection. */
                if (code_point == 0)
                    return false;

                /* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
                written = g_unichar_to_utf8(code_point, dst);
                dst += written;
                break;
            }

            default:
                return false;
        }
    }

    *dst = '\0';
    return true;
}

/*
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
 *
 * Local variables:
 * c-basic-offset: 4
 * tab-width: 8
 * indent-tabs-mode: nil
 * End:
 *
 * vi: set shiftwidth=4 tabstop=8 expandtab:
 * :indentSize=4:tabSize=8:noTabs=true:
 */

Coverage Report

Created: 2026-06-30 07:22

Line	Count	Source
1		/* wsjson.c
2		* JSON parsing functions.
3		*
4		* Copyright 2016, Dario Lombardo
5		*
6		* Wireshark - Network traffic analyzer
7		* By Gerald Combs <gerald@wireshark.org>
8		* Copyright 1998 Gerald Combs
9		*
10		* SPDX-License-Identifier: GPL-2.0-or-later
11		*/
12
13		#include "config.h"
14	0	#define WS_LOG_DOMAIN LOG_DOMAIN_MAIN
15
16		#include "wsjson.h"
17
18		#include <string.h>
19		#include <errno.h>
20		#include <wsutil/jsmn.h>
21		#include <wsutil/str_util.h>
22		#include <wsutil/unicode-utils.h>
23		#include <wsutil/wslog.h>
24
25		bool
26		json_validate(const uint8_t *buf, const size_t len)
27	0	{
28	0	bool ret = true;
29		/* We expect no more than 1024 tokens */
30	0	unsigned max_tokens = 1024;
31	0	jsmntok_t* t;
32	0	jsmn_parser p;
33	0	int rcode;
34
35		/*
36		* Make sure the buffer isn't empty and the first octet isn't a NUL;
37		* otherwise, the parser will immediately stop parsing and not validate
38		* anything after that, so it'll just think it was handed an empty string.
39		*
40		* XXX - should we check for NULs anywhere in the buffer?
41		*/
42	0	if (len == 0) {
43	0	ws_debug("JSON string is empty");
44	0	return false;
45	0	}
46	0	if (buf[0] == '\0') {
47	0	ws_debug("invalid character inside JSON string");
48	0	return false;
49	0	}
50
51		/*
52		* XXX - We create the token array and have jsmn_parse fill it in, only
53		* to free it. It might make more sense to pass in NULL for tokens, and
54		* for our sanity check just check that len isn't too big.
55		*/
56	0	t = g_new0(jsmntok_t, max_tokens);
57
58	0	if (!t)
59	0	return false;
60
61	0	jsmn_init(&p);
62	0	rcode = jsmn_parse(&p, (const char*)buf, len, t, max_tokens);
63	0	if (rcode < 0) {
64	0	switch (rcode) {
65	0	case JSMN_ERROR_NOMEM:
66	0	ws_debug("not enough tokens were provided");
67	0	break;
68	0	case JSMN_ERROR_INVAL:
69	0	ws_debug("invalid character inside JSON string");
70	0	break;
71	0	case JSMN_ERROR_PART:
72	0	ws_debug("the string is not a full JSON packet, "
73	0	"more bytes expected");
74	0	break;
75	0	default:
76	0	ws_debug("unexpected error");
77	0	break;
78	0	}
79	0	ret = false;
80	0	}
81
82	0	g_free(t);
83	0	return ret;
84	0	}
85
86		int
87		json_parse(const char buf, jsmntok_t tokens, unsigned int max_tokens)
88	0	{
89	0	jsmn_parser p;
90
91	0	jsmn_init(&p);
92	0	return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens);
93	0	}
94
95		int
96		json_parse_len(const char buf, size_t len, jsmntok_t tokens, unsigned int max_tokens)
97	0	{
98	0	jsmn_parser p;
99
100	0	jsmn_init(&p);
101	0	return jsmn_parse(&p, buf, len, tokens, max_tokens);
102	0	}
103
104		jsmntok_t json_get_next_object(jsmntok_t cur)
105	0	{
106	0	int i;
107	0	jsmntok_t *next = cur+1;
108
109	0	for (i = 0; i < cur->size; i++) {
110	0	next = json_get_next_object(next);
111	0	}
112	0	return next;
113	0	}
114
115		jsmntok_t json_get_object(const char buf, jsmntok_t parent, const char name)
116	0	{
117	0	int i;
118	0	jsmntok_t *cur = parent+1;
119
120	0	for (i = 0; i < parent->size; i++) {
121	0	if (cur->type == JSMN_STRING &&
122	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
123	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
124	0	cur->size == 1 && (cur+1)->type == JSMN_OBJECT) {
125	0	return cur+1;
126	0	}
127	0	cur = json_get_next_object(cur);
128	0	}
129	0	return NULL;
130	0	}
131
132		jsmntok_t json_get_array(const char buf, jsmntok_t parent, const char name)
133	0	{
134	0	int i;
135	0	jsmntok_t *cur = parent+1;
136
137	0	for (i = 0; i < parent->size; i++) {
138	0	if (cur->type == JSMN_STRING &&
139	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
140	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
141	0	cur->size == 1 && (cur+1)->type == JSMN_ARRAY) {
142	0	return cur+1;
143	0	}
144	0	cur = json_get_next_object(cur);
145	0	}
146	0	return NULL;
147	0	}
148
149		int json_get_array_len(jsmntok_t *array)
150	0	{
151	0	if (array->type != JSMN_ARRAY)
152	0	return -1;
153	0	return array->size;
154	0	}
155
156		jsmntok_t json_get_array_index(jsmntok_t array, int idx)
157	0	{
158	0	int i;
159	0	jsmntok_t *cur = array+1;
160
161
162	0	if (array->type != JSMN_ARRAY \|\| idx < 0 \|\| idx >= array->size)
163	0	return NULL;
164	0	for (i = 0; i < idx; i++)
165	0	cur = json_get_next_object(cur);
166	0	return cur;
167	0	}
168
169		char json_get_string(char buf, jsmntok_t parent, const char name)
170	0	{
171	0	int i;
172	0	jsmntok_t *cur = parent+1;
173
174	0	for (i = 0; i < parent->size; i++) {
175	0	if (cur->type == JSMN_STRING &&
176	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
177	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
178	0	cur->size == 1 && (cur+1)->type == JSMN_STRING) {
179	0	buf[(cur+1)->end] = '\0';
180	0	if (!json_decode_string_inplace(&buf[(cur+1)->start]))
181	0	return NULL;
182	0	return &buf[(cur+1)->start];
183	0	}
184	0	cur = json_get_next_object(cur);
185	0	}
186	0	return NULL;
187	0	}
188
189		bool json_get_double(char buf, jsmntok_t parent, const char name, double val)
190	0	{
191	0	int i;
192	0	jsmntok_t *cur = parent+1;
193
194	0	for (i = 0; i < parent->size; i++) {
195	0	if (cur->type == JSMN_STRING &&
196	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
197	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
198	0	cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
199	0	buf[(cur+1)->end] = '\0';
200	0	*val = g_ascii_strtod(&buf[(cur+1)->start], NULL);
201	0	if (errno != 0)
202	0	return false;
203	0	return true;
204	0	}
205	0	cur = json_get_next_object(cur);
206	0	}
207	0	return false;
208	0	}
209
210		bool json_get_int(char buf, jsmntok_t parent, const char name, int64_t val)
211	0	{
212	0	int i;
213	0	jsmntok_t *cur = parent+1;
214
215	0	for (i = 0; i < parent->size; i++) {
216	0	if (cur->type == JSMN_STRING &&
217	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
218	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
219	0	cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
220	0	buf[(cur+1)->end] = '\0';
221	0	*val = g_ascii_strtoll(&buf[(cur+1)->start], NULL, 10);
222	0	if (errno != 0)
223	0	return false;
224	0	return true;
225	0	}
226	0	cur = json_get_next_object(cur);
227	0	}
228	0	return false;
229	0	}
230
231		bool json_get_boolean(char buf, jsmntok_t parent, const char name, bool val)
232	0	{
233	0	int i;
234	0	size_t tok_len;
235	0	jsmntok_t *cur = parent+1;
236
237	0	for (i = 0; i < parent->size; i++) {
238	0	if (cur->type == JSMN_STRING &&
239	0	!strncmp(&buf[cur->start], name, cur->end - cur->start)
240	0	&& strlen(name) == (size_t)(cur->end - cur->start) &&
241	0	cur->size == 1 && (cur+1)->type == JSMN_PRIMITIVE) {
242		/* JSMN_STRICT guarantees that a primitive starts with the
243		* correct character.
244		*/
245	0	tok_len = (cur+1)->end - (cur+1)->start;
246	0	switch (buf[(cur+1)->start]) {
247	0	case 't':
248	0	if (tok_len == 4 && strncmp(&buf[(cur+1)->start], "true", tok_len) == 0) {
249	0	*val = true;
250	0	return true;
251	0	}
252	0	return false;
253	0	case 'f':
254	0	if (tok_len == 5 && strncmp(&buf[(cur+1)->start], "false", tok_len) == 0) {
255	0	*val = false;
256	0	return true;
257	0	}
258	0	return false;
259	0	default:
260	0	return false;
261	0	}
262	0	}
263	0	cur = json_get_next_object(cur);
264	0	}
265	0	return false;
266	0	}
267
268		bool
269		json_decode_string_inplace(char *text)
270	0	{
271	0	const char *input = text;
272	0	char *output = text;
273	0	while (*input) {
274	0	char ch = *input++;
275
276	0	if (ch == '\\') {
277	0	ch = *input++;
278
279	0	switch (ch) {
280	0	case '\"':
281	0	case '\\':
282	0	case '/':
283	0	*output++ = ch;
284	0	break;
285
286	0	case 'b':
287	0	*output++ = '\b';
288	0	break;
289	0	case 'f':
290	0	*output++ = '\f';
291	0	break;
292	0	case 'n':
293	0	*output++ = '\n';
294	0	break;
295	0	case 'r':
296	0	*output++ = '\r';
297	0	break;
298	0	case 't':
299	0	*output++ = '\t';
300	0	break;
301
302	0	case 'u':
303	0	{
304	0	uint32_t unicode_hex = 0;
305	0	int k;
306	0	int bin;
307
308	0	for (k = 0; k < 4; k++) {
309	0	unicode_hex <<= 4;
310
311	0	ch = *input++;
312	0	bin = ws_xton(ch);
313	0	if (bin == -1)
314	0	return false;
315	0	unicode_hex \|= bin;
316	0	}
317
318	0	if ((IS_LEAD_SURROGATE(unicode_hex))) {
319	0	uint16_t lead_surrogate = unicode_hex;
320	0	uint16_t trail_surrogate = 0;
321
322	0	if (input[0] != '\\' \|\| input[1] != 'u')
323	0	return false;
324	0	input += 2;
325
326	0	for (k = 0; k < 4; k++) {
327	0	trail_surrogate <<= 4;
328
329	0	ch = *input++;
330	0	bin = ws_xton(ch);
331	0	if (bin == -1)
332	0	return false;
333	0	trail_surrogate \|= bin;
334	0	}
335
336	0	if ((!IS_TRAIL_SURROGATE(trail_surrogate)))
337	0	return false;
338
339	0	unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate);
340
341	0	} else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
342	0	return false;
343	0	}
344
345	0	if (!g_unichar_validate(unicode_hex))
346	0	return false;
347
348		/* Don't allow NUL byte injection. */
349	0	if (unicode_hex == 0)
350	0	return false;
351
352		/* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
353	0	k = g_unichar_to_utf8(unicode_hex, output);
354	0	output += k;
355	0	break;
356	0	}
357
358	0	default:
359	0	return false;
360	0	}
361
362	0	} else {
363	0	*output = ch;
364	0	output++;
365	0	}
366	0	}
367
368	0	*output = '\0';
369		return true;
370	0	}
371
372		/*
373		* Editor modelines - https://www.wireshark.org/tools/modelines.html
374		*
375		* Local variables:
376		* c-basic-offset: 4
377		* tab-width: 8
378		* indent-tabs-mode: nil
379		* End:
380		*
381		* vi: set shiftwidth=4 tabstop=8 expandtab:
382		* :indentSize=4:tabSize=8:noTabs=true:
383		*/