Coverage Report

Created: 2025-06-13 07:09

/src/server/strings/json_lib.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2016, 2022, MariaDB Corporation.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
15
16
#include <my_global.h>
17
#include <string.h>
18
#include <m_ctype.h>
19
#include "json_lib.h"
20
21
/*
22
  JSON escaping lets user specify UTF16 codes of characters.
23
  So we're going to need the UTF16 charset capabilities. Let's import
24
  them from the utf16 charset.
25
*/
26
int my_utf16_uni(CHARSET_INFO *cs,
27
                 my_wc_t *pwc, const uchar *s, const uchar *e);
28
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29
30
31
void json_string_set_str(json_string_t *s,
32
                         const uchar *str, const uchar *end)
33
19.2k
{
34
19.2k
  s->c_str= str;
35
19.2k
  s->str_end= end;
36
19.2k
}
37
38
39
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
40
8.70k
{
41
8.70k
  s->cs= i_cs;
42
8.70k
  s->error= 0;
43
8.70k
  s->wc= i_cs->cset->mb_wc;
44
8.70k
}
45
46
47
static void json_string_setup(json_string_t *s,
48
                              CHARSET_INFO *i_cs, const uchar *str,
49
                              const uchar *end)
50
4.35k
{
51
4.35k
  json_string_set_cs(s, i_cs);
52
4.35k
  json_string_set_str(s, str, end);
53
4.35k
}
54
55
56
enum json_char_classes {
57
  C_EOS,    /* end of string */
58
  C_LCURB,  /* {  */
59
  C_RCURB,  /* } */
60
  C_LSQRB,  /* [ */
61
  C_RSQRB,  /* ] */
62
  C_COLON,  /* : */
63
  C_COMMA,  /* , */
64
  C_QUOTE,  /* " */
65
  C_DIGIT,  /* -0123456789 */
66
  C_LOW_F,  /* 'f' (for "false") */
67
  C_LOW_N,  /* 'n' (for "null") */
68
  C_LOW_T,  /* 't' (for "true") */
69
  C_ETC,    /* everything else */
70
  C_ERR,    /* character disallowed in JSON */
71
  C_BAD,    /* invalid character, charset handler cannot read it */
72
  NR_C_CLASSES, /* Counter for classes that handled with functions. */
73
  C_SPACE   /* space. Doesn't need specific handlers, so after the counter.*/
74
};
75
76
77
/*
78
  This array maps first 128 Unicode Code Points into classes.
79
  The remaining Unicode characters should be mapped to C_ETC.
80
*/
81
82
static enum json_char_classes json_chr_map[128] = {
83
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
84
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
85
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
86
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
87
88
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
89
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
90
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
91
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
92
93
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
94
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
95
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,
97
98
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
99
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
100
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
101
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
102
};
103
104
105
/*
106
  JSON parser actually has more states than the 'enum json_states'
107
  declares. But the rest of the states aren't seen to the user so let's
108
  specify them here to avoid confusion.
109
*/
110
111
enum json_all_states {
112
  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
113
  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
114
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
115
  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
116
  NR_JSON_STATES= NR_JSON_USER_STATES+4
117
};
118
119
120
typedef int (*json_state_handler)(json_engine_t *);
121
122
123
/* The string is broken. */
124
static int unexpected_eos(json_engine_t *j)
125
535
{
126
535
  j->s.error= JE_EOS;
127
535
  return 1;
128
535
}
129
130
131
/* This symbol here breaks the JSON syntax. */
132
static int syntax_error(json_engine_t *j)
133
466
{
134
466
  j->s.error= JE_SYN;
135
466
  return 1;
136
466
}
137
138
139
/* Value of object. */
140
static int mark_object(json_engine_t *j)
141
2.22k
{
142
2.22k
  j->state= JST_OBJ_START;
143
2.22k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
144
2.21k
  {
145
2.21k
    j->stack[j->stack_p]= JST_OBJ_CONT;
146
2.21k
    return 0;
147
2.21k
  }
148
6
  j->s.error= JE_DEPTH;
149
6
  return 1;
150
2.22k
}
151
152
153
/* Read value of object. */
154
static int read_obj(json_engine_t *j)
155
4.35k
{
156
4.35k
  j->state= JST_OBJ_START;
157
4.35k
  j->value_type= JSON_VALUE_OBJECT;
158
4.35k
  j->value= j->value_begin;
159
4.35k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
160
4.35k
  {
161
4.35k
    j->stack[j->stack_p]= JST_OBJ_CONT;
162
4.35k
    return 0;
163
4.35k
  }
164
0
  j->s.error= JE_DEPTH;
165
0
  return 1;
166
4.35k
}
167
168
169
/* Value of array. */
170
static int mark_array(json_engine_t *j)
171
1.80k
{
172
1.80k
  j->state= JST_ARRAY_START;
173
1.80k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
174
1.79k
  {
175
1.79k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
176
1.79k
    j->value= j->value_begin;
177
1.79k
    return 0;
178
1.79k
  }
179
6
  j->s.error= JE_DEPTH;
180
6
  return 1;
181
1.80k
}
182
183
/* Read value of object. */
184
static int read_array(json_engine_t *j)
185
1.56k
{
186
1.56k
  j->state= JST_ARRAY_START;
187
1.56k
  j->value_type= JSON_VALUE_ARRAY;
188
1.56k
  j->value= j->value_begin;
189
1.56k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
190
1.56k
  {
191
1.56k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
192
1.56k
    return 0;
193
1.56k
  }
194
0
  j->s.error= JE_DEPTH;
195
0
  return 1;
196
1.56k
}
197
198
199
200
/*
201
  Character classes inside the JSON string constant.
202
  We mostly need this to parse escaping properly.
203
  Escapings available in JSON are:
204
  \" - quotation mark
205
  \\ - backslash
206
  \b - backspace UNICODE 8
207
  \f - formfeed UNICODE 12
208
  \n - newline UNICODE 10
209
  \r - carriage return UNICODE 13
210
  \t - horizontal tab UNICODE 9
211
  \u{four-hex-digits} - code in UCS16 character set
212
*/
213
enum json_string_char_classes {
214
  S_0= 0,
215
  S_1= 1,
216
  S_2= 2,
217
  S_3= 3,
218
  S_4= 4,
219
  S_5= 5,
220
  S_6= 6,
221
  S_7= 7,
222
  S_8= 8,
223
  S_9= 9,
224
  S_A= 10,
225
  S_B= 11,
226
  S_C= 12,
227
  S_D= 13,
228
  S_E= 14,
229
  S_F= 15,
230
  S_ETC= 36,    /* rest of characters. */
231
  S_QUOTE= 37,
232
  S_BKSL= 38, /* \ */
233
  S_ERR= 100,   /* disallowed */
234
};
235
236
237
/* This maps characters to their types inside a string constant. */
238
static enum json_string_char_classes json_instr_chr_map[128] = {
239
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
240
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
241
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
242
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
243
244
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
245
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
246
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
247
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
248
249
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
250
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
251
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
252
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,
253
254
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
255
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
256
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
257
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
258
};
259
260
261
static int read_4_hexdigits(json_string_t *s, uchar *dest)
262
9.71k
{
263
9.71k
  int i, t, c_len;
264
36.1k
  for (i=0; i<4; i++)
265
31.1k
  {
266
31.1k
    if ((c_len= json_next_char(s)) <= 0)
267
2.69k
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
268
269
28.4k
    if (s->c_next >= 128 || (t= json_instr_chr_map[s->c_next]) > S_F)
270
2.03k
      return s->error= JE_SYN;
271
272
26.4k
    s->c_str+= c_len;
273
26.4k
    dest[i/2]+= (i % 2) ? t : t*16;
274
26.4k
  }
275
4.98k
  return 0;
276
9.71k
}
277
278
279
static int json_handle_esc(json_string_t *s)
280
12.6k
{
281
12.6k
  int t, c_len;
282
  
283
12.6k
  if ((c_len= json_next_char(s)) <= 0)
284
604
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
285
286
12.0k
  s->c_str+= c_len;
287
12.0k
  switch (s->c_next)
288
12.0k
  {
289
266
    case 'b':
290
266
      s->c_next= 8;
291
266
      return 0;
292
208
    case 'f':
293
208
      s->c_next= 12;
294
208
      return 0;
295
598
    case 'n':
296
598
      s->c_next= 10;
297
598
      return 0;
298
206
    case 'r':
299
206
      s->c_next= 13;
300
206
      return 0;
301
400
    case 't':
302
400
      s->c_next= 9;
303
400
      return 0;
304
12.0k
  }
305
306
10.3k
  if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR)
307
242
  {
308
242
    s->c_str-= c_len;
309
242
    return s->error= JE_ESCAPING;
310
242
  }
311
312
313
10.1k
  if (s->c_next != 'u')
314
2.08k
    return 0;
315
316
8.03k
  {
317
    /*
318
      Read the four-hex-digits code.
319
      If symbol is not in the Basic Multilingual Plane, we're reading
320
      the string for the next four digits to compose the UTF-16 surrogate pair.
321
    */
322
8.03k
    uchar code[4]= {0,0,0,0};
323
324
8.03k
    if (read_4_hexdigits(s, code))
325
4.41k
      return 1;
326
327
3.62k
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
328
266
      return 0;
329
330
3.36k
    if (c_len != MY_CS_TOOSMALL4)
331
198
      return s->error= JE_BAD_CHR;
332
333
3.16k
    if ((c_len= json_next_char(s)) <= 0)
334
518
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
335
2.64k
    if (s->c_next != '\\')
336
250
      return s->error= JE_SYN;
337
338
2.39k
    s->c_str+= c_len;
339
2.39k
    if ((c_len= json_next_char(s)) <= 0)
340
406
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
341
1.98k
    if (s->c_next != 'u')
342
312
      return s->error= JE_SYN;
343
1.67k
    s->c_str+= c_len;
344
345
1.67k
    if (read_4_hexdigits(s, code+2))
346
314
      return 1;
347
348
1.36k
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 4)
349
1.15k
      return 0;
350
1.36k
  }
351
208
  return s->error= JE_BAD_CHR;
352
1.36k
}
353
354
355
int json_read_string_const_chr(json_string_t *js)
356
16.4k
{
357
16.4k
  int c_len;
358
359
16.4k
  if ((c_len= json_next_char(js)) > 0)
360
15.1k
  {
361
15.1k
    js->c_str+= c_len;
362
15.1k
    return (js->c_next == '\\') ? json_handle_esc(js) : 0;
363
15.1k
  }
364
1.35k
  js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR; 
365
1.35k
  return 1;
366
16.4k
}
367
368
369
static int skip_str_constant(json_engine_t *j)
370
1.27k
{
371
1.27k
  int t, c_len;
372
1.27k
  for (;;)
373
4.42k
  {
374
4.42k
    if ((c_len= json_next_char(&j->s)) > 0)
375
4.29k
    {
376
4.29k
      j->s.c_str+= c_len;
377
4.29k
      if (j->s.c_next >= 128 || ((t=json_instr_chr_map[j->s.c_next]) <= S_ETC))
378
2.96k
        continue;
379
380
1.33k
      if (j->s.c_next == '"')
381
1.11k
        break;
382
222
      if (j->s.c_next == '\\')
383
206
      {
384
206
        j->value_escaped= 1;
385
206
        if (json_handle_esc(&j->s))
386
16
          return 1;
387
190
        continue;
388
206
      }
389
      /* Symbol not allowed in JSON. */
390
16
      return j->s.error= JE_NOT_JSON_CHR;
391
222
    }
392
128
    else
393
128
      return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; 
394
4.42k
  }
395
396
1.11k
  j->state= j->stack[j->stack_p];
397
1.11k
  return 0;
398
1.27k
}
399
400
401
/* Scalar string. */
402
static int v_string(json_engine_t *j)
403
862
{
404
862
  return skip_str_constant(j) || json_scan_next(j);
405
862
}
406
407
408
/* Read scalar string. */
409
static int read_strn(json_engine_t *j)
410
408
{
411
408
  j->value= j->s.c_str;
412
408
  j->value_type= JSON_VALUE_STRING;
413
408
  j->value_escaped= 0;
414
415
408
  if (skip_str_constant(j))
416
136
    return 1;
417
418
272
  j->state= j->stack[j->stack_p];
419
272
  j->value_len= (int)(j->s.c_str - j->value) - j->s.c_next_len;
420
272
  return 0;
421
408
}
422
423
424
/*
425
  We have dedicated parser for numeric constants. It's similar
426
  to the main JSON parser, we similarly define character classes,
427
  map characters to classes and implement the state-per-class
428
  table. Though we don't create functions that handle
429
  particular classes, just specify what new state should parser
430
  get in this case.
431
*/
432
enum json_num_char_classes {
433
  N_MINUS,
434
  N_PLUS,
435
  N_ZERO,
436
  N_DIGIT,
437
  N_POINT,
438
  N_E,
439
  N_END,
440
  N_EEND,
441
  N_ERR,
442
  N_NUM_CLASSES
443
};
444
445
446
static enum json_num_char_classes json_num_chr_map[128] = {
447
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
448
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
449
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
450
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
451
452
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
453
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
454
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
455
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
456
457
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
458
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
459
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
460
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
461
462
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
463
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
464
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
465
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
466
};
467
468
469
enum json_num_states {
470
  NS_OK,  /* Number ended. */
471
  NS_GO,  /* Initial state. */
472
  NS_GO1, /* If the number starts with '-'. */
473
  NS_Z,   /* If the number starts with '0'. */
474
  NS_Z1,  /* If the numbers starts with '-0'. */
475
  NS_INT, /* Integer part. */
476
  NS_FRAC,/* Fractional part. */
477
  NS_EX,  /* Exponential part begins. */
478
  NS_EX1, /* Exponential part continues. */
479
  NS_NUM_STATES
480
};
481
482
483
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
484
{
485
/*         -        +       0         1..9    POINT    E       END_OK   ERROR */
486
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
487
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
488
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
489
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
490
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
491
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
492
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
493
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR }, 
494
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
495
};
496
497
498
static uint json_num_state_flags[NS_NUM_STATES]=
499
{
500
/*OK*/   0,
501
/*GO*/   0,
502
/*GO1*/  JSON_NUM_NEG,
503
/*ZERO*/ 0,
504
/*ZE1*/  0,
505
/*INT*/  0,
506
/*FRAC*/ JSON_NUM_FRAC_PART,
507
/*EX*/   JSON_NUM_EXP,
508
/*EX1*/  0,
509
};
510
511
512
static int skip_num_constant(json_engine_t *j)
513
12.0k
{
514
12.0k
  int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
515
12.0k
  int c_len;
516
517
12.0k
  j->num_flags= 0;
518
12.0k
  for (;;)
519
13.5k
  {
520
13.5k
    j->num_flags|= json_num_state_flags[state];
521
13.5k
    if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
522
13.3k
    {
523
13.3k
      if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
524
1.46k
      {
525
1.46k
        j->s.c_str+= c_len;
526
1.46k
        continue;
527
1.46k
      }
528
11.9k
      break;
529
13.3k
    }
530
531
162
    if ((j->s.error=
532
162
          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
533
76
      return 1;
534
86
    else
535
86
      break;
536
162
  }
537
538
12.0k
  j->state= j->stack[j->stack_p];
539
12.0k
  return 0;
540
12.0k
}
541
542
543
/* Scalar numeric. */
544
static int v_number(json_engine_t *j)
545
2.19k
{
546
2.19k
  return skip_num_constant(j) || json_scan_next(j);
547
2.19k
}
548
549
550
/* Read numeric constant. */
551
static int read_num(json_engine_t *j)
552
9.88k
{
553
9.88k
  j->value= j->value_begin;
554
9.88k
  if (skip_num_constant(j) == 0)
555
9.84k
  {
556
9.84k
    j->value_type= JSON_VALUE_NUMBER;
557
9.84k
    j->value_len= (int)(j->s.c_str - j->value_begin);
558
9.84k
    return 0;
559
9.84k
  }
560
40
  return 1;
561
9.88k
}
562
563
564
/* Check that the JSON string matches the argument and skip it. */
565
static int skip_string_verbatim(json_string_t *s, const char *str)
566
3.41k
{
567
3.41k
  int c_len;
568
12.4k
  while (*str)
569
10.0k
  {
570
10.0k
    if ((c_len= json_next_char(s)) > 0)
571
9.84k
    {
572
9.84k
      if (s->c_next == (my_wc_t) *(str++))
573
9.04k
      {
574
9.04k
        s->c_str+= c_len;
575
9.04k
        continue;
576
9.04k
      }
577
798
      return s->error= JE_SYN;
578
9.84k
    }
579
216
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; 
580
10.0k
  }
581
582
2.40k
  return 0;
583
3.41k
}
584
585
586
/* Scalar false. */
587
static int v_false(json_engine_t *j)
588
662
{
589
662
  if (skip_string_verbatim(&j->s, "alse"))
590
226
   return 1;
591
436
  j->state= j->stack[j->stack_p];
592
436
  return json_scan_next(j);
593
662
}
594
595
596
/* Scalar null. */
597
static int v_null(json_engine_t *j)
598
802
{
599
802
  if (skip_string_verbatim(&j->s, "ull"))
600
176
   return 1;
601
626
  j->state= j->stack[j->stack_p];
602
626
  return json_scan_next(j);
603
802
}
604
605
606
/* Scalar true. */
607
static int v_true(json_engine_t *j)
608
524
{
609
524
  if (skip_string_verbatim(&j->s, "rue"))
610
182
   return 1;
611
342
  j->state= j->stack[j->stack_p];
612
342
  return json_scan_next(j);
613
524
}
614
615
616
/* Read false. */
617
static int read_false(json_engine_t *j)
618
472
{
619
472
  j->value_type= JSON_VALUE_FALSE;
620
472
  j->value= j->value_begin;
621
472
  j->state= j->stack[j->stack_p];
622
472
  j->value_len= 5;
623
472
  return skip_string_verbatim(&j->s, "alse");
624
472
}
625
626
627
/* Read null. */
628
static int read_null(json_engine_t *j)
629
474
{
630
474
  j->value_type= JSON_VALUE_NULL;
631
474
  j->value= j->value_begin;
632
474
  j->state= j->stack[j->stack_p];
633
474
  j->value_len= 4;
634
474
  return skip_string_verbatim(&j->s, "ull");
635
474
}
636
637
638
/* Read true. */
639
static int read_true(json_engine_t *j)
640
480
{
641
480
  j->value_type= JSON_VALUE_TRUE;
642
480
  j->value= j->value_begin;
643
480
  j->state= j->stack[j->stack_p];
644
480
  j->value_len= 4;
645
480
  return skip_string_verbatim(&j->s, "rue");
646
480
}
647
648
649
/* Disallowed character. */
650
static int not_json_chr(json_engine_t *j)
651
40
{
652
40
  j->s.error= JE_NOT_JSON_CHR;
653
40
  return 1;
654
40
}
655
656
657
/* Bad character. */
658
static int bad_chr(json_engine_t *j)
659
145
{
660
145
  j->s.error= JE_BAD_CHR;
661
145
  return 1;
662
145
}
663
664
665
/* Correct finish. */
666
static int done(json_engine_t *j  __attribute__((unused)))
667
0
{
668
0
  return 1;
669
0
}
670
671
672
/* End of the object. */
673
static int end_object(json_engine_t *j)
674
1.51k
{
675
1.51k
  j->stack_p--;
676
1.51k
  j->state= JST_OBJ_END;
677
1.51k
  return 0;
678
1.51k
}
679
680
681
/* End of the array. */
682
static int end_array(json_engine_t *j)
683
1.20k
{
684
1.20k
  j->stack_p--;
685
1.20k
  j->state= JST_ARRAY_END;
686
1.20k
  return 0;
687
1.20k
}
688
689
690
/* Start reading key name. */
691
static int read_keyname(json_engine_t *j)
692
5.59k
{
693
5.59k
  j->state= JST_KEY;
694
5.59k
  return 0;
695
5.59k
}
696
697
698
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
699
78.2k
{
700
78.2k
  do
701
81.8k
  {
702
81.8k
    if ((*c_len= json_next_char(js)) <= 0)
703
860
      *t_next= json_eos(js) ? C_EOS : C_BAD;
704
81.0k
    else
705
81.0k
    {
706
81.0k
      *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
707
81.0k
      js->c_str+= *c_len;
708
81.0k
    }
709
81.8k
  } while (*t_next == C_SPACE);
710
78.2k
}
711
712
713
/* Next key name. */
714
static int next_key(json_engine_t *j)
715
13.0k
{
716
13.0k
  int t_next, c_len;
717
13.0k
  get_first_nonspace(&j->s, &t_next, &c_len);
718
719
13.0k
  if (t_next == C_QUOTE)
720
12.9k
  {
721
12.9k
    j->state= JST_KEY;
722
12.9k
    return 0;
723
12.9k
  }
724
725
102
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
726
102
              ((t_next == C_BAD) ? JE_BAD_CHR :
727
58
                                   JE_SYN);
728
102
  return 1;
729
13.0k
}
730
731
732
/* Forward declarations. */
733
static int skip_colon(json_engine_t *j);
734
static int skip_key(json_engine_t *j);
735
static int struct_end_cb(json_engine_t *j);
736
static int struct_end_qb(json_engine_t *j);
737
static int struct_end_cm(json_engine_t *j);
738
static int struct_end_eos(json_engine_t *j);
739
740
741
static int next_item(json_engine_t *j)
742
2.92k
{
743
2.92k
  j->state= JST_VALUE;
744
2.92k
  return 0;
745
2.92k
}
746
747
748
static int array_item(json_engine_t *j)
749
2.88k
{
750
2.88k
  j->state= JST_VALUE;
751
2.88k
  j->s.c_str-= j->sav_c_len;
752
2.88k
  return 0;
753
2.88k
}
754
755
756
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
757
/*
758
   EOS              {            }             [             ]
759
   :                ,            "             -0..9         f
760
   n                t              ETC          ERR           BAD
761
*/
762
{
763
  {/*VALUE*/
764
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
765
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
766
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
767
  {/*KEY*/
768
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
769
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
770
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
771
  {/*OBJ_START*/
772
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
773
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
774
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
775
  {/*OBJ_END*/
776
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
777
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
778
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
779
  {/*ARRAY_START*/
780
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
781
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
782
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
783
  {/*ARRAY_END*/
784
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
785
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
786
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
787
  {/*DONE*/
788
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
789
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
790
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
791
  {/*OBJ_CONT*/
792
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
793
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
794
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
795
  {/*ARRAY_CONT*/
796
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
797
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
798
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
799
  {/*READ_VALUE*/
800
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
801
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
802
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
803
};
804
805
806
807
int json_scan_start(json_engine_t *je,
808
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
809
4.35k
{
810
4.35k
  static const uchar no_time_to_die= 0;
811
812
4.35k
  json_string_setup(&je->s, i_cs, str, end);
813
4.35k
  je->stack[0]= JST_DONE;
814
4.35k
  je->stack_p= 0;
815
4.35k
  je->state= JST_VALUE;
816
4.35k
  je->killed_ptr = (uchar*)&no_time_to_die;
817
4.35k
  return 0;
818
4.35k
}
819
820
821
/* Skip colon and the value. */
822
static int skip_colon(json_engine_t *j)
823
756
{
824
756
  int t_next, c_len;
825
826
756
  get_first_nonspace(&j->s, &t_next, &c_len);
827
828
756
  if (t_next == C_COLON)
829
670
  {
830
670
    get_first_nonspace(&j->s, &t_next, &c_len);
831
670
    return json_actions[JST_VALUE][t_next](j);
832
670
 }
833
834
86
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
835
86
              ((t_next == C_BAD) ? JE_BAD_CHR:
836
62
                                   JE_SYN);
837
838
86
  return 1;
839
756
}
840
841
842
/* Skip colon and the value. */
843
static int skip_key(json_engine_t *j)
844
2.87k
{
845
2.87k
  int t_next, c_len;
846
847
2.87k
  if (j->s.c_next<128 && json_instr_chr_map[j->s.c_next] == S_BKSL &&
848
2.87k
      json_handle_esc(&j->s))
849
10
    return 1;
850
851
3.91k
  while (json_read_keyname_chr(j) == 0) {}
852
853
2.86k
  if (j->s.error)
854
108
    return 1;
855
856
2.75k
  get_first_nonspace(&j->s, &t_next, &c_len);
857
2.75k
  return json_actions[JST_VALUE][t_next](j);
858
2.86k
}
859
860
861
/*
862
  Handle EOS after the end of an object or array.
863
  To do that we should pop the stack to see if
864
  we are inside an object, or an array, and
865
  run our 'state machine' accordingly.
866
*/
867
static int struct_end_eos(json_engine_t *j)
868
35
{ return json_actions[j->stack[j->stack_p]][C_EOS](j); }
869
870
871
/*
872
  Handle '}' after the end of an object or array.
873
  To do that we should pop the stack to see if
874
  we are inside an object, or an array, and
875
  run our 'state machine' accordingly.
876
*/
877
static int struct_end_cb(json_engine_t *j)
878
442
{ return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
879
880
881
/*
882
  Handle ']' after the end of an object or array.
883
  To do that we should pop the stack to see if
884
  we are inside an object, or an array, and
885
  run our 'state machine' accordingly.
886
*/
887
static int struct_end_qb(json_engine_t *j)
888
646
{ return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
889
890
891
/*
892
  Handle ',' after the end of an object or array.
893
  To do that we should pop the stack to see if
894
  we are inside an object, or an array, and
895
  run our 'state machine' accordingly.
896
*/
897
static int struct_end_cm(json_engine_t *j)
898
1.53k
{ return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
899
900
901
int json_read_keyname_chr(json_engine_t *j)
902
29.7k
{
903
29.7k
  int c_len, t;
904
905
29.7k
  if ((c_len= json_next_char(&j->s)) > 0)
906
27.2k
  {
907
27.2k
    j->s.c_str+= c_len;
908
27.2k
    if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC)
909
9.77k
      return 0;
910
911
17.5k
    switch (t)
912
17.5k
    {
913
16.3k
    case S_QUOTE:
914
16.3k
      for (;;)  /* Skip spaces until ':'. */
915
16.5k
      {
916
16.5k
        if ((c_len= json_next_char(&j->s)) > 0)
917
16.5k
        {
918
16.5k
          if (j->s.c_next == ':')
919
16.2k
          {
920
16.2k
            j->s.c_str+= c_len;
921
16.2k
            j->state= JST_VALUE;
922
16.2k
            return 1;
923
16.2k
          }
924
925
312
          if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE)
926
204
          {
927
204
            j->s.c_str+= c_len;
928
204
            continue;
929
204
          }
930
108
          j->s.error= JE_SYN;
931
108
          break;
932
312
        }
933
24
        j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
934
24
        break;
935
16.5k
      }
936
132
      return 1;
937
1.14k
    case S_BKSL:
938
1.14k
      return json_handle_esc(&j->s);
939
32
    case S_ERR:
940
32
      j->s.c_str-= c_len;
941
32
      j->s.error= JE_STRING_CONST;
942
32
      return 1;
943
17.5k
    }
944
17.5k
  }
945
2.44k
  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; 
946
2.44k
  return 1;
947
29.7k
}
948
949
950
int json_read_value(json_engine_t *j)
951
19.2k
{
952
19.2k
  int t_next, c_len, res;
953
954
19.2k
  j->value_type= JSON_VALUE_UNINITIALIZED;
955
19.2k
  if (j->state == JST_KEY)
956
8.28k
  {
957
9.33k
    while (json_read_keyname_chr(j) == 0) {}
958
959
8.28k
    if (j->s.error)
960
1.41k
      return 1;
961
8.28k
  }
962
963
17.8k
  get_first_nonspace(&j->s, &t_next, &c_len);
964
965
17.8k
  j->value_begin= j->s.c_str-c_len;
966
17.8k
  res= json_actions[JST_READ_VALUE][t_next](j);
967
17.8k
  j->value_end= j->s.c_str;
968
17.8k
  return res;
969
19.2k
}
970
971
972
int json_scan_next(json_engine_t *j)
973
37.4k
{
974
37.4k
  int t_next;
975
976
37.4k
  get_first_nonspace(&j->s, &t_next, &j->sav_c_len);
977
37.4k
  return *j->killed_ptr || json_actions[j->state][t_next](j);
978
37.4k
}
979
980
981
enum json_path_chr_classes {
982
  P_EOS,    /* end of string */
983
  P_USD,    /* $ */
984
  P_ASTER,  /* * */
985
  P_LSQRB,  /* [ */
986
  P_T,      /* t (for to) */
987
  P_RSQRB,  /* ] */
988
  P_POINT,  /* . */
989
  P_NEG,    /* hyphen (for negative index in path) */
990
  P_ZERO,   /* 0 */
991
  P_DIGIT,  /* 123456789 */
992
  P_L,      /* l (for "lax") */
993
  P_S,      /* s (for "strict") */
994
  P_SPACE,  /* space */
995
  P_BKSL,   /* \ */
996
  P_QUOTE,  /* " */
997
  P_ETC,    /* everything else */
998
  P_ERR,    /* character disallowed in JSON*/
999
  P_BAD,    /* invalid character */
1000
  N_PATH_CLASSES,
1001
};
1002
1003
1004
static enum json_path_chr_classes json_path_chr_map[128] = {
1005
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1006
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
1007
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1008
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1009
1010
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
1011
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_NEG,   P_POINT, P_ETC,
1012
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
1013
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1014
1015
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1016
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1017
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1018
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,
1019
1020
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1021
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1022
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1023
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
1024
};
1025
1026
1027
enum json_path_states {
1028
  PS_GO,  /* Initial state. */
1029
  PS_LAX, /* Parse the 'lax' keyword. */
1030
  PS_PT,  /* New path's step begins. */
1031
  PS_AR,  /* Parse array step. */
1032
  PS_SAR, /* space after the '['. */
1033
  PS_AWD, /* Array wildcard. */
1034
  PS_NEG,  /*  Parse '-' (hyphen) */
1035
  PS_Z,   /* '0' (as an array item number). */
1036
  PS_INT, /* Parse integer (as an array item number). */
1037
  PS_AS,  /* Space. */
1038
  PS_KEY, /* Key. */
1039
  PS_KNM, /* Parse key name. */
1040
  PS_KWD, /* Key wildcard. */
1041
  PS_AST, /* Asterisk. */
1042
  PS_DWD, /* Double wildcard. */
1043
  PS_KEYX, /* Key started with quote ("). */
1044
  PS_KNMX, /* Parse quoted key name. */
1045
  PS_LAST, /* Parse 'last' keyword */
1046
  PS_T,    /* Parse 'to' keyword. */
1047
  N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1048
  PS_SCT,  /* Parse the 'strict' keyword. */
1049
  PS_EKY,  /* '.' after the keyname so next step is the key. */
1050
  PS_EKYX, /* Closing " for the quoted keyname. */
1051
  PS_EAR,  /* '[' after the keyname so next step is the array. */
1052
  PS_ESC,  /* Escaping in the keyname. */
1053
  PS_ESCX, /* Escaping in the quoted keyname. */
1054
  PS_OK,   /* Path normally ended. */
1055
  PS_KOK   /* EOS after the keyname so end the path normally. */
1056
};
1057
1058
1059
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1060
{
1061
/*
1062
            EOS       $,      *       [       to       ]       .       -
1063
            0       1..9    L       S       SPACE   \       "       ETC
1064
            ERR              BAD
1065
*/
1066
/* GO  */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1067
            JE_SYN, JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1068
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1069
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1070
            JE_SYN, JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1071
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1072
/* PT */  { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1073
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1074
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1075
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_NEG,
1076
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1077
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1078
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1079
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1080
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1081
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1082
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1083
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1084
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1085
            PS_INT, PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1086
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1087
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1088
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1089
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1090
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1091
            PS_INT, PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1092
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1093
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_T, PS_PT, JE_SYN, PS_NEG,
1094
            PS_Z, PS_INT, PS_LAST, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1095
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1096
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, PS_KNM, JE_SYN, PS_KNM,
1097
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1098
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1099
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_KNM, PS_EKY, PS_KNM,
1100
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1101
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1102
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, JE_SYN, PS_EKY, JE_SYN,
1103
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1104
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1105
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1106
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1107
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1108
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1109
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1110
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1111
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,
1112
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1113
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1114
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1115
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1116
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1117
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1118
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1119
            JE_SYN, JE_BAD_CHR},
1120
/* T */   { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1121
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1122
            JE_SYN, JE_BAD_CHR},
1123
};
1124
1125
1126
int json_path_setup(json_path_t *p,
1127
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1128
0
{
1129
0
 int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0,
1130
0
  prev_value=0, is_to= 0, *cur_val;
1131
0
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1132
0
  json_string_setup(&p->s, i_cs, str, end);
1133
1134
0
  p->steps[0].type= JSON_PATH_ARRAY_WILD;
1135
0
  p->last_step= p->steps;
1136
0
  p->mode_strict= FALSE;
1137
0
  p->types_used= JSON_PATH_KEY_NULL;
1138
1139
0
  do
1140
0
  {
1141
0
    if ((c_len= json_next_char(&p->s)) <= 0)
1142
0
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1143
0
    else
1144
0
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1145
1146
0
    if ((state= json_path_transitions[state][t_next]) < 0)
1147
0
      return p->s.error= state;
1148
1149
0
    p->s.c_str+= c_len;
1150
1151
0
    switch (state)
1152
0
    {
1153
0
    case PS_LAX:
1154
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1155
0
        return 1;
1156
0
      p->mode_strict= FALSE;
1157
0
      continue;
1158
0
    case PS_SCT:
1159
0
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1160
0
        return 1;
1161
0
      p->mode_strict= TRUE;
1162
0
      state= PS_LAX;
1163
0
      continue;
1164
0
    case PS_KWD:
1165
0
    case PS_AWD:
1166
0
      p->last_step->type|= JSON_PATH_WILD;
1167
0
      p->types_used|= JSON_PATH_WILD;
1168
0
      continue;
1169
0
    case PS_INT:
1170
0
      cur_val= is_to ? &(p->last_step->n_item_end) :
1171
0
                       &(p->last_step->n_item);
1172
0
      if (is_last)
1173
0
      {
1174
0
        prev_value*= 10;
1175
0
        prev_value-= p->s.c_next - '0';
1176
0
        *cur_val= -1 + prev_value;
1177
0
      }
1178
0
      else
1179
0
      {
1180
0
        (*cur_val)*= 10;
1181
0
        if (is_negative_index)
1182
0
          *cur_val-= p->s.c_next - '0';
1183
0
        else
1184
0
          *cur_val+= p->s.c_next - '0';
1185
0
      }
1186
0
      continue;
1187
0
    case PS_EKYX:
1188
0
      p->last_step->key_end= p->s.c_str - c_len;
1189
0
      state= PS_PT;
1190
0
      continue;
1191
0
    case PS_EKY:
1192
0
      p->last_step->key_end= p->s.c_str - c_len;
1193
0
      state= PS_KEY;
1194
      /* fall through */
1195
0
    case PS_KEY:
1196
0
      p->last_step++;
1197
0
      is_to= 0;
1198
0
      prev_value= 0;
1199
0
      is_negative_index= 0;
1200
0
      is_last= 0;
1201
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1202
0
        return p->s.error= JE_DEPTH;
1203
0
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1204
0
      double_wildcard= JSON_PATH_KEY_NULL;
1205
      /* fall through */
1206
0
    case PS_KEYX:
1207
0
      p->last_step->key= p->s.c_str;
1208
0
      continue;
1209
0
    case PS_EAR:
1210
0
      p->last_step->key_end= p->s.c_str - c_len;
1211
0
      state= PS_AR;
1212
      /* fall through */
1213
0
    case PS_AR:
1214
0
      p->last_step++;
1215
0
      is_last= 0;
1216
0
      is_to= 0;
1217
0
      prev_value= 0;
1218
0
      is_negative_index= 0;
1219
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1220
0
        return p->s.error= JE_DEPTH;
1221
0
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1222
0
      double_wildcard= JSON_PATH_KEY_NULL;
1223
0
      p->last_step->n_item= 0;
1224
0
      continue;
1225
0
    case PS_ESC:
1226
0
      if (json_handle_esc(&p->s))
1227
0
        return 1;
1228
0
      state= PS_KNM;
1229
0
      continue;
1230
0
    case PS_ESCX:
1231
0
      if (json_handle_esc(&p->s))
1232
0
        return 1;
1233
0
      state= PS_KNMX;
1234
0
      continue;
1235
0
    case PS_KOK:
1236
0
      p->last_step->key_end= p->s.c_str - c_len;
1237
0
      state= PS_OK;
1238
0
      break; /* 'break' as the loop supposed to end after that. */
1239
0
    case PS_DWD:
1240
0
      double_wildcard= JSON_PATH_DOUBLE_WILD;
1241
0
      continue;
1242
0
    case PS_NEG:
1243
0
       p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1244
0
       is_negative_index= 1;
1245
0
       continue;
1246
0
    case PS_LAST:
1247
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
1248
0
       return 1;
1249
0
      p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1250
0
      is_last= 1;
1251
0
      if (is_to)
1252
0
        p->last_step->n_item_end= -1;
1253
0
      else
1254
0
        p->last_step->n_item= -1;
1255
0
      continue;
1256
0
    case PS_T:
1257
0
      if ((p->s.error= skip_string_verbatim(&p->s, "o")))
1258
0
        return 1;
1259
0
      is_to= 1;
1260
0
      is_negative_index= 0;
1261
0
      is_last= 0;
1262
0
      prev_value= 0;
1263
0
      p->last_step->n_item_end= 0;
1264
0
      p->last_step->type|= JSON_PATH_ARRAY_RANGE;
1265
0
      p->types_used|= JSON_PATH_ARRAY_RANGE;
1266
0
      continue;
1267
0
    };
1268
0
  } while (state != PS_OK);
1269
1270
0
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
1271
0
}
1272
1273
1274
int json_skip_to_level(json_engine_t *j, int level)
1275
2.30k
{
1276
18.4k
  do {
1277
18.4k
    if (j->stack_p < level)
1278
632
      return 0;
1279
18.4k
  } while (json_scan_next(j) == 0);
1280
1281
1.66k
  return 1;
1282
2.30k
}
1283
1284
1285
/*
1286
  works as json_skip_level() but also counts items on the current
1287
  level skipped.
1288
*/
1289
int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1290
0
{
1291
0
  int level= j->stack_p;
1292
1293
0
  *n_items_skipped= 0;
1294
0
  while (json_scan_next(j) == 0)
1295
0
  {
1296
0
    if (j->stack_p < level)
1297
0
      return 0;
1298
0
    if (j->stack_p == level && j->state == JST_VALUE)
1299
0
      (*n_items_skipped)++;
1300
0
  }
1301
1302
0
  return 1;
1303
0
}
1304
1305
1306
int json_skip_array_and_count(json_engine_t *je, int *n_items)
1307
0
{
1308
0
  json_engine_t j= *je;
1309
0
  *n_items= 0;
1310
1311
0
  return json_skip_level_and_count(&j, n_items); 
1312
0
}
1313
1314
1315
int json_skip_key(json_engine_t *j)
1316
13.9k
{
1317
13.9k
  if (json_read_value(j))
1318
1.05k
    return 1;
1319
1320
12.8k
  if (json_value_scalar(j))
1321
11.0k
    return 0;
1322
1323
1.83k
  return json_skip_level(j);
1324
12.8k
}
1325
1326
1327
0
#define SKIPPED_STEP_MARK INT_MAX32
1328
1329
/*
1330
  Current step of the patch matches the JSON construction.
1331
  Now we should either stop the search or go to the next
1332
  step of the path.
1333
*/
1334
static int handle_match(json_engine_t *je, json_path_t *p,
1335
                        json_path_step_t **p_cur_step, int *array_counters)
1336
0
{
1337
0
  json_path_step_t *next_step= *p_cur_step + 1;
1338
1339
0
  DBUG_ASSERT(*p_cur_step < p->last_step);
1340
1341
0
  if (json_read_value(je))
1342
0
    return 1;
1343
1344
0
  if (json_value_scalar(je))
1345
0
  {
1346
0
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1347
0
    {
1348
0
      if (++next_step > p->last_step)
1349
0
      {
1350
0
        je->s.c_str= je->value_begin;
1351
0
        return 1;
1352
0
      }
1353
0
    }
1354
0
    return 0;
1355
0
  }
1356
1357
0
  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1358
0
      je->value_type & JSON_VALUE_OBJECT)
1359
0
  {
1360
0
    do
1361
0
    {
1362
0
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1363
0
      if (++next_step > p->last_step)
1364
0
      {
1365
0
        je->s.c_str= je->value_begin;
1366
0
        je->stack_p--;
1367
0
        return 1;
1368
0
      }
1369
0
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1370
0
  }
1371
1372
0
  if ((int) je->value_type !=
1373
0
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1374
0
    return json_skip_level(je);
1375
1376
0
  array_counters[next_step - p->steps]= 0;
1377
0
  if (next_step->type & JSON_PATH_ARRAY)
1378
0
  {
1379
0
    int array_size;
1380
0
    if (next_step->n_item >= 0)
1381
0
      array_size= 0;
1382
0
    else
1383
0
    {
1384
0
      json_engine_t j2= *je;
1385
0
      if (json_skip_array_and_count(&j2, &array_size))
1386
0
      {
1387
0
        *je= j2;
1388
0
        return 1;
1389
0
      }
1390
0
      array_size= -array_size;
1391
0
    }
1392
0
    array_counters[next_step - p->steps]= array_size;
1393
0
  }
1394
1395
0
  *p_cur_step= next_step;
1396
0
  return 0;
1397
0
}
1398
1399
1400
/*
1401
  Check if the name of the current JSON key matches
1402
  the step of the path.
1403
*/
1404
int json_key_matches(json_engine_t *je, json_string_t *k)
1405
14.8k
{
1406
16.4k
  while (json_read_keyname_chr(je) == 0)
1407
8.68k
  {
1408
8.68k
    if (json_read_string_const_chr(k) ||
1409
8.68k
        je->s.c_next != k->c_next)
1410
7.06k
      return 0;
1411
8.68k
  }
1412
1413
7.80k
  return json_read_string_const_chr(k);
1414
14.8k
}
1415
1416
1417
int json_find_path(json_engine_t *je,
1418
                   json_path_t *p, json_path_step_t **p_cur_step,
1419
                   int *array_counters)
1420
0
{
1421
0
  json_string_t key_name;
1422
0
  int res= 0;
1423
1424
0
  json_string_set_cs(&key_name, p->s.cs);
1425
1426
0
  do
1427
0
  {
1428
0
    json_path_step_t *cur_step= *p_cur_step;
1429
0
    switch (je->state)
1430
0
    {
1431
0
    case JST_KEY:
1432
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
1433
0
      if (!(cur_step->type & JSON_PATH_WILD))
1434
0
      {
1435
0
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
1436
0
        if (!json_key_matches(je, &key_name))
1437
0
        {
1438
0
          if (json_skip_key(je))
1439
0
            goto exit;
1440
0
          continue;
1441
0
        }
1442
0
      }
1443
0
      if (cur_step == p->last_step ||
1444
0
          handle_match(je, p, p_cur_step, array_counters))
1445
0
        goto exit;
1446
0
      break;
1447
0
    case JST_VALUE:
1448
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
1449
0
      if (cur_step->type & JSON_PATH_ARRAY_RANGE)
1450
0
      {
1451
0
        res= (cur_step->n_item <= array_counters[cur_step - p->steps] &&
1452
0
              cur_step->n_item_end >= array_counters[cur_step - p->steps]);
1453
0
        array_counters[cur_step - p->steps]++;
1454
0
      }
1455
0
      else
1456
0
        res= cur_step->n_item == array_counters[cur_step - p->steps]++;
1457
0
      if ((cur_step->type & JSON_PATH_WILD) || res)
1458
0
      {
1459
        /* Array item matches. */
1460
0
        if (cur_step == p->last_step ||
1461
0
            handle_match(je, p, p_cur_step, array_counters))
1462
0
          goto exit;
1463
0
      }
1464
0
      else
1465
0
        json_skip_array_item(je);
1466
0
      break;
1467
0
    case JST_OBJ_END:
1468
0
      do
1469
0
      {
1470
0
        (*p_cur_step)--;
1471
0
      } while (*p_cur_step > p->steps &&
1472
0
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
1473
0
      break;
1474
0
    case JST_ARRAY_END:
1475
0
      (*p_cur_step)--;
1476
0
      break;
1477
0
    default:
1478
0
      DBUG_ASSERT(0);
1479
0
      break;
1480
0
    };
1481
0
  } while (json_scan_next(je) == 0);
1482
1483
  /* No luck. */
1484
0
  return 1;
1485
1486
0
exit:
1487
0
  return je->s.error;
1488
0
}
1489
1490
1491
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
1492
                          uint n_paths, json_path_t *paths, uint *path_depths)
1493
0
{
1494
0
  state->n_paths= n_paths;
1495
0
  state->paths= paths;
1496
0
  state->cur_depth= 0;
1497
0
  state->path_depths= path_depths;
1498
0
  return json_find_paths_next(je, state);
1499
0
}
1500
1501
1502
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
1503
0
{
1504
0
  uint p_c;
1505
0
  int path_found, no_match_found;
1506
0
  do
1507
0
  {
1508
0
    switch (je->state)
1509
0
    {
1510
0
    case JST_KEY:
1511
0
      path_found= FALSE;
1512
0
      no_match_found= TRUE;
1513
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1514
0
      {
1515
0
        json_path_step_t *cur_step;
1516
0
        if (state->path_depths[p_c] <
1517
0
              state->cur_depth /* Path already failed. */ ||
1518
0
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1519
0
              JSON_PATH_KEY))
1520
0
          continue;
1521
1522
0
        if (!(cur_step->type & JSON_PATH_WILD))
1523
0
        {
1524
0
          json_string_t key_name;
1525
0
          json_string_setup(&key_name, state->paths[p_c].s.cs,
1526
0
                            cur_step->key, cur_step->key_end);
1527
0
          if (!json_key_matches(je, &key_name))
1528
0
            continue;
1529
0
        }
1530
0
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
1531
0
          path_found= TRUE;
1532
0
        else
1533
0
        {
1534
0
          no_match_found= FALSE;
1535
0
          state->path_depths[p_c]= state->cur_depth + 1;
1536
0
        }
1537
0
      }
1538
0
      if (path_found)
1539
        /* Return the result. */
1540
0
        goto exit;
1541
0
      if (no_match_found)
1542
0
      {
1543
        /* No possible paths left to check. Just skip the level. */
1544
0
        if (json_skip_level(je))
1545
0
          goto exit;
1546
0
      }
1547
1548
0
      break;
1549
0
    case JST_VALUE:
1550
0
      path_found= FALSE;
1551
0
      no_match_found= TRUE;
1552
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1553
0
      {
1554
0
        json_path_step_t *cur_step;
1555
0
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
1556
0
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1557
0
              JSON_PATH_ARRAY))
1558
0
          continue;
1559
0
        if (cur_step->type & JSON_PATH_WILD ||
1560
0
            cur_step->n_item == state->array_counters[state->cur_depth])
1561
0
        {
1562
          /* Array item matches. */
1563
0
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
1564
0
            path_found= TRUE;
1565
0
          else
1566
0
          {
1567
0
            no_match_found= FALSE;
1568
0
            state->path_depths[p_c]= state->cur_depth + 1;
1569
0
          }
1570
0
        }
1571
0
      }
1572
1573
0
      if (path_found)
1574
0
        goto exit;
1575
1576
0
      if (no_match_found)
1577
0
        json_skip_array_item(je);
1578
1579
0
      state->array_counters[state->cur_depth]++;
1580
0
      break;
1581
0
    case JST_OBJ_START:
1582
0
    case JST_ARRAY_START:
1583
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1584
0
      {
1585
0
        if (state->path_depths[p_c] < state->cur_depth)
1586
          /* Path already failed. */
1587
0
          continue;
1588
0
        if (state->paths[p_c].steps[state->cur_depth].type &
1589
0
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
1590
0
          state->path_depths[p_c]++;
1591
0
      }
1592
0
      state->cur_depth++;
1593
0
      break;
1594
0
    case JST_OBJ_END:
1595
0
    case JST_ARRAY_END:
1596
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1597
0
      {
1598
0
        if (state->path_depths[p_c] < state->cur_depth)
1599
0
          continue;
1600
0
        state->path_depths[p_c]--;
1601
0
      }
1602
0
      state->cur_depth--;
1603
0
      break;
1604
0
    default:
1605
0
      DBUG_ASSERT(0);
1606
0
      break;
1607
0
    };
1608
0
  } while (json_scan_next(je) == 0);
1609
1610
  /* No luck. */
1611
0
  return 1;
1612
1613
0
exit:
1614
0
  return je->s.error;
1615
0
}
1616
1617
1618
int json_append_ascii(CHARSET_INFO *json_cs,
1619
                      uchar *json, uchar *json_end,
1620
                      const uchar *ascii, const uchar *ascii_end)
1621
0
{
1622
0
  const uchar *json_start= json;
1623
0
  while (ascii < ascii_end)
1624
0
  {
1625
0
    int c_len;
1626
0
    if ((c_len= my_ci_wc_mb(json_cs, (my_wc_t) *ascii, json, json_end)) > 0)
1627
0
    {
1628
0
      json+= c_len;
1629
0
      ascii++;
1630
0
      continue;
1631
0
    }
1632
1633
    /* Error return. */
1634
0
    return c_len;
1635
0
  }
1636
1637
0
  return (int)(json - json_start);
1638
0
}
1639
1640
1641
int json_unescape(CHARSET_INFO *json_cs,
1642
                  const uchar *json_str, const uchar *json_end,
1643
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
1644
0
{
1645
0
  json_string_t s;
1646
0
  const uchar *res_b= res;
1647
1648
0
  json_string_setup(&s, json_cs, json_str, json_end);
1649
0
  while (json_read_string_const_chr(&s) == 0)
1650
0
  {
1651
0
    int c_len;
1652
0
    if ((c_len= my_ci_wc_mb(res_cs, s.c_next, res, res_end)) > 0)
1653
0
    {
1654
0
      res+= c_len;
1655
0
      continue;
1656
0
    }
1657
0
    if (c_len == MY_CS_ILUNI)
1658
0
    {
1659
0
      return -1;
1660
0
    }
1661
    /* Result buffer is too small. */
1662
0
    return -1;
1663
0
  }
1664
1665
0
  return s.error==JE_EOS ? (int)(res - res_b) : -1;
1666
0
}
1667
1668
1669
/* When we need to replace a character with the escaping. */
1670
enum json_esc_char_classes {
1671
  ESC_= 0,    /* No need to escape. */
1672
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
1673
  ESC_B= 'b', /* Backspace. Escape as \b */
1674
  ESC_F= 'f', /* Formfeed. Escape as \f */
1675
  ESC_N= 'n', /* Newline. Escape as \n */
1676
  ESC_R= 'r', /* Return. Escape as \r */
1677
  ESC_T= 't', /* Tab. Escape as \s */
1678
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
1679
};
1680
1681
1682
/* This specifies how we should escape the character. */
1683
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
1684
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1685
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
1686
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1687
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1688
1689
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1690
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1691
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1692
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1693
1694
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1695
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1696
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1697
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
1698
};
1699
1700
1701
static const char hexconv[17] = "0123456789ABCDEF";
1702
1703
1704
int json_escape(CHARSET_INFO *str_cs,
1705
                const uchar *str, const uchar *str_end,
1706
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
1707
0
{
1708
0
  const uchar *json_start= json;
1709
1710
0
  while (str < str_end)
1711
0
  {
1712
0
    my_wc_t c_chr;
1713
0
    int c_len;
1714
0
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
1715
0
    {
1716
0
      enum json_esc_char_classes c_class;
1717
      
1718
0
      str+= c_len;
1719
0
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
1720
0
      {
1721
0
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
1722
0
        {
1723
0
          json+= c_len;
1724
0
          continue;
1725
0
        }
1726
0
        if (c_len < 0)
1727
0
        {
1728
          /* JSON buffer is depleted. */
1729
0
          return JSON_ERROR_OUT_OF_SPACE;
1730
0
        }
1731
1732
        /* JSON charset cannot convert this character. */
1733
0
        c_class= ESC_U;
1734
0
      }
1735
1736
0
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
1737
0
          (c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
1738
0
                                       json+= c_len, json_end)) <= 0)
1739
0
      {
1740
        /* JSON buffer is depleted. */
1741
0
        return JSON_ERROR_OUT_OF_SPACE;
1742
0
      }
1743
0
      json+= c_len;
1744
1745
0
      if (c_class != ESC_U)
1746
0
        continue;
1747
1748
0
      {
1749
        /* We have to use /uXXXX escaping. */
1750
0
        uchar utf16buf[4];
1751
0
        uchar code_str[8];
1752
0
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);
1753
1754
0
        code_str[0]= hexconv[utf16buf[0] >> 4];
1755
0
        code_str[1]= hexconv[utf16buf[0] & 15];
1756
0
        code_str[2]= hexconv[utf16buf[1] >> 4];
1757
0
        code_str[3]= hexconv[utf16buf[1] & 15];
1758
1759
0
        if (u_len > 2)
1760
0
        {
1761
0
          code_str[4]= hexconv[utf16buf[2] >> 4];
1762
0
          code_str[5]= hexconv[utf16buf[2] & 15];
1763
0
          code_str[6]= hexconv[utf16buf[3] >> 4];
1764
0
          code_str[7]= hexconv[utf16buf[3] & 15];
1765
0
        }
1766
        
1767
0
        if ((c_len= json_append_ascii(json_cs, json, json_end,
1768
0
                                      code_str, code_str+u_len*2)) > 0)
1769
0
        {
1770
0
          json+= c_len;
1771
0
          continue;
1772
0
        }
1773
        /* JSON buffer is depleted. */
1774
0
        return JSON_ERROR_OUT_OF_SPACE;
1775
0
      }
1776
0
    }
1777
0
    else /* c_len == 0, an illegal symbol. */
1778
0
      return JSON_ERROR_ILLEGAL_SYMBOL;
1779
0
  }
1780
1781
0
  return (int)(json - json_start);
1782
0
}
1783
1784
1785
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
1786
                        const uchar *str, const uchar *end,
1787
                        json_path_t *p)
1788
0
{
1789
0
  json_scan_start(je, i_cs, str, end);
1790
0
  p->last_step= p->steps - 1; 
1791
0
  return 0;
1792
0
}
1793
1794
1795
int json_get_path_next(json_engine_t *je, json_path_t *p)
1796
0
{
1797
0
  if (p->last_step < p->steps)
1798
0
  {
1799
0
    if (json_read_value(je))
1800
0
      return 1;
1801
1802
0
    p->last_step= p->steps;
1803
0
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
1804
0
    p->steps[0].n_item= 0;
1805
0
    return 0;
1806
0
  }
1807
0
  else
1808
0
  {
1809
0
    if (json_value_scalar(je))
1810
0
    {
1811
0
      if (p->last_step->type & JSON_PATH_ARRAY)
1812
0
        p->last_step->n_item++;
1813
0
    }
1814
0
    else
1815
0
    {
1816
0
      p->last_step++;
1817
0
      p->last_step->type= (enum json_path_step_types) je->value_type;
1818
0
      p->last_step->n_item= 0;
1819
0
    }
1820
1821
0
    if (json_scan_next(je))
1822
0
      return 1;
1823
0
  }
1824
1825
0
  do
1826
0
  {
1827
0
    switch (je->state)
1828
0
    {
1829
0
    case JST_KEY:
1830
0
      p->last_step->key= je->s.c_str;
1831
0
      do
1832
0
      {
1833
0
        p->last_step->key_end= je->s.c_str;
1834
0
      } while (json_read_keyname_chr(je) == 0);
1835
0
      if (je->s.error)
1836
0
        return 1;
1837
      /* Now we have je.state == JST_VALUE, so let's handle it. */
1838
1839
      /* fall through */
1840
0
    case JST_VALUE:
1841
0
      if (json_read_value(je))
1842
0
        return 1;
1843
0
      return 0;
1844
0
    case JST_OBJ_END:
1845
0
    case JST_ARRAY_END:
1846
0
      p->last_step--;
1847
0
      if (p->last_step->type & JSON_PATH_ARRAY)
1848
0
        p->last_step->n_item++;
1849
0
      break;
1850
0
    default:
1851
0
      break;
1852
0
    }
1853
0
  } while (json_scan_next(je) == 0);
1854
1855
0
  return 1;
1856
0
}
1857
1858
1859
static enum json_types smart_read_value(json_engine_t *je,
1860
                                        const char **value, int *value_len)
1861
944
{
1862
944
  if (json_read_value(je))
1863
439
    goto err_return;
1864
1865
505
  *value= (char *) je->value;
1866
1867
505
  if (json_value_scalar(je))
1868
43
    *value_len= je->value_len;
1869
462
  else
1870
462
  {
1871
462
    if (json_skip_level(je))
1872
445
      goto err_return;
1873
1874
17
    *value_len= (int) ((char *) je->s.c_str - *value);
1875
17
  }
1876
1877
60
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
1878
60
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
1879
60
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
1880
60
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
1881
60
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
1882
60
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
1883
60
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);
1884
1885
60
  return (enum json_types) je->value_type;
1886
1887
884
err_return:
1888
884
  return JSV_BAD_JSON;
1889
505
}
1890
1891
1892
enum json_types json_type(const char *js, const char *js_end,
1893
                          const char **value, int *value_len)
1894
0
{
1895
0
  json_engine_t je;
1896
1897
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1898
0
                  (const uchar *) js_end);
1899
1900
0
  return smart_read_value(&je, value, value_len);
1901
0
}
1902
1903
1904
enum json_types json_get_array_item(const char *js, const char *js_end,
1905
                                    int n_item,
1906
                                    const char **value, int *value_len)
1907
0
{
1908
0
  json_engine_t je;
1909
0
  int c_item= 0;
1910
1911
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1912
0
                  (const uchar *) js_end);
1913
1914
0
  if (json_read_value(&je) ||
1915
0
      je.value_type != JSON_VALUE_ARRAY)
1916
0
    goto err_return;
1917
1918
0
  while (!json_scan_next(&je))
1919
0
  {
1920
0
    switch (je.state)
1921
0
    {
1922
0
    case JST_VALUE:
1923
0
      if (c_item == n_item)
1924
0
        return smart_read_value(&je, value, value_len);
1925
1926
0
      if (json_skip_key(&je))
1927
0
        goto err_return;
1928
1929
0
      c_item++;
1930
0
      break;
1931
1932
0
    case JST_ARRAY_END:
1933
0
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1934
0
      *value_len= c_item;
1935
0
      return JSV_NOTHING;
1936
0
    }
1937
0
  }
1938
1939
0
err_return:
1940
0
  return JSV_BAD_JSON;
1941
0
}
1942
1943
1944
/** Simple json lookup for a value by the key.
1945
1946
  Expects JSON object.
1947
  Only scans the 'first level' of the object, not
1948
  the nested structures.
1949
1950
  @param js          [in]       json object to search in
1951
  @param js_end      [in]       end of json string
1952
  @param key         [in]       key to search for
1953
  @param key_end     [in]         - " -
1954
  @param value_start [out]      pointer into js (value or closing })
1955
  @param value_len   [out]      length of the value found or number of keys
1956
1957
  @retval the type of the key value
1958
  @retval JSV_BAD_JSON - syntax error found reading JSON.
1959
                         or not JSON object.
1960
  @retval JSV_NOTHING - no such key found.
1961
*/
1962
enum json_types json_get_object_key(const char *js, const char *js_end,
1963
                                    const char *key,
1964
                                    const char **value, int *value_len)
1965
2.17k
{
1966
2.17k
  const char *key_end= key + strlen(key);
1967
2.17k
  json_engine_t je;
1968
2.17k
  json_string_t key_name;
1969
2.17k
  int n_keys= 0;
1970
1971
2.17k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1972
1973
2.17k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1974
2.17k
                  (const uchar *) js_end);
1975
1976
2.17k
  if (json_read_value(&je) ||
1977
2.17k
      je.value_type != JSON_VALUE_OBJECT)
1978
367
    goto err_return;
1979
1980
7.60k
  while (!json_scan_next(&je))
1981
7.44k
  {
1982
7.44k
    switch (je.state)
1983
7.44k
    {
1984
7.43k
    case JST_KEY:
1985
7.43k
      n_keys++;
1986
7.43k
      json_string_set_str(&key_name, (const uchar *) key,
1987
7.43k
                          (const uchar *) key_end);
1988
7.43k
      if (json_key_matches(&je, &key_name))
1989
944
        return smart_read_value(&je, value, value_len);
1990
1991
6.48k
      if (json_skip_key(&je))
1992
697
        goto err_return;
1993
1994
5.79k
      break;
1995
1996
5.79k
    case JST_OBJ_END:
1997
9
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1998
9
      *value_len= n_keys;
1999
9
      return JSV_NOTHING;
2000
7.44k
    }
2001
7.44k
  }
2002
2003
1.22k
err_return:
2004
1.22k
  return JSV_BAD_JSON;
2005
1.81k
}
2006
2007
2008
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2009
                                     const char *js_end __attribute__((unused)),
2010
                                     int nkey __attribute__((unused)),
2011
                                     const char **keyname __attribute__((unused)),
2012
                                     const char **keyname_end __attribute__((unused)),
2013
                                     const char **value __attribute__((unused)),
2014
                                     int *value_len __attribute__((unused)))
2015
0
{
2016
0
  json_engine_t je;
2017
0
  int keys_found= 0;
2018
2019
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2020
0
                  (const uchar *) js_end);
2021
2022
0
  if (json_read_value(&je) ||
2023
0
      je.value_type != JSON_VALUE_OBJECT)
2024
0
    goto err_return;
2025
2026
0
  while (!json_scan_next(&je))
2027
0
  {
2028
0
    switch (je.state)
2029
0
    {
2030
0
    case JST_KEY:
2031
0
      if (nkey == keys_found)
2032
0
      {
2033
0
        *keyname= (char *) je.s.c_str;
2034
0
        while (json_read_keyname_chr(&je) == 0)
2035
0
          *keyname_end= (char *) je.s.c_str;
2036
2037
0
        return smart_read_value(&je, value, value_len);
2038
0
      }
2039
2040
0
      keys_found++;
2041
0
      if (json_skip_key(&je))
2042
0
        goto err_return;
2043
2044
0
      break;
2045
2046
0
    case JST_OBJ_END:
2047
0
      return JSV_NOTHING;
2048
0
    }
2049
0
  }
2050
2051
0
err_return:
2052
0
  return JSV_BAD_JSON;
2053
0
}
2054
2055
2056
/** Check if json is valid (well-formed)
2057
2058
  @retval 0 - success, json is well-formed
2059
  @retval 1 - error, json is invalid
2060
*/
2061
int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2062
0
{
2063
0
  json_engine_t je;
2064
0
  json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2065
0
  while (json_scan_next(&je) == 0) /* no-op */ ;
2066
0
  return je.s.error == 0;
2067
0
}
2068
2069
2070
/*
2071
  Expects the JSON object as an js argument, and the key name.
2072
  Looks for this key in the object and returns
2073
  the location of all the text related to it.
2074
  The text includes the comma, separating this key.
2075
2076
  comma_pos - the hint where the comma is. It is important
2077
       if you plan to replace the key rather than just cut.
2078
    1  - comma is on the left
2079
    2  - comma is on the right.
2080
    0  - no comma at all (the object has just this single key)
2081
 
2082
  if no such key found *key_start is set to NULL.
2083
*/
2084
int json_locate_key(const char *js, const char *js_end,
2085
                    const char *kname,
2086
                    const char **key_start, const char **key_end,
2087
                    int *comma_pos)
2088
2.17k
{
2089
2.17k
  const char *kname_end= kname + strlen(kname);
2090
2.17k
  json_engine_t je;
2091
2.17k
  json_string_t key_name;
2092
2.17k
  int t_next, c_len, match_result;
2093
2094
2.17k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2095
2096
2.17k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2097
2.17k
                  (const uchar *) js_end);
2098
2099
2.17k
  if (json_read_value(&je) ||
2100
2.17k
      je.value_type != JSON_VALUE_OBJECT)
2101
367
    goto err_return;
2102
2103
1.81k
  *key_start= (const char *) je.s.c_str;
2104
1.81k
  *comma_pos= 0;
2105
2106
7.60k
  while (!json_scan_next(&je))
2107
7.44k
  {
2108
7.44k
    switch (je.state)
2109
7.44k
    {
2110
7.43k
    case JST_KEY:
2111
7.43k
      json_string_set_str(&key_name, (const uchar *) kname,
2112
7.43k
                          (const uchar *) kname_end);
2113
7.43k
      match_result= json_key_matches(&je, &key_name);
2114
7.43k
      if (json_skip_key(&je))
2115
1.58k
        goto err_return;
2116
5.85k
      get_first_nonspace(&je.s, &t_next, &c_len);
2117
5.85k
      je.s.c_str-= c_len;
2118
2119
5.85k
      if (match_result)
2120
60
      {
2121
60
        *key_end= (const char *) je.s.c_str;
2122
2123
60
        if (*comma_pos == 1)
2124
2
          return 0;
2125
2126
58
        DBUG_ASSERT(*comma_pos == 0);
2127
2128
58
        if (t_next == C_COMMA)
2129
1
        {
2130
1
          *key_end+= c_len;
2131
1
          *comma_pos= 2;
2132
1
        }
2133
57
        else if (t_next == C_RCURB)
2134
1
          *comma_pos= 0;
2135
56
        else
2136
56
          goto err_return;
2137
2
        return 0;
2138
58
      }
2139
2140
5.79k
      *key_start= (const char *) je.s.c_str;
2141
5.79k
      *comma_pos= 1;
2142
5.79k
      break;
2143
2144
9
    case JST_OBJ_END:
2145
9
      *key_start= NULL;
2146
9
      return 0;
2147
7.44k
    }
2148
7.44k
  }
2149
2150
2.16k
err_return:
2151
2.16k
  return 1;
2152
2153
1.81k
}