Coverage Report

Created: 2025-07-04 07:08

/src/server/strings/json_lib.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2016, 2022, MariaDB Corporation.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
15
16
#include <my_global.h>
17
#include <string.h>
18
#include <m_ctype.h>
19
#include "json_lib.h"
20
21
/*
22
  JSON escaping lets user specify UTF16 codes of characters.
23
  So we're going to need the UTF16 charset capabilities. Let's import
24
  them from the utf16 charset.
25
*/
26
int my_utf16_uni(CHARSET_INFO *cs,
27
                 my_wc_t *pwc, const uchar *s, const uchar *e);
28
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29
30
31
/*
  Attach the reader 's' to the text delimited by 'str' and 'end'.
  Only the current position and the end marker are stored; the charset
  and the error code of 's' are left as they are.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
37
38
39
/*
  Select the character set used to decode the text read through 's'.
  The charset's mb_wc routine is cached in s->wc, and the error code
  of the reader is cleared.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
45
46
47
static void json_string_setup(json_string_t *s,
48
                              CHARSET_INFO *i_cs, const uchar *str,
49
                              const uchar *end)
50
4.36k
{
51
4.36k
  json_string_set_cs(s, i_cs);
52
4.36k
  json_string_set_str(s, str, end);
53
4.36k
}
54
55
56
/*
  Classes of characters as seen by the main JSON state machine.
  Every class listed before NR_C_CLASSES selects a column of the
  handler table; C_SPACE is placed after the counter because spaces
  are skipped before a handler is looked up.
*/
enum json_char_classes {
  C_EOS= 0,     /* end of the string */
  C_LCURB,      /* '{' */
  C_RCURB,      /* '}' */
  C_LSQRB,      /* '[' */
  C_RSQRB,      /* ']' */
  C_COLON,      /* ':' */
  C_COMMA,      /* ',' */
  C_QUOTE,      /* '"' */
  C_DIGIT,      /* '-' and '0'..'9' */
  C_LOW_F,      /* 'f', first letter of "false" */
  C_LOW_N,      /* 'n', first letter of "null" */
  C_LOW_T,      /* 't', first letter of "true" */
  C_ETC,        /* any other character */
  C_ERR,        /* character that is not allowed in JSON */
  C_BAD,        /* the charset handler could not read the character */
  NR_C_CLASSES, /* number of classes that are handled by functions */
  C_SPACE       /* whitespace; needs no handler, hence after the counter */
};
75
76
77
/*
78
  This array maps first 128 Unicode Code Points into classes.
79
  The remaining Unicode characters should be mapped to C_ETC.
80
*/
81
82
static enum json_char_classes json_chr_map[128] = {
83
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
84
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
85
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
86
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
87
88
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
89
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
90
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
91
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
92
93
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
94
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
95
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,
97
98
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
99
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
100
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
101
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
102
};
103
104
105
/*
106
  JSON parser actually has more states than the 'enum json_states'
107
  declares. But the rest of the states aren't seen to the user so let's
108
  specify them here to avoid confusion.
109
*/
110
111
enum json_all_states {
112
  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
113
  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
114
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
115
  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
116
  NR_JSON_STATES= NR_JSON_USER_STATES+4
117
};
118
119
120
/*
  Type of the functions stored in the state/character-class handler
  table: each one takes the engine and returns an int status.
*/
typedef int (*json_state_handler)(json_engine_t *);
121
122
123
/* The string is broken. */
124
static int unexpected_eos(json_engine_t *j)
125
511
{
126
511
  j->s.error= JE_EOS;
127
511
  return 1;
128
511
}
129
130
131
/* This symbol here breaks the JSON syntax. */
132
static int syntax_error(json_engine_t *j)
133
478
{
134
478
  j->s.error= JE_SYN;
135
478
  return 1;
136
478
}
137
138
139
/* Value of object. */
140
static int mark_object(json_engine_t *j)
141
2.09k
{
142
2.09k
  j->state= JST_OBJ_START;
143
2.09k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
144
2.09k
  {
145
2.09k
    j->stack[j->stack_p]= JST_OBJ_CONT;
146
2.09k
    return 0;
147
2.09k
  }
148
6
  j->s.error= JE_DEPTH;
149
6
  return 1;
150
2.09k
}
151
152
153
/* Read value of object. */
154
static int read_obj(json_engine_t *j)
155
4.46k
{
156
4.46k
  j->state= JST_OBJ_START;
157
4.46k
  j->value_type= JSON_VALUE_OBJECT;
158
4.46k
  j->value= j->value_begin;
159
4.46k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
160
4.46k
  {
161
4.46k
    j->stack[j->stack_p]= JST_OBJ_CONT;
162
4.46k
    return 0;
163
4.46k
  }
164
0
  j->s.error= JE_DEPTH;
165
0
  return 1;
166
4.46k
}
167
168
169
/* Value of array. */
170
static int mark_array(json_engine_t *j)
171
1.54k
{
172
1.54k
  j->state= JST_ARRAY_START;
173
1.54k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
174
1.53k
  {
175
1.53k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
176
1.53k
    j->value= j->value_begin;
177
1.53k
    return 0;
178
1.53k
  }
179
6
  j->s.error= JE_DEPTH;
180
6
  return 1;
181
1.54k
}
182
183
/* Read value of object. */
184
static int read_array(json_engine_t *j)
185
1.44k
{
186
1.44k
  j->state= JST_ARRAY_START;
187
1.44k
  j->value_type= JSON_VALUE_ARRAY;
188
1.44k
  j->value= j->value_begin;
189
1.44k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
190
1.44k
  {
191
1.44k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
192
1.44k
    return 0;
193
1.44k
  }
194
0
  j->s.error= JE_DEPTH;
195
0
  return 1;
196
1.44k
}
197
198
199
200
/*
  Classes of characters inside a JSON string constant. They are
  mostly needed to handle the escape sequences, which are:
    \"  quotation mark
    \\  backslash
    \b  backspace, code 8
    \f  form feed, code 12
    \n  newline, code 10
    \r  carriage return, code 13
    \t  horizontal tab, code 9
    \u{four-hex-digits}  a UTF-16 code unit
  The classes S_0 .. S_F are equal to the value of the hex digit
  they stand for.
*/
enum json_string_char_classes {
  S_0= 0,  S_1= 1,  S_2= 2,  S_3= 3,
  S_4= 4,  S_5= 5,  S_6= 6,  S_7= 7,
  S_8= 8,  S_9= 9,  S_A= 10, S_B= 11,
  S_C= 12, S_D= 13, S_E= 14, S_F= 15,
  S_ETC= 36,   /* any other allowed character */
  S_QUOTE= 37, /* '"' */
  S_BKSL= 38,  /* '\' */
  S_ERR= 100   /* character that is not allowed */
};
235
236
237
/* This maps characters to their types inside a string constant. */
238
static enum json_string_char_classes json_instr_chr_map[128] = {
239
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
240
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
241
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
242
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
243
244
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
245
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
246
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
247
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
248
249
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
250
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
251
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
252
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,
253
254
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
255
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
256
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
257
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
258
};
259
260
261
/*
  Read four hex digits from 's' and add their value to the two bytes
  dest[0] and dest[1]: the first digit of each pair is the high
  nibble. The values are added, not stored, so the caller has to
  pass zeroed bytes.

  Returns 0 on success. Otherwise the error code, which is also
  stored in s->error: JE_EOS or JE_BAD_CHR when no character could be
  read, JE_SYN when the character is not a hex digit (that character
  is not consumed).
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int len= json_next_char(s);

    if (len <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

    if (s->c_next >= 128)
      return s->error= JE_SYN;

    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
      return s->error= JE_SYN;

    s->c_str+= len;
    if (pos % 2)
      dest[pos/2]+= digit;
    else
      dest[pos/2]+= digit*16;
  }
  return 0;
}
277
278
279
/*
  Handle an escape sequence; the backslash itself has already been
  consumed. On success s->c_next holds the character the sequence
  stands for and 0 is returned.

  \b \f \n \r \t are replaced with their control codes. \uXXXX is
  decoded as UTF-16; if the first four digits do not make a complete
  character, a second \uXXXX is required to form a surrogate pair.
  Any other allowed character after the backslash is returned as is.

  On failure a non-zero value is returned and s->error is set.
*/
static int json_handle_esc(json_string_t *s)
{
  int len;
  uchar utf16[4]= {0,0,0,0};

  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  s->c_str+= len;

  switch (s->c_next)
  {
    case 'b': s->c_next= 8;  return 0;
    case 'f': s->c_next= 12; return 0;
    case 'n': s->c_next= 10; return 0;
    case 'r': s->c_next= 13; return 0;
    case 't': s->c_next= 9;  return 0;
  }

  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
  {
    /* Step back, so the position points at the offending character. */
    s->c_str-= len;
    return s->error= JE_ESCAPING;
  }

  if (s->c_next != 'u')
    return 0;

  /* First UTF-16 code unit. */
  if (read_4_hexdigits(s, utf16))
    return 1;

  len= my_utf16_uni(0, &s->c_next, utf16, utf16+2);
  if (len == 2)
    return 0;
  if (len != MY_CS_TOOSMALL4)
    return s->error= JE_BAD_CHR;

  /* Four bytes are needed: the text has to continue with "\u". */
  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != '\\')
    return s->error= JE_SYN;
  s->c_str+= len;

  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != 'u')
    return s->error= JE_SYN;
  s->c_str+= len;

  /* Second UTF-16 code unit. */
  if (read_4_hexdigits(s, utf16+2))
    return 1;

  if (my_utf16_uni(0, &s->c_next, utf16, utf16+4) == 4)
    return 0;

  return s->error= JE_BAD_CHR;
}
353
354
355
/*
  Read the next character of a string constant into js->c_next,
  decoding an escape sequence if the character is a backslash.
  Returns 0 on success; otherwise a non-zero value with js->error set.
*/
int json_read_string_const_chr(json_string_t *js)
{
  int len= json_next_char(js);

  if (len <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= len;
  if (js->c_next != '\\')
    return 0;
  return json_handle_esc(js);
}
367
368
369
/*
  Skip the rest of a string constant up to and including the closing
  quote, then restore the state from the top of the stack.
  j->value_escaped is set when an escape sequence is met.
  Returns 0 on success; otherwise a non-zero value with j->s.error set.
*/
static int skip_str_constant(json_engine_t *j)
{
  int len;

  while ((len= json_next_char(&j->s)) > 0)
  {
    j->s.c_str+= len;

    /* An ordinary character. */
    if (j->s.c_next >= 128 || json_instr_chr_map[j->s.c_next] <= S_ETC)
      continue;

    if (j->s.c_next == '"')
    {
      j->state= j->stack[j->stack_p];
      return 0;
    }

    /* Symbol not allowed in JSON. */
    if (j->s.c_next != '\\')
      return j->s.error= JE_NOT_JSON_CHR;

    j->value_escaped= 1;
    if (json_handle_esc(&j->s))
      return 1;
  }

  return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
}
399
400
401
/* Scalar string. */
402
static int v_string(json_engine_t *j)
403
602
{
404
602
  return skip_str_constant(j) || json_scan_next(j);
405
602
}
406
407
408
/* Read scalar string. */
409
static int read_strn(json_engine_t *j)
410
418
{
411
418
  j->value= j->s.c_str;
412
418
  j->value_type= JSON_VALUE_STRING;
413
418
  j->value_escaped= 0;
414
415
418
  if (skip_str_constant(j))
416
136
    return 1;
417
418
282
  j->state= j->stack[j->stack_p];
419
282
  j->value_len= (int)(j->s.c_str - j->value) - j->s.c_next_len;
420
282
  return 0;
421
418
}
422
423
424
/*
  Numeric constants are handled by a small parser of their own. It is
  built like the main one: characters are mapped to classes, and a
  table indexed by state and class gives the next state. There are no
  handler functions here, though - the table holds the new state.
*/
enum json_num_char_classes {
  N_MINUS= 0,
  N_PLUS,
  N_ZERO,
  N_DIGIT,
  N_POINT,
  N_E,
  N_END,
  N_EEND,
  N_ERR,
  N_NUM_CLASSES
};
444
445
446
static enum json_num_char_classes json_num_chr_map[128] = {
447
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
448
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
449
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
450
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
451
452
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
453
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
454
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
455
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
456
457
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
458
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
459
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
460
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
461
462
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
463
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
464
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
465
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
466
};
467
468
469
/* States of the parser of numeric constants. */
enum json_num_states {
  NS_OK= 0, /* The number has ended. */
  NS_GO,    /* Initial state. */
  NS_GO1,   /* The number starts with '-'. */
  NS_Z,     /* The number starts with '0'. */
  NS_Z1,    /* The number starts with '-0'. */
  NS_INT,   /* Integer part. */
  NS_FRAC,  /* Fractional part. */
  NS_EX,    /* Exponent begins. */
  NS_EX1,   /* Exponent continues. */
  NS_NUM_STATES
};
481
482
483
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
484
{
485
/*         -        +       0         1..9    POINT    E       END_OK   ERROR */
486
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
487
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
488
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
489
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
490
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
491
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
492
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
493
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR }, 
494
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
495
};
496
497
498
static uint json_num_state_flags[NS_NUM_STATES]=
499
{
500
/*OK*/   0,
501
/*GO*/   0,
502
/*GO1*/  JSON_NUM_NEG,
503
/*ZERO*/ 0,
504
/*ZE1*/  0,
505
/*INT*/  0,
506
/*FRAC*/ JSON_NUM_FRAC_PART,
507
/*EX*/   JSON_NUM_EXP,
508
/*EX1*/  0,
509
};
510
511
512
/*
  Skip a numeric constant. Its first character has already been read
  and is available in j->s.c_next. The flags of the states passed are
  collected in j->num_flags.

  The loop stops at the first character for which the transition
  table gives a value that is not a positive state number; that
  character is left unread and 0 is returned, with the state restored
  from the top of the stack. When no further character can be read
  (or it is not below 128), the N_END transition decides at the end
  of the string, JE_BAD_CHR is used otherwise; the result is stored
  in j->s.error and 1 is returned if it is negative.
*/
static int skip_num_constant(json_engine_t *j)
{
  int len;
  int cur= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[cur];

    len= json_next_char(&j->s);
    if (len > 0 && j->s.c_next < 128)
    {
      cur= json_num_states[cur][json_num_chr_map[j->s.c_next]];
      if (cur <= 0)
        break;

      j->s.c_str+= len;
      continue;
    }

    j->s.error= json_eos(&j->s) ? json_num_states[cur][N_END] : JE_BAD_CHR;
    if (j->s.error < 0)
      return 1;
    break;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
541
542
543
/* Scalar numeric. */
544
static int v_number(json_engine_t *j)
545
994
{
546
994
  return skip_num_constant(j) || json_scan_next(j);
547
994
}
548
549
550
/* Read numeric constant. */
551
static int read_num(json_engine_t *j)
552
9.79k
{
553
9.79k
  j->value= j->value_begin;
554
9.79k
  if (skip_num_constant(j) == 0)
555
9.75k
  {
556
9.75k
    j->value_type= JSON_VALUE_NUMBER;
557
9.75k
    j->value_len= (int)(j->s.c_str - j->value_begin);
558
9.75k
    return 0;
559
9.75k
  }
560
40
  return 1;
561
9.79k
}
562
563
564
/* Check that the JSON string matches the argument and skip it. */
565
static int skip_string_verbatim(json_string_t *s, const char *str)
566
3.42k
{
567
3.42k
  int c_len;
568
12.5k
  while (*str)
569
10.1k
  {
570
10.1k
    if ((c_len= json_next_char(s)) > 0)
571
9.92k
    {
572
9.92k
      if (s->c_next == (my_wc_t) *(str++))
573
9.09k
      {
574
9.09k
        s->c_str+= c_len;
575
9.09k
        continue;
576
9.09k
      }
577
832
      return s->error= JE_SYN;
578
9.92k
    }
579
210
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; 
580
10.1k
  }
581
582
2.38k
  return 0;
583
3.42k
}
584
585
586
/* Scalar false. */
587
static int v_false(json_engine_t *j)
588
714
{
589
714
  if (skip_string_verbatim(&j->s, "alse"))
590
232
   return 1;
591
482
  j->state= j->stack[j->stack_p];
592
482
  return json_scan_next(j);
593
714
}
594
595
596
/* Scalar null. */
597
static int v_null(json_engine_t *j)
598
810
{
599
810
  if (skip_string_verbatim(&j->s, "ull"))
600
182
   return 1;
601
628
  j->state= j->stack[j->stack_p];
602
628
  return json_scan_next(j);
603
810
}
604
605
606
/* Scalar true. */
607
static int v_true(json_engine_t *j)
608
526
{
609
526
  if (skip_string_verbatim(&j->s, "rue"))
610
186
   return 1;
611
340
  j->state= j->stack[j->stack_p];
612
340
  return json_scan_next(j);
613
526
}
614
615
616
/* Read false. */
617
static int read_false(json_engine_t *j)
618
486
{
619
486
  j->value_type= JSON_VALUE_FALSE;
620
486
  j->value= j->value_begin;
621
486
  j->state= j->stack[j->stack_p];
622
486
  j->value_len= 5;
623
486
  return skip_string_verbatim(&j->s, "alse");
624
486
}
625
626
627
/* Read null. */
628
static int read_null(json_engine_t *j)
629
418
{
630
418
  j->value_type= JSON_VALUE_NULL;
631
418
  j->value= j->value_begin;
632
418
  j->state= j->stack[j->stack_p];
633
418
  j->value_len= 4;
634
418
  return skip_string_verbatim(&j->s, "ull");
635
418
}
636
637
638
/* Read true. */
639
static int read_true(json_engine_t *j)
640
474
{
641
474
  j->value_type= JSON_VALUE_TRUE;
642
474
  j->value= j->value_begin;
643
474
  j->state= j->stack[j->stack_p];
644
474
  j->value_len= 4;
645
474
  return skip_string_verbatim(&j->s, "rue");
646
474
}
647
648
649
/* Disallowed character. */
650
static int not_json_chr(json_engine_t *j)
651
38
{
652
38
  j->s.error= JE_NOT_JSON_CHR;
653
38
  return 1;
654
38
}
655
656
657
/* Bad character. */
658
static int bad_chr(json_engine_t *j)
659
151
{
660
151
  j->s.error= JE_BAD_CHR;
661
151
  return 1;
662
151
}
663
664
665
/* Correct finish. */
666
static int done(json_engine_t *j  __attribute__((unused)))
667
0
{
668
0
  return 1;
669
0
}
670
671
672
/* End of the object. */
673
static int end_object(json_engine_t *j)
674
1.47k
{
675
1.47k
  j->stack_p--;
676
1.47k
  j->state= JST_OBJ_END;
677
1.47k
  return 0;
678
1.47k
}
679
680
681
/* End of the array. */
682
static int end_array(json_engine_t *j)
683
874
{
684
874
  j->stack_p--;
685
874
  j->state= JST_ARRAY_END;
686
874
  return 0;
687
874
}
688
689
690
/* Start reading key name. */
691
static int read_keyname(json_engine_t *j)
692
5.46k
{
693
5.46k
  j->state= JST_KEY;
694
5.46k
  return 0;
695
5.46k
}
696
697
698
/*
  Read characters until one that is not a space is found.
  *t_next receives the class of that character, *c_len its length;
  the character is consumed. If no character can be read, *t_next is
  C_EOS at the end of the string and C_BAD otherwise, *c_len is the
  non-positive result of the read, and the position is not advanced.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
    js->c_str+= *c_len;

    if (*t_next != C_SPACE)
      return;
  }
}
711
712
713
/* Next key name. */
714
static int next_key(json_engine_t *j)
715
12.0k
{
716
12.0k
  int t_next, c_len;
717
12.0k
  get_first_nonspace(&j->s, &t_next, &c_len);
718
719
12.0k
  if (t_next == C_QUOTE)
720
11.9k
  {
721
11.9k
    j->state= JST_KEY;
722
11.9k
    return 0;
723
11.9k
  }
724
725
96
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
726
96
              ((t_next == C_BAD) ? JE_BAD_CHR :
727
56
                                   JE_SYN);
728
96
  return 1;
729
12.0k
}
730
731
732
/* Forward declarations. */
733
static int skip_colon(json_engine_t *j);
734
static int skip_key(json_engine_t *j);
735
static int struct_end_cb(json_engine_t *j);
736
static int struct_end_qb(json_engine_t *j);
737
static int struct_end_cm(json_engine_t *j);
738
static int struct_end_eos(json_engine_t *j);
739
740
741
/* ',' inside an array: the next item is a value, go to JST_VALUE. */
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;
  return 0;
}
746
747
748
/*
  The first item of an array starts with the character just read.
  Step back by the saved length of that character, so it is read
  again in the JST_VALUE state.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
754
755
756
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
757
/*
758
   EOS              {            }             [             ]
759
   :                ,            "             -0..9         f
760
   n                t              ETC          ERR           BAD
761
*/
762
{
763
  {/*VALUE*/
764
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
765
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
766
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
767
  {/*KEY*/
768
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
769
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
770
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
771
  {/*OBJ_START*/
772
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
773
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
774
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
775
  {/*OBJ_END*/
776
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
777
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
778
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
779
  {/*ARRAY_START*/
780
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
781
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
782
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
783
  {/*ARRAY_END*/
784
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
785
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
786
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
787
  {/*DONE*/
788
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
789
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
790
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
791
  {/*OBJ_CONT*/
792
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
793
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
794
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
795
  {/*ARRAY_CONT*/
796
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
797
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
798
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
799
  {/*READ_VALUE*/
800
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
801
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
802
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
803
};
804
805
806
807
int json_scan_start(json_engine_t *je,
808
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
809
4.36k
{
810
4.36k
  static const uchar no_time_to_die= 0;
811
812
4.36k
  json_string_setup(&je->s, i_cs, str, end);
813
4.36k
  je->stack[0]= JST_DONE;
814
4.36k
  je->stack_p= 0;
815
4.36k
  je->state= JST_VALUE;
816
4.36k
  je->killed_ptr = (uchar*)&no_time_to_die;
817
4.36k
  return 0;
818
4.36k
}
819
820
821
/* Skip colon and the value. */
822
static int skip_colon(json_engine_t *j)
823
782
{
824
782
  int t_next, c_len;
825
826
782
  get_first_nonspace(&j->s, &t_next, &c_len);
827
828
782
  if (t_next == C_COLON)
829
694
  {
830
694
    get_first_nonspace(&j->s, &t_next, &c_len);
831
694
    return json_actions[JST_VALUE][t_next](j);
832
694
 }
833
834
88
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
835
88
              ((t_next == C_BAD) ? JE_BAD_CHR:
836
64
                                   JE_SYN);
837
838
88
  return 1;
839
782
}
840
841
842
/* Skip colon and the value. */
843
static int skip_key(json_engine_t *j)
844
1.86k
{
845
1.86k
  int t_next, c_len;
846
847
1.86k
  if (j->s.c_next<128 && json_instr_chr_map[j->s.c_next] == S_BKSL &&
848
1.86k
      json_handle_esc(&j->s))
849
10
    return 1;
850
851
2.27k
  while (json_read_keyname_chr(j) == 0) {}
852
853
1.85k
  if (j->s.error)
854
102
    return 1;
855
856
1.75k
  get_first_nonspace(&j->s, &t_next, &c_len);
857
1.75k
  return json_actions[JST_VALUE][t_next](j);
858
1.85k
}
859
860
861
/*
862
  Handle EOS after the end of an object or array.
863
  To do that we should pop the stack to see if
864
  we are inside an object, or an array, and
865
  run our 'state machine' accordingly.
866
*/
867
static int struct_end_eos(json_engine_t *j)
868
35
{ return json_actions[j->stack[j->stack_p]][C_EOS](j); }
869
870
871
/*
872
  Handle '}' after the end of an object or array.
873
  To do that we should pop the stack to see if
874
  we are inside an object, or an array, and
875
  run our 'state machine' accordingly.
876
*/
877
static int struct_end_cb(json_engine_t *j)
878
548
{ return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
879
880
881
/*
882
  Handle ']' after the end of an object or array.
883
  To do that we should pop the stack to see if
884
  we are inside an object, or an array, and
885
  run our 'state machine' accordingly.
886
*/
887
static int struct_end_qb(json_engine_t *j)
888
602
{ return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
889
890
891
/*
892
  Handle ',' after the end of an object or array.
893
  To do that we should pop the stack to see if
894
  we are inside an object, or an array, and
895
  run our 'state machine' accordingly.
896
*/
897
static int struct_end_cm(json_engine_t *j)
898
1.10k
{ return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
899
900
901
/*
  Read one character of a key name.

  Returns 0 when a character of the name has been read (it is in
  j->s.c_next, escape sequences are decoded). Returns non-zero when
  the name is over or an error has happened: at the closing quote the
  spaces and the ':' after it are skipped and the state becomes
  JST_VALUE, with j->s.error left untouched; in every other non-zero
  case j->s.error is set.
*/
int json_read_keyname_chr(json_engine_t *j)
{
  int len= json_next_char(&j->s);

  if (len > 0)
  {
    int chr_class;

    j->s.c_str+= len;
    if (j->s.c_next >= 128)
      return 0;

    chr_class= json_instr_chr_map[j->s.c_next];
    if (chr_class <= S_ETC)
      return 0;

    if (chr_class == S_BKSL)
      return json_handle_esc(&j->s);

    if (chr_class == S_ERR)
    {
      /* Step back, so the position points at the offending character. */
      j->s.c_str-= len;
      j->s.error= JE_STRING_CONST;
      return 1;
    }

    if (chr_class == S_QUOTE)
    {
      /* Skip spaces until ':'. */
      while ((len= json_next_char(&j->s)) > 0)
      {
        if (j->s.c_next == ':')
        {
          j->s.c_str+= len;
          j->state= JST_VALUE;
          return 1;
        }

        if (j->s.c_next >= 128 || json_chr_map[j->s.c_next] != C_SPACE)
        {
          j->s.error= JE_SYN;
          return 1;
        }

        j->s.c_str+= len;
      }
      /* No character could be read: reported by the code below. */
    }
  }

  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
948
949
950
/*
  Read the value at the current position.

  If the engine is in the JST_KEY state, the rest of the key name
  (up to and including the ':') is skipped first. Then the spaces are
  skipped and the handler of the JST_READ_VALUE state is called for
  the first character of the value; that handler fills in the
  description of the value (value_type and so on).

  value_begin is set to the first character of the value and
  value_end to the position reached by the handler.

  Returns 0 on success, non-zero on error (j->s.error is set).
*/
int json_read_value(json_engine_t *j)
{
  int t_next, c_len, res;

  j->value_type= JSON_VALUE_UNINITIALIZED;
  if (j->state == JST_KEY)
  {
    while (json_read_keyname_chr(j) == 0) {}

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &t_next, &c_len);

  /*
    When no character could be read, c_len is not positive and the
    position has not been advanced. Subtracting such c_len would move
    value_begin forward, possibly beyond the end of the text, so only
    a positive length is stepped back over.
  */
  j->value_begin= (c_len > 0) ? j->s.c_str - c_len : j->s.c_str;
  res= json_actions[JST_READ_VALUE][t_next](j);
  j->value_end= j->s.c_str;
  return res;
}
970
971
972
/*
  Make one step of the scan: skip the spaces, read the next
  character (its length is saved in j->sav_c_len) and call the
  handler for the current state and the class of that character.
  Returns 1 without calling the handler if the byte pointed to by
  killed_ptr is not zero. Otherwise returns 0 if the handler
  returned 0 and 1 if not.
*/
int json_scan_next(json_engine_t *j)
{
  int chr_class;

  get_first_nonspace(&j->s, &chr_class, &j->sav_c_len);

  if (*j->killed_ptr)
    return 1;
  return json_actions[j->state][chr_class](j) != 0;
}
979
980
981
/* Classes of characters as seen by the parser of JSON paths. */
enum json_path_chr_classes {
  P_EOS,    /* end of string */
  P_USD,    /* $ */
  P_ASTER,  /* * */
  P_LSQRB,  /* [ */
  P_T,      /* t (for "to") */
  P_RSQRB,  /* ] */
  P_POINT,  /* . */
  P_NEG,    /* hyphen (for negative index in path) */
  P_ZERO,   /* 0 */
  P_DIGIT,  /* 123456789 */
  P_L,      /* l (for "lax") */
  P_S,      /* s (for "strict") */
  P_SPACE,  /* space */
  P_BKSL,   /* \ */
  P_QUOTE,  /* " */
  P_ETC,    /* everything else */
  P_ERR,    /* character disallowed in JSON */
  P_BAD,    /* invalid character */
  N_PATH_CLASSES,
};


/*
  Class of each of the first 128 code points for the parser of JSON
  paths. The letters 'l', 's' and 't' have classes of their own in
  both upper and lower case; all other letters are P_ETC.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  /* 0x00 - 0x1F: control characters; TAB, LF and CR count as spaces */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,

  /* 0x20 - 0x3F: space, '"', '$', '*', '-', '.' and the digits */
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_NEG,   P_POINT, P_ETC,
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,

  /* 0x40 - 0x5F: 'L' (0x4C), 'S' (0x53), 'T' (0x54), '[', '\' and ']' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_T,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL,  P_RSQRB, P_ETC,   P_ETC,

  /* 0x60 - 0x7F: 'l' (0x6C), 's' (0x73) and 't' (0x74) */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_T,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
};
1025
1026
1027
/*
  States of the JSON path parser.

  The states listed before N_PATH_STATES index the rows of the path
  transition table. The ones after it only appear as transition results
  and are handled by the parser itself.
*/
enum json_path_states {
  PS_GO= 0,       /* Initial state. */
  PS_LAX,         /* Parse the 'lax' keyword. */
  PS_PT,          /* A new path step begins. */
  PS_AR,          /* Parse an array step. */
  PS_SAR,         /* Space after the '['. */
  PS_AWD,         /* Array wildcard. */
  PS_NEG,         /* Parse '-' (hyphen). */
  PS_Z,           /* '0' (as an array item number). */
  PS_INT,         /* Parse integer (as an array item number). */
  PS_AS,          /* Space. */
  PS_KEY,         /* Key. */
  PS_KNM,         /* Parse key name. */
  PS_KWD,         /* Key wildcard. */
  PS_AST,         /* Asterisk. */
  PS_DWD,         /* Double wildcard. */
  PS_KEYX,        /* Key started with a quote ("). */
  PS_KNMX,        /* Parse quoted key name. */
  PS_LAST,        /* Parse the 'last' keyword. */
  PS_T,           /* Parse the 'to' keyword. */
  N_PATH_STATES,  /* States below are not rows of the transitions table. */
  PS_SCT,         /* Parse the 'strict' keyword. */
  PS_EKY,         /* '.' after the key name, so the next step is a key. */
  PS_EKYX,        /* Closing " of the quoted key name. */
  PS_EAR,         /* '[' after the key name, so the next step is an array. */
  PS_ESC,         /* Escaping in the key name. */
  PS_ESCX,        /* Escaping in the quoted key name. */
  PS_OK,          /* Path ended normally. */
  PS_KOK          /* EOS after the key name, so end the path normally. */
};
1057
1058
1059
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1060
{
1061
/*
1062
            EOS       $,      *       [       to       ]       .       -
1063
            0       1..9    L       S       SPACE   \       "       ETC
1064
            ERR              BAD
1065
*/
1066
/* GO  */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1067
            JE_SYN, JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1068
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1069
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1070
            JE_SYN, JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1071
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1072
/* PT */  { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1073
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1074
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1075
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_NEG,
1076
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1077
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1078
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1079
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1080
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1081
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1082
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1083
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1084
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1085
            PS_INT, PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1086
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1087
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1088
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1089
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1090
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1091
            PS_INT, PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1092
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1093
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_T, PS_PT, JE_SYN, PS_NEG,
1094
            PS_Z, PS_INT, PS_LAST, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1095
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1096
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, PS_KNM, JE_SYN, PS_KNM,
1097
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1098
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1099
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_KNM, PS_EKY, PS_KNM,
1100
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1101
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1102
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, JE_SYN, PS_EKY, JE_SYN,
1103
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1104
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1105
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1106
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1107
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1108
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1109
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1110
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1111
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,
1112
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1113
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1114
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1115
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1116
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1117
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1118
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1119
            JE_SYN, JE_BAD_CHR},
1120
/* T */   { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1121
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1122
            JE_SYN, JE_BAD_CHR},
1123
};
1124
1125
1126
int json_path_setup(json_path_t *p,
1127
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1128
0
{
1129
0
 int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0,
1130
0
  prev_value=0, is_to= 0, *cur_val;
1131
0
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1132
0
  json_string_setup(&p->s, i_cs, str, end);
1133
1134
0
  p->steps[0].type= JSON_PATH_ARRAY_WILD;
1135
0
  p->last_step= p->steps;
1136
0
  p->mode_strict= FALSE;
1137
0
  p->types_used= JSON_PATH_KEY_NULL;
1138
1139
0
  do
1140
0
  {
1141
0
    if ((c_len= json_next_char(&p->s)) <= 0)
1142
0
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1143
0
    else
1144
0
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1145
1146
0
    if ((state= json_path_transitions[state][t_next]) < 0)
1147
0
      return p->s.error= state;
1148
1149
0
    p->s.c_str+= c_len;
1150
1151
0
    switch (state)
1152
0
    {
1153
0
    case PS_LAX:
1154
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1155
0
        return 1;
1156
0
      p->mode_strict= FALSE;
1157
0
      continue;
1158
0
    case PS_SCT:
1159
0
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1160
0
        return 1;
1161
0
      p->mode_strict= TRUE;
1162
0
      state= PS_LAX;
1163
0
      continue;
1164
0
    case PS_KWD:
1165
0
    case PS_AWD:
1166
0
      p->last_step->type|= JSON_PATH_WILD;
1167
0
      p->types_used|= JSON_PATH_WILD;
1168
0
      continue;
1169
0
    case PS_INT:
1170
0
      cur_val= is_to ? &(p->last_step->n_item_end) :
1171
0
                       &(p->last_step->n_item);
1172
0
      if (is_last)
1173
0
      {
1174
0
        prev_value*= 10;
1175
0
        prev_value-= p->s.c_next - '0';
1176
0
        *cur_val= -1 + prev_value;
1177
0
      }
1178
0
      else
1179
0
      {
1180
0
        (*cur_val)*= 10;
1181
0
        if (is_negative_index)
1182
0
          *cur_val-= p->s.c_next - '0';
1183
0
        else
1184
0
          *cur_val+= p->s.c_next - '0';
1185
0
      }
1186
0
      continue;
1187
0
    case PS_EKYX:
1188
0
      p->last_step->key_end= p->s.c_str - c_len;
1189
0
      state= PS_PT;
1190
0
      continue;
1191
0
    case PS_EKY:
1192
0
      p->last_step->key_end= p->s.c_str - c_len;
1193
0
      state= PS_KEY;
1194
      /* fall through */
1195
0
    case PS_KEY:
1196
0
      p->last_step++;
1197
0
      is_to= 0;
1198
0
      prev_value= 0;
1199
0
      is_negative_index= 0;
1200
0
      is_last= 0;
1201
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1202
0
        return p->s.error= JE_DEPTH;
1203
0
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1204
0
      double_wildcard= JSON_PATH_KEY_NULL;
1205
      /* fall through */
1206
0
    case PS_KEYX:
1207
0
      p->last_step->key= p->s.c_str;
1208
0
      continue;
1209
0
    case PS_EAR:
1210
0
      p->last_step->key_end= p->s.c_str - c_len;
1211
0
      state= PS_AR;
1212
      /* fall through */
1213
0
    case PS_AR:
1214
0
      p->last_step++;
1215
0
      is_last= 0;
1216
0
      is_to= 0;
1217
0
      prev_value= 0;
1218
0
      is_negative_index= 0;
1219
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1220
0
        return p->s.error= JE_DEPTH;
1221
0
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1222
0
      double_wildcard= JSON_PATH_KEY_NULL;
1223
0
      p->last_step->n_item= 0;
1224
0
      continue;
1225
0
    case PS_ESC:
1226
0
      if (json_handle_esc(&p->s))
1227
0
        return 1;
1228
0
      state= PS_KNM;
1229
0
      continue;
1230
0
    case PS_ESCX:
1231
0
      if (json_handle_esc(&p->s))
1232
0
        return 1;
1233
0
      state= PS_KNMX;
1234
0
      continue;
1235
0
    case PS_KOK:
1236
0
      p->last_step->key_end= p->s.c_str - c_len;
1237
0
      state= PS_OK;
1238
0
      break; /* 'break' as the loop supposed to end after that. */
1239
0
    case PS_DWD:
1240
0
      double_wildcard= JSON_PATH_DOUBLE_WILD;
1241
0
      continue;
1242
0
    case PS_NEG:
1243
0
       p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1244
0
       is_negative_index= 1;
1245
0
       continue;
1246
0
    case PS_LAST:
1247
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
1248
0
       return 1;
1249
0
      p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1250
0
      is_last= 1;
1251
0
      if (is_to)
1252
0
        p->last_step->n_item_end= -1;
1253
0
      else
1254
0
        p->last_step->n_item= -1;
1255
0
      continue;
1256
0
    case PS_T:
1257
0
      if ((p->s.error= skip_string_verbatim(&p->s, "o")))
1258
0
        return 1;
1259
0
      is_to= 1;
1260
0
      is_negative_index= 0;
1261
0
      is_last= 0;
1262
0
      prev_value= 0;
1263
0
      p->last_step->n_item_end= 0;
1264
0
      p->last_step->type|= JSON_PATH_ARRAY_RANGE;
1265
0
      p->types_used|= JSON_PATH_ARRAY_RANGE;
1266
0
      continue;
1267
0
    };
1268
0
  } while (state != PS_OK);
1269
1270
0
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
1271
0
}
1272
1273
1274
/*
  Scan forward until the engine's stack depth drops below 'level'.

  @return 0 once j->stack_p is below 'level', 1 if json_scan_next()
          reports a failure first.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  while (j->stack_p >= level)
  {
    if (json_scan_next(j) != 0)
      return 1;
  }

  return 0;
}
1283
1284
1285
/*
1286
  works as json_skip_level() but also counts items on the current
1287
  level skipped.
1288
*/
1289
int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1290
0
{
1291
0
  int level= j->stack_p;
1292
1293
0
  *n_items_skipped= 0;
1294
0
  while (json_scan_next(j) == 0)
1295
0
  {
1296
0
    if (j->stack_p < level)
1297
0
      return 0;
1298
0
    if (j->stack_p == level && j->state == JST_VALUE)
1299
0
      (*n_items_skipped)++;
1300
0
  }
1301
1302
0
  return 1;
1303
0
}
1304
1305
1306
/*
  Count the items of the level the engine is positioned in.

  The counting is done on a private copy of the engine, so *je itself is
  not advanced.

  @return the result of json_skip_level_and_count() on the copy.
*/
int json_skip_array_and_count(json_engine_t *je, int *n_items)
{
  json_engine_t scratch;

  scratch= *je;
  *n_items= 0;

  return json_skip_level_and_count(&scratch, n_items);
}
1313
1314
1315
/*
  Skip the value the engine is positioned at: read it and, unless it is
  a scalar, skip the whole nested level as well.

  @return 0 on success, non-zero if reading or skipping failed.
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j) != 0)
    return 1;

  /* A scalar has been consumed completely by json_read_value(). */
  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1325
1326
1327
0
/*
  Sentinel stored in an array counter slot by handle_match() for a path
  step it passes over; json_find_path() unwinds past such slots at the
  end of an object.
*/
#define SKIPPED_STEP_MARK INT_MAX32
1328
1329
/*
1330
  Current step of the patch matches the JSON construction.
1331
  Now we should either stop the search or go to the next
1332
  step of the path.
1333
*/
1334
static int handle_match(json_engine_t *je, json_path_t *p,
1335
                        json_path_step_t **p_cur_step, int *array_counters)
1336
0
{
1337
0
  json_path_step_t *next_step= *p_cur_step + 1;
1338
1339
0
  DBUG_ASSERT(*p_cur_step < p->last_step);
1340
1341
0
  if (json_read_value(je))
1342
0
    return 1;
1343
1344
0
  if (json_value_scalar(je))
1345
0
  {
1346
0
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1347
0
    {
1348
0
      if (++next_step > p->last_step)
1349
0
      {
1350
0
        je->s.c_str= je->value_begin;
1351
0
        return 1;
1352
0
      }
1353
0
    }
1354
0
    return 0;
1355
0
  }
1356
1357
0
  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1358
0
      je->value_type & JSON_VALUE_OBJECT)
1359
0
  {
1360
0
    do
1361
0
    {
1362
0
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1363
0
      if (++next_step > p->last_step)
1364
0
      {
1365
0
        je->s.c_str= je->value_begin;
1366
0
        je->stack_p--;
1367
0
        return 1;
1368
0
      }
1369
0
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1370
0
  }
1371
1372
0
  if ((int) je->value_type !=
1373
0
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1374
0
    return json_skip_level(je);
1375
1376
0
  array_counters[next_step - p->steps]= 0;
1377
0
  if (next_step->type & JSON_PATH_ARRAY)
1378
0
  {
1379
0
    int array_size;
1380
0
    if (next_step->n_item >= 0)
1381
0
      array_size= 0;
1382
0
    else
1383
0
    {
1384
0
      json_engine_t j2= *je;
1385
0
      if (json_skip_array_and_count(&j2, &array_size))
1386
0
      {
1387
0
        *je= j2;
1388
0
        return 1;
1389
0
      }
1390
0
      array_size= -array_size;
1391
0
    }
1392
0
    array_counters[next_step - p->steps]= array_size;
1393
0
  }
1394
1395
0
  *p_cur_step= next_step;
1396
0
  return 0;
1397
0
}
1398
1399
1400
/*
1401
  Check if the name of the current JSON key matches
1402
  the step of the path.
1403
*/
1404
int json_key_matches(json_engine_t *je, json_string_t *k)
1405
14.7k
{
1406
16.2k
  while (json_read_keyname_chr(je) == 0)
1407
8.50k
  {
1408
8.50k
    if (json_read_string_const_chr(k) ||
1409
8.50k
        je->s.c_next != k->c_next)
1410
6.97k
      return 0;
1411
8.50k
  }
1412
1413
7.74k
  return json_read_string_const_chr(k);
1414
14.7k
}
1415
1416
1417
int json_find_path(json_engine_t *je,
1418
                   json_path_t *p, json_path_step_t **p_cur_step,
1419
                   int *array_counters)
1420
0
{
1421
0
  json_string_t key_name;
1422
0
  int res= 0;
1423
1424
0
  json_string_set_cs(&key_name, p->s.cs);
1425
1426
0
  do
1427
0
  {
1428
0
    json_path_step_t *cur_step= *p_cur_step;
1429
0
    switch (je->state)
1430
0
    {
1431
0
    case JST_KEY:
1432
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
1433
0
      if (!(cur_step->type & JSON_PATH_WILD))
1434
0
      {
1435
0
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
1436
0
        if (!json_key_matches(je, &key_name))
1437
0
        {
1438
0
          if (json_skip_key(je))
1439
0
            goto exit;
1440
0
          continue;
1441
0
        }
1442
0
      }
1443
0
      if (cur_step == p->last_step ||
1444
0
          handle_match(je, p, p_cur_step, array_counters))
1445
0
        goto exit;
1446
0
      break;
1447
0
    case JST_VALUE:
1448
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
1449
0
      if (cur_step->type & JSON_PATH_ARRAY_RANGE)
1450
0
      {
1451
0
        res= (cur_step->n_item <= array_counters[cur_step - p->steps] &&
1452
0
              cur_step->n_item_end >= array_counters[cur_step - p->steps]);
1453
0
        array_counters[cur_step - p->steps]++;
1454
0
      }
1455
0
      else
1456
0
        res= cur_step->n_item == array_counters[cur_step - p->steps]++;
1457
0
      if ((cur_step->type & JSON_PATH_WILD) || res)
1458
0
      {
1459
        /* Array item matches. */
1460
0
        if (cur_step == p->last_step ||
1461
0
            handle_match(je, p, p_cur_step, array_counters))
1462
0
          goto exit;
1463
0
      }
1464
0
      else
1465
0
        json_skip_array_item(je);
1466
0
      break;
1467
0
    case JST_OBJ_END:
1468
0
      do
1469
0
      {
1470
0
        (*p_cur_step)--;
1471
0
      } while (*p_cur_step > p->steps &&
1472
0
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
1473
0
      break;
1474
0
    case JST_ARRAY_END:
1475
0
      (*p_cur_step)--;
1476
0
      break;
1477
0
    default:
1478
0
      DBUG_ASSERT(0);
1479
0
      break;
1480
0
    };
1481
0
  } while (json_scan_next(je) == 0);
1482
1483
  /* No luck. */
1484
0
  return 1;
1485
1486
0
exit:
1487
0
  return je->s.error;
1488
0
}
1489
1490
1491
/*
  Start a search for several paths at once: store the search parameters
  in 'state' and look for the first match.

  @return the result of json_find_paths_next().
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;
  state->cur_depth= 0;

  return json_find_paths_next(je, state);
}
1500
1501
1502
/*
  Continue the multi-path search described by 'state'.

  @return je->s.error when the search stops at a match or on a failure
          while skipping a level, 1 when json_scan_next() stops the scan.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint idx;
  int have_result, nothing_continues;

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      have_result= FALSE;
      nothing_continues= TRUE;
      for (idx= 0; idx < state->n_paths; idx++)
      {
        json_path_t *path= state->paths + idx;
        json_path_step_t *step;

        if (state->path_depths[idx] < state->cur_depth)
          continue;                       /* Path already failed. */

        step= path->steps + state->cur_depth;
        if (!(step->type & JSON_PATH_KEY))
          continue;

        if (!(step->type & JSON_PATH_WILD))
        {
          json_string_t wanted_key;

          json_string_setup(&wanted_key, path->s.cs,
                            step->key, step->key_end);
          if (!json_key_matches(je, &wanted_key))
            continue;
        }

        if (step != path->last_step + state->cur_depth)
        {
          nothing_continues= FALSE;
          state->path_depths[idx]= state->cur_depth + 1;
        }
        else
          have_result= TRUE;
      }

      if (have_result)
        return je->s.error;               /* Return the result. */

      /* No possible paths left to check. Just skip the level. */
      if (nothing_continues && json_skip_level(je))
        return je->s.error;
      break;

    case JST_VALUE:
      have_result= FALSE;
      nothing_continues= TRUE;
      for (idx= 0; idx < state->n_paths; idx++)
      {
        json_path_t *path= state->paths + idx;
        json_path_step_t *step;

        if (state->path_depths[idx] < state->cur_depth)
          continue;                       /* Path already failed. */

        step= path->steps + state->cur_depth;
        if (!(step->type & JSON_PATH_ARRAY))
          continue;

        if (!(step->type & JSON_PATH_WILD) &&
            step->n_item != state->array_counters[state->cur_depth])
          continue;

        /* Array item matches. */
        if (step != path->last_step + state->cur_depth)
        {
          nothing_continues= FALSE;
          state->path_depths[idx]= state->cur_depth + 1;
        }
        else
          have_result= TRUE;
      }

      if (have_result)
        return je->s.error;

      if (nothing_continues)
        json_skip_array_item(je);

      state->array_counters[state->cur_depth]++;
      break;

    case JST_OBJ_START:
    case JST_ARRAY_START:
    {
      /* Only the paths expecting this kind of container go deeper. */
      const int wanted_kind= (je->state == JST_OBJ_START) ? JSON_PATH_KEY :
                                                            JSON_PATH_ARRAY;

      for (idx= 0; idx < state->n_paths; idx++)
      {
        if (state->path_depths[idx] < state->cur_depth)
          continue;                       /* Path already failed. */

        if (state->paths[idx].steps[state->cur_depth].type & wanted_kind)
          state->path_depths[idx]++;
      }
      state->cur_depth++;
      break;
    }

    case JST_OBJ_END:
    case JST_ARRAY_END:
      for (idx= 0; idx < state->n_paths; idx++)
      {
        if (state->path_depths[idx] >= state->cur_depth)
          state->path_depths[idx]--;
      }
      state->cur_depth--;
      break;

    default:
      DBUG_ASSERT(0);
      break;
    }
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;
}
1616
1617
1618
/*
  Convert the bytes in [ascii, ascii_end) to the json_cs charset, one
  character per byte, and write them to [json, json_end).

  @return the number of bytes written, or the non-positive value returned
          by the charset conversion that failed.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *in;

  for (in= ascii; in < ascii_end; in++)
  {
    int written= my_ci_wc_mb(json_cs, (my_wc_t) *in, out, json_end);

    if (written <= 0)
      return written;                     /* Error return. */

    out+= written;
  }

  return (int) (out - json);
}
1639
1640
1641
/*
  Decode a JSON string constant, resolving its escapes, and store the
  result in the res_cs charset in [res, res_end).

  @return the number of bytes written, or -1 when a character cannot be
          stored in the result (not representable in res_cs, or the buffer
          is too small) or when reading the source ends with anything but
          JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t src;
  uchar *out= res;

  json_string_setup(&src, json_cs, json_str, json_end);

  for (;;)
  {
    int written;

    if (json_read_string_const_chr(&src) != 0)
      break;

    written= my_ci_wc_mb(res_cs, src.c_next, out, res_end);
    if (written <= 0)
    {
      /*
        Either the result charset has no such character or the result
        buffer is too small; both are reported the same way.
      */
      return -1;
    }
    out+= written;
  }

  if (src.error != JE_EOS)
    return -1;

  return (int) (out - res);
}
1667
1668
1669
/*
  How a character has to be escaped when it is written into JSON.
  Except for ESC_, the value of a class is the character that follows
  the backslash in the escaped form (for ESC_BS the escaped character
  itself is written instead).
*/
enum json_esc_char_classes {
  ESC_=   0,     /* No need to escape. */
  ESC_U=  'u',   /* Not allowed in JSON as is. Always escape as \uXXXX. */
  ESC_B=  'b',   /* Backspace. Escape as \b */
  ESC_F=  'f',   /* Formfeed. Escape as \f */
  ESC_N=  'n',   /* Newline. Escape as \n */
  ESC_R=  'r',   /* Return. Escape as \r */
  ESC_T=  't',   /* Tab. Escape as \t */
  ESC_BS= '\\'   /* Backslash or '"'. Escape with the \ prefix. */
};
1680
1681
1682
/* This specifies how we should escape the character. */
1683
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
1684
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1685
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
1686
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1687
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1688
1689
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1690
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1691
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1692
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1693
1694
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1695
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1696
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1697
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
1698
};
1699
1700
1701
/* Upper-case hexadecimal digit for each 4-bit value (index 0..15). */
static const char hexconv[17] = "0123456789ABCDEF";
1702
1703
1704
/*
  Copy the string [str, str_end) in the str_cs charset into
  [json, json_end) in the json_cs charset, escaping the characters as a
  JSON string requires.

  Characters that json_escape_chr_map[] marks for escaping get their short
  escape (\n, \t, \", ...). Characters that must be escaped numerically,
  or that json_cs cannot represent, are written as \uXXXX; a character
  that needs a UTF-16 surrogate pair is written as \uXXXX\uXXXX.

  @return the number of bytes written on success,
          JSON_ERROR_OUT_OF_SPACE if the result does not fit,
          JSON_ERROR_ILLEGAL_SYMBOL if the source contains a sequence that
          cannot be decoded, or a character that has no UTF-16 form.
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping needed: try to store the character as it is. */
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return JSON_ERROR_OUT_OF_SPACE;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /* The backslash that starts every escape. */
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return JSON_ERROR_OUT_OF_SPACE;
      }
      json+= c_len;

      /* For '"' and '\' the character itself follows, else the class. */
      if ((c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
                              json, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return JSON_ERROR_OUT_OF_SPACE;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /* We have to use \uXXXX escaping; "\u" is written already. */
        uchar utf16buf[4];
        uchar code_str[10];               /* "XXXX" or "XXXX\uXXXX" */
        const uchar *code_end;
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);

        /*
          Anything but one or two 16-bit units means the character has no
          UTF-16 form; utf16buf is not usable then.
        */
        if (u_len != 2 && u_len != 4)
          return JSON_ERROR_ILLEGAL_SYMBOL;

        code_str[0]= hexconv[utf16buf[0] >> 4];
        code_str[1]= hexconv[utf16buf[0] & 15];
        code_str[2]= hexconv[utf16buf[1] >> 4];
        code_str[3]= hexconv[utf16buf[1] & 15];
        code_end= code_str + 4;

        if (u_len == 4)
        {
          /* The second half of a surrogate pair needs its own "\u". */
          code_str[4]= '\\';
          code_str[5]= 'u';
          code_str[6]= hexconv[utf16buf[2] >> 4];
          code_str[7]= hexconv[utf16buf[2] & 15];
          code_str[8]= hexconv[utf16buf[3] >> 4];
          code_str[9]= hexconv[utf16buf[3] & 15];
          code_end= code_str + 10;
        }

        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return JSON_ERROR_OUT_OF_SPACE;
      }
    }
    else /* c_len <= 0, an illegal symbol. */
      return JSON_ERROR_ILLEGAL_SYMBOL;
  }

  return (int)(json - json_start);
}
1783
1784
1785
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
1786
                        const uchar *str, const uchar *end,
1787
                        json_path_t *p)
1788
0
{
1789
0
  json_scan_start(je, i_cs, str, end);
1790
0
  p->last_step= p->steps - 1; 
1791
0
  return 0;
1792
0
}
1793
1794
1795
/*
  Move to the next value of the JSON text and update 'p' so that it
  describes the path of that value.

  @return 0 when a value was read, 1 when there is nothing more to read
          or an error occurred.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  /* First call after json_get_path_start(): read the top-level value. */
  if (p->last_step < p->steps)
  {
    if (json_read_value(je))
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }

  if (!json_value_scalar(je))
  {
    /* The previous value was a container: open a new step for it. */
    p->last_step++;
    p->last_step->type= (enum json_path_step_types) je->value_type;
    p->last_step->n_item= 0;
  }
  else if (p->last_step->type & JSON_PATH_ARRAY)
    p->last_step->n_item++;

  if (json_scan_next(je))
    return 1;

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      /* Record where the key name starts and where it ends. */
      p->last_step->key= je->s.c_str;
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);

      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      return json_read_value(je) ? 1 : 0;

    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Leave the container; it counts as one item of its parent. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;

    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1857
1858
1859
/*
  Read the value at the engine's position and report where it is.

  For a scalar the length is taken from the engine. For a container the
  whole level is skipped and the length is measured from the start of
  the value to the position reached.

  @param je         the engine, positioned at a value
  @param value      receives the start of the value
  @param value_len  receives the length of the value

  @return the type of the value, or JSV_BAD_JSON if reading or skipping
          failed.
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  if (json_read_value(je) != 0)
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (!json_value_scalar(je))
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }
  else
    *value_len= je->value_len;

  /* The cast below relies on the two enums having the same values. */
  compile_time_assert((enum json_types)JSON_VALUE_OBJECT == JSV_OBJECT);
  compile_time_assert((enum json_types)JSON_VALUE_ARRAY == JSV_ARRAY);
  compile_time_assert((enum json_types)JSON_VALUE_STRING == JSV_STRING);
  compile_time_assert((enum json_types)JSON_VALUE_NUMBER == JSV_NUMBER);
  compile_time_assert((enum json_types)JSON_VALUE_TRUE == JSV_TRUE);
  compile_time_assert((enum json_types)JSON_VALUE_FALSE == JSV_FALSE);
  compile_time_assert((enum json_types)JSON_VALUE_NULL == JSV_NULL);

  return (enum json_types) je->value_type;
}
1890
1891
1892
enum json_types json_type(const char *js, const char *js_end,
1893
                          const char **value, int *value_len)
1894
0
{
1895
0
  json_engine_t je;
1896
1897
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1898
0
                  (const uchar *) js_end);
1899
1900
0
  return smart_read_value(&je, value, value_len);
1901
0
}
1902
1903
1904
enum json_types json_get_array_item(const char *js, const char *js_end,
1905
                                    int n_item,
1906
                                    const char **value, int *value_len)
1907
0
{
1908
0
  json_engine_t je;
1909
0
  int c_item= 0;
1910
1911
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1912
0
                  (const uchar *) js_end);
1913
1914
0
  if (json_read_value(&je) ||
1915
0
      je.value_type != JSON_VALUE_ARRAY)
1916
0
    goto err_return;
1917
1918
0
  while (!json_scan_next(&je))
1919
0
  {
1920
0
    switch (je.state)
1921
0
    {
1922
0
    case JST_VALUE:
1923
0
      if (c_item == n_item)
1924
0
        return smart_read_value(&je, value, value_len);
1925
1926
0
      if (json_skip_key(&je))
1927
0
        goto err_return;
1928
1929
0
      c_item++;
1930
0
      break;
1931
1932
0
    case JST_ARRAY_END:
1933
0
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1934
0
      *value_len= c_item;
1935
0
      return JSV_NOTHING;
1936
0
    }
1937
0
  }
1938
1939
0
err_return:
1940
0
  return JSV_BAD_JSON;
1941
0
}
1942
1943
1944
/** Simple json lookup for a value by the key.
1945
1946
  Expects JSON object.
1947
  Only scans the 'first level' of the object, not
1948
  the nested structures.
1949
1950
  @param js          [in]       json object to search in
1951
  @param js_end      [in]       end of json string
1952
  @param key         [in]       key to search for
1953
  @param key_end     [in]         - " -
1954
  @param value_start [out]      pointer into js (value or closing })
1955
  @param value_len   [out]      length of the value found or number of keys
1956
1957
  @retval the type of the key value
1958
  @retval JSV_BAD_JSON - syntax error found reading JSON.
1959
                         or not JSON object.
1960
  @retval JSV_NOTHING - no such key found.
1961
*/
1962
enum json_types json_get_object_key(const char *js, const char *js_end,
1963
                                    const char *key,
1964
                                    const char **value, int *value_len)
1965
2.18k
{
1966
2.18k
  const char *key_end= key + strlen(key);
1967
2.18k
  json_engine_t je;
1968
2.18k
  json_string_t key_name;
1969
2.18k
  int n_keys= 0;
1970
1971
2.18k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1972
1973
2.18k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1974
2.18k
                  (const uchar *) js_end);
1975
1976
2.18k
  if (json_read_value(&je) ||
1977
2.18k
      je.value_type != JSON_VALUE_OBJECT)
1978
379
    goto err_return;
1979
1980
7.52k
  while (!json_scan_next(&je))
1981
7.36k
  {
1982
7.36k
    switch (je.state)
1983
7.36k
    {
1984
7.35k
    case JST_KEY:
1985
7.35k
      n_keys++;
1986
7.35k
      json_string_set_str(&key_name, (const uchar *) key,
1987
7.35k
                          (const uchar *) key_end);
1988
7.35k
      if (json_key_matches(&je, &key_name))
1989
960
        return smart_read_value(&je, value, value_len);
1990
1991
6.39k
      if (json_skip_key(&je))
1992
681
        goto err_return;
1993
1994
5.71k
      break;
1995
1996
5.71k
    case JST_OBJ_END:
1997
8
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1998
8
      *value_len= n_keys;
1999
8
      return JSV_NOTHING;
2000
7.36k
    }
2001
7.36k
  }
2002
2003
1.21k
err_return:
2004
1.21k
  return JSV_BAD_JSON;
2005
1.80k
}
2006
2007
2008
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2009
                                     const char *js_end __attribute__((unused)),
2010
                                     int nkey __attribute__((unused)),
2011
                                     const char **keyname __attribute__((unused)),
2012
                                     const char **keyname_end __attribute__((unused)),
2013
                                     const char **value __attribute__((unused)),
2014
                                     int *value_len __attribute__((unused)))
2015
0
{
2016
0
  json_engine_t je;
2017
0
  int keys_found= 0;
2018
2019
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2020
0
                  (const uchar *) js_end);
2021
2022
0
  if (json_read_value(&je) ||
2023
0
      je.value_type != JSON_VALUE_OBJECT)
2024
0
    goto err_return;
2025
2026
0
  while (!json_scan_next(&je))
2027
0
  {
2028
0
    switch (je.state)
2029
0
    {
2030
0
    case JST_KEY:
2031
0
      if (nkey == keys_found)
2032
0
      {
2033
0
        *keyname= (char *) je.s.c_str;
2034
0
        while (json_read_keyname_chr(&je) == 0)
2035
0
          *keyname_end= (char *) je.s.c_str;
2036
2037
0
        return smart_read_value(&je, value, value_len);
2038
0
      }
2039
2040
0
      keys_found++;
2041
0
      if (json_skip_key(&je))
2042
0
        goto err_return;
2043
2044
0
      break;
2045
2046
0
    case JST_OBJ_END:
2047
0
      return JSV_NOTHING;
2048
0
    }
2049
0
  }
2050
2051
0
err_return:
2052
0
  return JSV_BAD_JSON;
2053
0
}
2054
2055
2056
/** Check if json is valid (well-formed)
2057
2058
  @retval 0 - success, json is well-formed
2059
  @retval 1 - error, json is invalid
2060
*/
2061
int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2062
0
{
2063
0
  json_engine_t je;
2064
0
  json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2065
0
  while (json_scan_next(&je) == 0) /* no-op */ ;
2066
0
  return je.s.error == 0;
2067
0
}
2068
2069
2070
/*
2071
  Expects the JSON object as an js argument, and the key name.
2072
  Looks for this key in the object and returns
2073
  the location of all the text related to it.
2074
  The text includes the comma, separating this key.
2075
2076
  comma_pos - the hint where the comma is. It is important
2077
       if you plan to replace the key rather than just cut.
2078
    1  - comma is on the left
2079
    2  - comma is on the right.
2080
    0  - no comma at all (the object has just this single key)
2081
 
2082
  if no such key found *key_start is set to NULL.
2083
*/
2084
int json_locate_key(const char *js, const char *js_end,
2085
                    const char *kname,
2086
                    const char **key_start, const char **key_end,
2087
                    int *comma_pos)
2088
2.18k
{
2089
2.18k
  const char *kname_end= kname + strlen(kname);
2090
2.18k
  json_engine_t je;
2091
2.18k
  json_string_t key_name;
2092
2.18k
  int t_next, c_len, match_result;
2093
2094
2.18k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2095
2096
2.18k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2097
2.18k
                  (const uchar *) js_end);
2098
2099
2.18k
  if (json_read_value(&je) ||
2100
2.18k
      je.value_type != JSON_VALUE_OBJECT)
2101
379
    goto err_return;
2102
2103
1.80k
  *key_start= (const char *) je.s.c_str;
2104
1.80k
  *comma_pos= 0;
2105
2106
7.52k
  while (!json_scan_next(&je))
2107
7.36k
  {
2108
7.36k
    switch (je.state)
2109
7.36k
    {
2110
7.35k
    case JST_KEY:
2111
7.35k
      json_string_set_str(&key_name, (const uchar *) kname,
2112
7.35k
                          (const uchar *) kname_end);
2113
7.35k
      match_result= json_key_matches(&je, &key_name);
2114
7.35k
      if (json_skip_key(&je))
2115
1.57k
        goto err_return;
2116
5.78k
      get_first_nonspace(&je.s, &t_next, &c_len);
2117
5.78k
      je.s.c_str-= c_len;
2118
2119
5.78k
      if (match_result)
2120
62
      {
2121
62
        *key_end= (const char *) je.s.c_str;
2122
2123
62
        if (*comma_pos == 1)
2124
2
          return 0;
2125
2126
60
        DBUG_ASSERT(*comma_pos == 0);
2127
2128
60
        if (t_next == C_COMMA)
2129
1
        {
2130
1
          *key_end+= c_len;
2131
1
          *comma_pos= 2;
2132
1
        }
2133
59
        else if (t_next == C_RCURB)
2134
1
          *comma_pos= 0;
2135
58
        else
2136
58
          goto err_return;
2137
2
        return 0;
2138
60
      }
2139
2140
5.71k
      *key_start= (const char *) je.s.c_str;
2141
5.71k
      *comma_pos= 1;
2142
5.71k
      break;
2143
2144
8
    case JST_OBJ_END:
2145
8
      *key_start= NULL;
2146
8
      return 0;
2147
7.36k
    }
2148
7.36k
  }
2149
2150
2.17k
err_return:
2151
2.17k
  return 1;
2152
2153
1.80k
}