Coverage Report

Created: 2024-06-18 07:03

/src/server/strings/json_lib.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2016, 2022, MariaDB Corporation.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
15
16
#include <my_global.h>
17
#include <string.h>
18
#include <m_ctype.h>
19
#include "json_lib.h"
20
21
/*
22
  JSON escaping lets user specify UTF16 codes of characters.
23
  So we're going to need the UTF16 charset capabilities. Let's import
24
  them from the utf16 charset.
25
*/
26
int my_utf16_uni(CHARSET_INFO *cs,
27
                 my_wc_t *pwc, const uchar *s, const uchar *e);
28
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29
30
31
/*
  Attach a byte buffer to the reader: 'str' becomes the current read
  position and 'end' the stop pointer.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
37
38
39
/*
  Select the character set used to decode the buffer: remembers the
  charset, caches its mb_wc decoder in s->wc and clears s->error.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
45
46
47
static void json_string_setup(json_string_t *s,
48
                              CHARSET_INFO *i_cs, const uchar *str,
49
                              const uchar *end)
50
4.26k
{
51
4.26k
  json_string_set_cs(s, i_cs);
52
4.26k
  json_string_set_str(s, str, end);
53
4.26k
}
54
55
56
/*
  Character classes used by the main JSON state machine.
  Classes below NR_C_CLASSES select a column of json_actions[][];
  C_SPACE is skipped by get_first_nonspace() and never reaches the table.
*/
enum json_char_classes {
57
  C_EOS,    /* end of string */
58
  C_LCURB,  /* {  */
59
  C_RCURB,  /* } */
60
  C_LSQRB,  /* [ */
61
  C_RSQRB,  /* ] */
62
  C_COLON,  /* : */
63
  C_COMMA,  /* , */
64
  C_QUOTE,  /* " */
65
  C_DIGIT,  /* -0123456789 */
66
  C_LOW_F,  /* 'f' (for "false") */
67
  C_LOW_N,  /* 'n' (for "null") */
68
  C_LOW_T,  /* 't' (for "true") */
69
  C_ETC,    /* everything else */
70
  C_ERR,    /* character disallowed in JSON */
71
  C_BAD,    /* invalid character, charset handler cannot read it */
72
  NR_C_CLASSES, /* Counter for classes that are handled with functions. */
73
  C_SPACE   /* space. Doesn't need specific handlers, so after the counter.*/
74
};
75
76
77
/*
78
  This array maps first 128 Unicode Code Points into classes.
79
  The remaining Unicode characters should be mapped to C_ETC.
80
*/
81
82
/*
  Indexed by code point (0..127). Callers check 'c_next < 128' before
  indexing and use C_ETC for anything above.
*/
static enum json_char_classes json_chr_map[128] = {
83
  /* 0x00-0x1F: control characters; TAB, LF and CR count as C_SPACE. */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
84
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
85
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
86
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
87
88
  /* 0x20-0x3F: space, '"', ',', '-', digits, ':'. */
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
89
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
90
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
91
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
92
93
  /* 0x40-0x5F: upper-case letters, '[' and ']'. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
94
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
95
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,
97
98
  /* 0x60-0x7F: 'f', 'n', 't' start literals; '{' and '}'. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
99
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
100
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
101
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
102
};
103
104
105
/*
106
  JSON parser actually has more states than the 'enum json_states'
107
  declares. But the rest of the states aren't visible to the user so let's
108
  specify them here to avoid confusion.
109
*/
110
111
/*
  Internal parser states, numbered right after the user-visible ones so
  that both kinds can index the rows of json_actions[][].
*/
enum json_all_states {
112
  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
113
  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
114
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
115
  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
116
  NR_JSON_STATES= NR_JSON_USER_STATES+4  /* total number of rows */
117
};
118
119
120
typedef int (*json_state_handler)(json_engine_t *);
121
122
123
/* The string is broken. */
124
static int unexpected_eos(json_engine_t *j)
125
523
{
126
523
  j->s.error= JE_EOS;
127
523
  return 1;
128
523
}
129
130
131
/* This symbol here breaks the JSON syntax. */
132
static int syntax_error(json_engine_t *j)
133
470
{
134
470
  j->s.error= JE_SYN;
135
470
  return 1;
136
470
}
137
138
139
/* Value of object. */
140
static int mark_object(json_engine_t *j)
141
1.70k
{
142
1.70k
  j->state= JST_OBJ_START;
143
1.70k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
144
1.70k
  {
145
1.70k
    j->stack[j->stack_p]= JST_OBJ_CONT;
146
1.70k
    return 0;
147
1.70k
  }
148
4
  j->s.error= JE_DEPTH;
149
4
  return 1;
150
1.70k
}
151
152
153
/* Read value of object. */
154
static int read_obj(json_engine_t *j)
155
4.29k
{
156
4.29k
  j->state= JST_OBJ_START;
157
4.29k
  j->value_type= JSON_VALUE_OBJECT;
158
4.29k
  j->value= j->value_begin;
159
4.29k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
160
4.29k
  {
161
4.29k
    j->stack[j->stack_p]= JST_OBJ_CONT;
162
4.29k
    return 0;
163
4.29k
  }
164
0
  j->s.error= JE_DEPTH;
165
0
  return 1;
166
4.29k
}
167
168
169
/* Value of array. */
170
static int mark_array(json_engine_t *j)
171
1.50k
{
172
1.50k
  j->state= JST_ARRAY_START;
173
1.50k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
174
1.49k
  {
175
1.49k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
176
1.49k
    j->value= j->value_begin;
177
1.49k
    return 0;
178
1.49k
  }
179
6
  j->s.error= JE_DEPTH;
180
6
  return 1;
181
1.50k
}
182
183
/* Read value of object. */
184
static int read_array(json_engine_t *j)
185
1.58k
{
186
1.58k
  j->state= JST_ARRAY_START;
187
1.58k
  j->value_type= JSON_VALUE_ARRAY;
188
1.58k
  j->value= j->value_begin;
189
1.58k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
190
1.58k
  {
191
1.58k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
192
1.58k
    return 0;
193
1.58k
  }
194
0
  j->s.error= JE_DEPTH;
195
0
  return 1;
196
1.58k
}
197
198
199
200
/*
201
  Character classes inside the JSON string constant.
202
  We mostly need this to parse escaping properly.
203
  Escapings available in JSON are:
204
  \" - quotation mark
205
  \\ - backslash
206
  \b - backspace UNICODE 8
207
  \f - formfeed UNICODE 12
208
  \n - newline UNICODE 10
209
  \r - carriage return UNICODE 13
210
  \t - horizontal tab UNICODE 9
211
  \u{four-hex-digits} - UTF-16 code unit (a surrogate pair takes two such escapes)
212
*/
213
/*
  Classes of characters inside a string constant.
  S_0..S_F double as the numeric value of a hex digit (see
  read_4_hexdigits()); every class up to S_ETC is an ordinary character.
*/
enum json_string_char_classes {
214
  S_0= 0,
215
  S_1= 1,
216
  S_2= 2,
217
  S_3= 3,
218
  S_4= 4,
219
  S_5= 5,
220
  S_6= 6,
221
  S_7= 7,
222
  S_8= 8,
223
  S_9= 9,
224
  S_A= 10,
225
  S_B= 11,
226
  S_C= 12,
227
  S_D= 13,
228
  S_E= 14,
229
  S_F= 15,
230
  S_ETC= 36,    /* rest of characters. */
231
  S_QUOTE= 37,  /* " */
232
  S_BKSL= 38, /* \ */
233
  S_ERR= 100,   /* disallowed */
234
};
235
236
237
/* This maps characters to their types inside a string constant. */
238
/*
  Indexed by code point (0..127); callers check 'c_next < 128' first.
  Hex digits map to their value in both letter cases.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
239
  /* 0x00-0x1F: control characters are not allowed inside a string. */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
240
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
241
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
242
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
243
244
  /* 0x20-0x3F: '"' and the decimal digits. */
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
245
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
246
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
247
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
248
249
  /* 0x40-0x5F: 'A'..'F' and the backslash. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
250
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
251
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
252
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,
253
254
  /* 0x60-0x7F: 'a'..'f'. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
255
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
256
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
257
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
258
};
259
260
261
/*
  Read four hex digits and add them into dest[0..1], most significant
  digit first. The caller must pass zeroed bytes since digits are added,
  not assigned.
  Returns 0 on success; otherwise sets and returns s->error (JE_EOS,
  JE_BAD_CHR or JE_SYN), leaving the offending character unread.
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int len= json_next_char(s);

    if (len <= 0)
    {
      s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
      return s->error;
    }

    if (s->c_next >= 128)
    {
      s->error= JE_SYN;
      return s->error;
    }
    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
    {
      s->error= JE_SYN;
      return s->error;
    }

    s->c_str+= len;
    /* Even positions are the high nibble of the byte, odd ones the low. */
    if (pos % 2)
      dest[pos / 2]+= digit;
    else
      dest[pos / 2]+= digit * 16;
  }
  return 0;
}
277
278
279
/*
  Decode the escape sequence that follows a backslash which has already
  been consumed. On success the decoded character is left in s->c_next
  and 0 is returned.

  \b \f \n \r \t are translated to their control codes, \uXXXX (or a
  \uXXXX\uXXXX surrogate pair) is decoded through my_utf16_uni(), and
  any other character that is not S_ERR stands for itself.

  On failure a non-zero value is returned and s->error holds JE_EOS,
  JE_BAD_CHR, JE_SYN or JE_ESCAPING.
*/
static int json_handle_esc(json_string_t *s)
{
  uchar utf16[4]= {0,0,0,0};
  int len;

  len= json_next_char(s);
  if (len <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  s->c_str+= len;

  /* Single-letter escapes. */
  if (s->c_next == 'b')
  {
    s->c_next= 8;
    return 0;
  }
  if (s->c_next == 'f')
  {
    s->c_next= 12;
    return 0;
  }
  if (s->c_next == 'n')
  {
    s->c_next= 10;
    return 0;
  }
  if (s->c_next == 'r')
  {
    s->c_next= 13;
    return 0;
  }
  if (s->c_next == 't')
  {
    s->c_next= 9;
    return 0;
  }

  /* A disallowed character cannot be escaped; un-read it and fail. */
  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
  {
    s->c_str-= len;
    return s->error= JE_ESCAPING;
  }

  /* Anything but 'u' stands for itself. */
  if (s->c_next != 'u')
    return 0;

  /* First UTF-16 code unit. */
  if (read_4_hexdigits(s, utf16))
    return 1;

  len= my_utf16_uni(0, &s->c_next, utf16, utf16 + 2);
  if (len == 2)
    return 0;
  if (len != MY_CS_TOOSMALL4)
    return s->error= JE_BAD_CHR;

  /*
    Two bytes were not enough, so the symbol is outside the Basic
    Multilingual Plane: a second "\uXXXX" must follow to complete the
    surrogate pair.
  */
  len= json_next_char(s);
  if (len <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != '\\')
    return s->error= JE_SYN;
  s->c_str+= len;

  len= json_next_char(s);
  if (len <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != 'u')
    return s->error= JE_SYN;
  s->c_str+= len;

  if (read_4_hexdigits(s, utf16 + 2))
    return 1;

  if (my_utf16_uni(0, &s->c_next, utf16, utf16 + 4) == 4)
    return 0;

  return s->error= JE_BAD_CHR;
}
353
354
355
/*
  Read one character of a string constant into js->c_next, decoding an
  escape sequence if the character is a backslash.
  Returns 0 on success. When no character can be read, returns 1 with
  js->error set to JE_EOS or JE_BAD_CHR; escape failures return whatever
  json_handle_esc() returns.
*/
int json_read_string_const_chr(json_string_t *js)
{
  int len= json_next_char(js);

  if (len <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= len;
  if (js->c_next != '\\')
    return 0;
  return json_handle_esc(js);
}
367
368
369
/*
  Skip a string constant up to and including its closing quote; the
  opening quote has already been consumed. Sets j->value_escaped when a
  backslash is met.
  On success restores j->state from the top of the stack and returns 0;
  otherwise returns non-zero with j->s.error set.
*/
static int skip_str_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int len;

  while ((len= json_next_char(js)) > 0)
  {
    js->c_str+= len;

    /* Ordinary characters (including everything non-ASCII). */
    if (js->c_next >= 128 || json_instr_chr_map[js->c_next] <= S_ETC)
      continue;

    if (js->c_next == '"')
    {
      j->state= j->stack[j->stack_p];
      return 0;
    }

    /* Symbol not allowed in JSON. */
    if (js->c_next != '\\')
      return js->error= JE_NOT_JSON_CHR;

    j->value_escaped= 1;
    if (json_handle_esc(js))
      return 1;
  }

  return js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
}
399
400
401
/* Scalar string. */
402
static int v_string(json_engine_t *j)
403
524
{
404
524
  return skip_str_constant(j) || json_scan_next(j);
405
524
}
406
407
408
/* Read scalar string. */
409
static int read_strn(json_engine_t *j)
410
360
{
411
360
  j->value= j->s.c_str;
412
360
  j->value_type= JSON_VALUE_STRING;
413
360
  j->value_escaped= 0;
414
415
360
  if (skip_str_constant(j))
416
90
    return 1;
417
418
270
  j->state= j->stack[j->stack_p];
419
270
  j->value_len= (int)(j->s.c_str - j->value) - j->s.c_next_len;
420
270
  return 0;
421
360
}
422
423
424
/*
425
  We have dedicated parser for numeric constants. It's similar
426
  to the main JSON parser, we similarly define character classes,
427
  map characters to classes and implement the state-per-class
428
  table. Though we don't create functions that handle
429
  particular classes, just specify what new state should parser
430
  get in this case.
431
*/
432
/*
  Character classes for the numeric-constant parser; they select a
  column of json_num_states[][]. See json_num_chr_map for the mapping.
*/
enum json_num_char_classes {
433
  N_MINUS,       /* '-' */
434
  N_PLUS,        /* '+' */
435
  N_ZERO,        /* '0' */
436
  N_DIGIT,       /* '1'..'9' */
437
  N_POINT,       /* '.' */
438
  N_E,           /* 'E' or 'e' */
439
  N_END,         /* whitespace, ',', ']' or '}' */
440
  N_EEND,        /* any other ASCII character above 0x1F */
441
  N_ERR,         /* control character other than TAB, LF, CR */
442
  N_NUM_CLASSES  /* number of classes */
443
};
444
445
446
/* Indexed by code point (0..127); callers check 'c_next < 128' first. */
static enum json_num_char_classes json_num_chr_map[128] = {
447
  /* 0x00-0x1F: TAB, LF and CR end a number, other controls are N_ERR. */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
448
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
449
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
450
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
451
452
  /* 0x20-0x3F: space, '+', ',', '-', '.', digits. */
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
453
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
454
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
455
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
456
457
  /* 0x40-0x5F: 'E' and ']'. */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
458
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
459
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
460
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
461
462
  /* 0x60-0x7F: 'e' and '}'. */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
463
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
464
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
465
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
466
};
467
468
469
/*
  States of the numeric-constant parser; they select a row of
  json_num_states[][] and an entry of json_num_state_flags[].
  NS_OK is 0, so skip_num_constant() can treat "> 0" as "keep reading".
*/
enum json_num_states {
470
  NS_OK,  /* Number ended. */
471
  NS_GO,  /* Initial state. */
472
  NS_GO1, /* If the number starts with '-'. */
473
  NS_Z,   /* If the number starts with '0'. */
474
  NS_Z1,  /* If the numbers starts with '-0'. */
475
  NS_INT, /* Integer part. */
476
  NS_FRAC,/* Fractional part. */
477
  NS_EX,  /* Exponential part begins. */
478
  NS_EX1, /* Exponential part continues. */
479
  NS_NUM_STATES
480
};
481
482
483
/*
  Transition table of the numeric parser: [current state][char class]
  gives the next NS_* state or a JE_* error code.

  NOTE(review): each row lists 8 initializers while N_NUM_CLASSES is 9.
  The 8th value (headed ERROR below) therefore lands in the N_EEND
  column, and the N_ERR column is implicitly zero, i.e. NS_OK. Confirm
  whether a dedicated N_EEND column was intended.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
484
{
485
/*         -        +       0         1..9    POINT    E       END_OK   ERROR */
486
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
487
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
488
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
489
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
490
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
491
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
492
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
493
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR }, 
494
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
495
};
496
497
498
/*
  Flags that skip_num_constant() ORs into j->num_flags each time the
  parser is in the given state.
*/
static uint json_num_state_flags[NS_NUM_STATES]=
499
{
500
/*OK*/   0,
501
/*GO*/   0,
502
/*GO1*/  JSON_NUM_NEG,
503
/*ZERO*/ 0,
504
/*ZE1*/  0,
505
/*INT*/  0,
506
/*FRAC*/ JSON_NUM_FRAC_PART,
507
/*EX*/   JSON_NUM_EXP,
508
/*EX1*/  0,
509
};
510
511
512
/*
  Scan over a numeric constant whose first character is already in
  j->s.c_next, driving json_num_states[][] and collecting JSON_NUM_*
  flags in j->num_flags.

  Returns 0 after restoring j->state from the top of the stack, or 1
  when j->s.error was set to a negative code (input ended in a state
  that cannot end a number, or an unreadable / non-ASCII character).
*/
static int skip_num_constant(json_engine_t *j)
513
11.3k
{
514
11.3k
  /* The first character was consumed by the caller; start from it. */
  int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
515
11.3k
  int c_len;
516
517
11.3k
  j->num_flags= 0;
518
11.3k
  for (;;)
519
11.7k
  {
520
11.7k
    j->num_flags|= json_num_state_flags[state];
521
11.7k
    if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
522
11.5k
    {
523
11.5k
      if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
524
386
      {
525
386
        /* Still inside the number: consume the character. */
        j->s.c_str+= c_len;
526
386
        continue;
527
386
      }
528
11.1k
      /*
        NS_OK or a negative JE_* code: stop without consuming the
        character. NOTE(review): a negative code is not stored in
        j->s.error here and the function still returns 0; presumably
        the offending character is rejected by the next scan step --
        confirm.
      */
      break;
529
11.5k
    }
530
531
146
    /* End of input decides by the N_END column; anything else is bad. */
    if ((j->s.error=
532
146
          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
533
72
      return 1;
534
74
    else
535
74
      break;
536
146
  }
537
538
11.2k
  j->state= j->stack[j->stack_p];
539
11.2k
  return 0;
540
11.3k
}
541
542
543
/* Scalar numeric. */
544
static int v_number(json_engine_t *j)
545
932
{
546
932
  return skip_num_constant(j) || json_scan_next(j);
547
932
}
548
549
550
/* Read numeric constant. */
551
static int read_num(json_engine_t *j)
552
10.3k
{
553
10.3k
  j->value= j->value_begin;
554
10.3k
  if (skip_num_constant(j) == 0)
555
10.3k
  {
556
10.3k
    j->value_type= JSON_VALUE_NUMBER;
557
10.3k
    j->value_len= (int)(j->s.c_str - j->value_begin);
558
10.3k
    return 0;
559
10.3k
  }
560
40
  return 1;
561
10.3k
}
562
563
564
/* Check that the JSON string matches the argument and skip it. */
565
static int skip_string_verbatim(json_string_t *s, const char *str)
566
3.20k
{
567
3.20k
  int c_len;
568
11.7k
  while (*str)
569
9.48k
  {
570
9.48k
    if ((c_len= json_next_char(s)) > 0)
571
9.27k
    {
572
9.27k
      if (s->c_next == (my_wc_t) *(str++))
573
8.50k
      {
574
8.50k
        s->c_str+= c_len;
575
8.50k
        continue;
576
8.50k
      }
577
770
      return s->error= JE_SYN;
578
9.27k
    }
579
208
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; 
580
9.48k
  }
581
582
2.22k
  return 0;
583
3.20k
}
584
585
586
/* Scalar false. */
587
static int v_false(json_engine_t *j)
588
586
{
589
586
  if (skip_string_verbatim(&j->s, "alse"))
590
216
   return 1;
591
370
  j->state= j->stack[j->stack_p];
592
370
  return json_scan_next(j);
593
586
}
594
595
596
/* Scalar null. */
597
static int v_null(json_engine_t *j)
598
548
{
599
548
  if (skip_string_verbatim(&j->s, "ull"))
600
174
   return 1;
601
374
  j->state= j->stack[j->stack_p];
602
374
  return json_scan_next(j);
603
548
}
604
605
606
/* Scalar true. */
607
static int v_true(json_engine_t *j)
608
682
{
609
682
  if (skip_string_verbatim(&j->s, "rue"))
610
184
   return 1;
611
498
  j->state= j->stack[j->stack_p];
612
498
  return json_scan_next(j);
613
682
}
614
615
616
/* Read false. */
617
static int read_false(json_engine_t *j)
618
590
{
619
590
  j->value_type= JSON_VALUE_FALSE;
620
590
  j->value= j->value_begin;
621
590
  j->state= j->stack[j->stack_p];
622
590
  j->value_len= 5;
623
590
  return skip_string_verbatim(&j->s, "alse");
624
590
}
625
626
627
/* Read null. */
628
static int read_null(json_engine_t *j)
629
464
{
630
464
  j->value_type= JSON_VALUE_NULL;
631
464
  j->value= j->value_begin;
632
464
  j->state= j->stack[j->stack_p];
633
464
  j->value_len= 4;
634
464
  return skip_string_verbatim(&j->s, "ull");
635
464
}
636
637
638
/* Read true. */
639
static int read_true(json_engine_t *j)
640
330
{
641
330
  j->value_type= JSON_VALUE_TRUE;
642
330
  j->value= j->value_begin;
643
330
  j->state= j->stack[j->stack_p];
644
330
  j->value_len= 4;
645
330
  return skip_string_verbatim(&j->s, "rue");
646
330
}
647
648
649
/* Disallowed character. */
650
static int not_json_chr(json_engine_t *j)
651
46
{
652
46
  j->s.error= JE_NOT_JSON_CHR;
653
46
  return 1;
654
46
}
655
656
657
/* Bad character. */
658
static int bad_chr(json_engine_t *j)
659
127
{
660
127
  j->s.error= JE_BAD_CHR;
661
127
  return 1;
662
127
}
663
664
665
/* Correct finish. */
666
static int done(json_engine_t *j  __attribute__((unused)))
667
0
{
668
0
  return 1;
669
0
}
670
671
672
/* End of the object. */
673
static int end_object(json_engine_t *j)
674
990
{
675
990
  j->stack_p--;
676
990
  j->state= JST_OBJ_END;
677
990
  return 0;
678
990
}
679
680
681
/* End of the array. */
682
static int end_array(json_engine_t *j)
683
1.08k
{
684
1.08k
  j->stack_p--;
685
1.08k
  j->state= JST_ARRAY_END;
686
1.08k
  return 0;
687
1.08k
}
688
689
690
/* Start reading key name. */
691
static int read_keyname(json_engine_t *j)
692
5.11k
{
693
5.11k
  j->state= JST_KEY;
694
5.11k
  return 0;
695
5.11k
}
696
697
698
/*
  Consume characters until one that is not C_SPACE has been read.

  On return *t_next holds the class of that character (C_ETC for
  anything non-ASCII) and *c_len its byte length; the character itself
  has been consumed. If nothing could be read, *t_next is C_EOS or C_BAD,
  *c_len is <= 0 and the position is left unchanged.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    js->c_str+= *c_len;
    *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
    if (*t_next != C_SPACE)
      return;
  }
}
711
712
713
/* Next key name. */
714
static int next_key(json_engine_t *j)
715
12.4k
{
716
12.4k
  int t_next, c_len;
717
12.4k
  get_first_nonspace(&j->s, &t_next, &c_len);
718
719
12.4k
  if (t_next == C_QUOTE)
720
12.3k
  {
721
12.3k
    j->state= JST_KEY;
722
12.3k
    return 0;
723
12.3k
  }
724
725
102
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
726
102
              ((t_next == C_BAD) ? JE_BAD_CHR :
727
62
                                   JE_SYN);
728
102
  return 1;
729
12.4k
}
730
731
732
/* Forward declarations. */
733
static int skip_colon(json_engine_t *j);
734
static int skip_key(json_engine_t *j);
735
static int struct_end_cb(json_engine_t *j);
736
static int struct_end_qb(json_engine_t *j);
737
static int struct_end_cm(json_engine_t *j);
738
static int struct_end_eos(json_engine_t *j);
739
740
741
/* Handler for ',' inside an array: a value is expected next. */
static int next_item(json_engine_t *j)
{
  const int ok= 0;

  j->state= JST_VALUE;
  return ok;
}
746
747
748
/*
  Handler for the first character of an array's first item: un-read
  that character (sav_c_len bytes, as saved by json_scan_next()) so the
  item is scanned from its beginning in JST_VALUE.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
754
755
756
/*
  Dispatch table of the main parser: [state][character class] gives the
  handler to run. Rows follow the state numbering (user-visible states
  first, then the json_all_states ones); columns follow
  enum json_char_classes up to NR_C_CLASSES.
*/
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
757
/*
758
   EOS              {            }             [             ]
759
   :                ,            "             -0..9         f
760
   n                t              ETC          ERR           BAD
761
*/
762
{
763
  {/*VALUE*/
764
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
765
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
766
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
767
  {/*KEY*/
768
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
769
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
770
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
771
  {/*OBJ_START*/
772
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
773
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
774
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
775
  {/*OBJ_END*/
776
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
777
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
778
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
779
  {/*ARRAY_START*/
780
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
781
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
782
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
783
  {/*ARRAY_END*/
784
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
785
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
786
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
787
  {/*DONE*/
788
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
789
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
790
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
791
  {/*OBJ_CONT*/
792
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
793
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
794
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
795
  {/*ARRAY_CONT*/
796
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
797
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
798
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
799
  {/*READ_VALUE*/
800
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
801
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
802
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
803
};
804
805
806
807
int json_scan_start(json_engine_t *je,
808
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
809
4.26k
{
810
4.26k
  static const uchar no_time_to_die= 0;
811
812
4.26k
  json_string_setup(&je->s, i_cs, str, end);
813
4.26k
  je->stack[0]= JST_DONE;
814
4.26k
  je->stack_p= 0;
815
4.26k
  je->state= JST_VALUE;
816
4.26k
  je->killed_ptr = (uchar*)&no_time_to_die;
817
4.26k
  return 0;
818
4.26k
}
819
820
821
/* Skip colon and the value. */
822
static int skip_colon(json_engine_t *j)
823
774
{
824
774
  int t_next, c_len;
825
826
774
  get_first_nonspace(&j->s, &t_next, &c_len);
827
828
774
  if (t_next == C_COLON)
829
684
  {
830
684
    get_first_nonspace(&j->s, &t_next, &c_len);
831
684
    return json_actions[JST_VALUE][t_next](j);
832
684
 }
833
834
90
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
835
90
              ((t_next == C_BAD) ? JE_BAD_CHR:
836
66
                                   JE_SYN);
837
838
90
  return 1;
839
774
}
840
841
842
/* Skip colon and the value. */
843
static int skip_key(json_engine_t *j)
844
1.33k
{
845
1.33k
  int t_next, c_len;
846
847
1.33k
  if (j->s.c_next<128 && json_instr_chr_map[j->s.c_next] == S_BKSL &&
848
1.33k
      json_handle_esc(&j->s))
849
8
    return 1;
850
851
1.51k
  while (json_read_keyname_chr(j) == 0) {}
852
853
1.32k
  if (j->s.error)
854
106
    return 1;
855
856
1.22k
  get_first_nonspace(&j->s, &t_next, &c_len);
857
1.22k
  return json_actions[JST_VALUE][t_next](j);
858
1.32k
}
859
860
861
/*
862
  Handle EOS after the end of an object or array.
863
  To do that we should pop the stack to see if
864
  we are inside an object, or an array, and
865
  run our 'state machine' accordingly.
866
*/
867
static int struct_end_eos(json_engine_t *j)
868
31
{ return json_actions[j->stack[j->stack_p]][C_EOS](j); }
869
870
871
/*
872
  Handle '}' after the end of an object or array.
873
  To do that we should pop the stack to see if
874
  we are inside an object, or an array, and
875
  run our 'state machine' accordingly.
876
*/
877
static int struct_end_cb(json_engine_t *j)
878
262
{ return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
879
880
881
/*
882
  Handle ']' after the end of an object or array.
883
  To do that we should pop the stack to see if
884
  we are inside an object, or an array, and
885
  run our 'state machine' accordingly.
886
*/
887
static int struct_end_qb(json_engine_t *j)
888
610
{ return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
889
890
891
/*
892
  Handle ',' after the end of an object or array.
893
  To do that we should pop the stack to see if
894
  we are inside an object, or an array, and
895
  run our 'state machine' accordingly.
896
*/
897
static int struct_end_cm(json_engine_t *j)
898
1.10k
{ return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
899
900
901
/*
  Read the next character of a key name.

  Returns 0 when an ordinary character (or a successfully decoded
  escape) is available in j->s.c_next.
  Returns non-zero when the key is over: either the closing quote and
  the following ':' were consumed (j->state becomes JST_VALUE and
  j->s.error is left untouched), or an error was stored in j->s.error.
  Callers tell the two apart by checking j->s.error.
*/
int json_read_keyname_chr(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int len= json_next_char(js);
  int cls;

  if (len <= 0)
    goto eos_or_bad;

  js->c_str+= len;
  if (js->c_next >= 128)
    return 0;
  cls= json_instr_chr_map[js->c_next];
  if (cls <= S_ETC)
    return 0;

  if (cls == S_BKSL)
    return json_handle_esc(js);

  if (cls == S_ERR)
  {
    /* Un-read the disallowed character so that c_str points at it. */
    js->c_str-= len;
    js->error= JE_STRING_CONST;
    return 1;
  }

  if (cls == S_QUOTE)
  {
    /* Closing quote: skip spaces until ':'. */
    while ((len= json_next_char(js)) > 0)
    {
      if (js->c_next == ':')
      {
        js->c_str+= len;
        j->state= JST_VALUE;
        return 1;
      }

      if (js->c_next >= 128 || json_chr_map[js->c_next] != C_SPACE)
      {
        js->error= JE_SYN;
        return 1;
      }
      js->c_str+= len;
    }
  }

eos_or_bad:
  js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
948
949
950
/*
  Read the value at the current position.

  If the engine is in the JST_KEY state the rest of the key name (and
  the ':' after it) is skipped first. The first non-space character is
  then dispatched through the JST_READ_VALUE row of json_actions, which
  fills in j->value, j->value_type and, for scalars, j->value_len.

  j->value_begin points at the first character of the value and
  j->value_end at the position where reading stopped.

  Returns 0 on success, non-zero on error (see j->s.error).
*/
int json_read_value(json_engine_t *j)
{
  int t_next, c_len, res;

  j->value_type= JSON_VALUE_UNINITIALIZED;
  if (j->state == JST_KEY)
  {
    while (json_read_keyname_chr(j) == 0) {}

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &t_next, &c_len);

  /*
    c_len is <= 0 when no character could be read (C_EOS / C_BAD). The
    position was not advanced in that case, so there is nothing to step
    back over; subtracting a non-positive length would move value_begin
    forward, possibly past the end of the buffer.
  */
  j->value_begin= (c_len > 0) ? j->s.c_str - c_len : j->s.c_str;
  res= json_actions[JST_READ_VALUE][t_next](j);
  j->value_end= j->s.c_str;
  return res;
}
970
971
972
/*
  Advance the scan by one step: read the next non-space character
  (its length is kept in j->sav_c_len) and run the handler selected by
  the current state and the character's class.
  Returns 0 when a step was made, 1 when the scan must stop: an error,
  the end of the document, or a non-zero *j->killed_ptr.
*/
int json_scan_next(json_engine_t *j)
{
  int t_next;

  get_first_nonspace(&j->s, &t_next, &j->sav_c_len);
  if (*j->killed_ptr)
    return 1;
  return json_actions[j->state][t_next](j) != 0;
}
979
980
981
/*
  Character classes for the JSON path parser; json_path_chr_map maps
  the first 128 code points to these.
*/
enum json_path_chr_classes {
982
  P_EOS,    /* end of string */
983
  P_USD,    /* $ */
984
  P_ASTER,  /* * */
985
  P_LSQRB,  /* [ */
986
  P_T,      /* t (for to) */
987
  P_RSQRB,  /* ] */
988
  P_POINT,  /* . */
989
  P_NEG,    /* hyphen (for negative index in path) */
990
  P_ZERO,   /* 0 */
991
  P_DIGIT,  /* 123456789 */
992
  P_L,      /* l (for "lax") */
993
  P_S,      /* s (for "strict") */
994
  P_SPACE,  /* space */
995
  P_BKSL,   /* \ */
996
  P_QUOTE,  /* " */
997
  P_ETC,    /* everything else */
998
  P_ERR,    /* character disallowed in JSON*/
999
  P_BAD,    /* invalid character */
1000
  N_PATH_CLASSES, /* number of classes */
1001
};
1002
1003
1004
static enum json_path_chr_classes json_path_chr_map[128] = {
1005
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1006
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
1007
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1008
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1009
1010
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
1011
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_NEG,   P_POINT, P_ETC,
1012
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
1013
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1014
1015
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1016
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1017
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1018
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,
1019
1020
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1021
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1022
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1023
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
1024
};
1025
1026
1027
/*
  States of the JSON path parser (see json_path_setup()).

  The members before N_PATH_STATES are the row indexes of
  json_path_transitions[][]. The members after it only appear as
  values inside that table: json_path_setup() handles them in its
  switch and, except for PS_OK which ends the loop, replaces them with
  one of the row states before the next lookup.
*/
enum json_path_states {
1028
  PS_GO,  /* Initial state. */
1029
  PS_LAX, /* Parse the 'lax' keyword. */
1030
  PS_PT,  /* New path's step begins. */
1031
  PS_AR,  /* Parse array step. */
1032
  PS_SAR, /* space after the '['. */
1033
  PS_AWD, /* Array wildcard. */
1034
  PS_NEG,  /*  Parse '-' (hyphen) */
1035
  PS_Z,   /* '0' (as an array item number). */
1036
  PS_INT, /* Parse integer (as an array item number). */
1037
  PS_AS,  /* Space. */
1038
  PS_KEY, /* Key. */
1039
  PS_KNM, /* Parse key name. */
1040
  PS_KWD, /* Key wildcard. */
1041
  PS_AST, /* Asterisk. */
1042
  PS_DWD, /* Double wildcard. */
1043
  PS_KEYX, /* Key started with quote ("). */
1044
  PS_KNMX, /* Parse quoted key name. */
1045
  PS_LAST, /* Parse 'last' keyword */
1046
  PS_T,    /* Parse 'to' keyword. */
1047
  N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1048
  PS_SCT,  /* Parse the 'strict' keyword. */
1049
  PS_EKY,  /* '.' after the keyname so next step is the key. */
1050
  PS_EKYX, /* Closing " for the quoted keyname. */
1051
  PS_EAR,  /* '[' after the keyname so next step is the array. */
1052
  PS_ESC,  /* Escaping in the keyname. */
1053
  PS_ESCX, /* Escaping in the quoted keyname. */
1054
  PS_OK,   /* Path normally ended. */
1055
  PS_KOK   /* EOS after the keyname so end the path normally. */
1056
};
1057
1058
1059
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1060
{
1061
/*
1062
            EOS       $,      *       [       to       ]       .       -
1063
            0       1..9    L       S       SPACE   \       "       ETC
1064
            ERR              BAD
1065
*/
1066
/* GO  */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1067
            JE_SYN, JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1068
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1069
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1070
            JE_SYN, JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1071
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1072
/* PT */  { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1073
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1074
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1075
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_NEG,
1076
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1077
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1078
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1079
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1080
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1081
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1082
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1083
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1084
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1085
            PS_INT, PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1086
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1087
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1088
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1089
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1090
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1091
            PS_INT, PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1092
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1093
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_T, PS_PT, JE_SYN, PS_NEG,
1094
            PS_Z, PS_INT, PS_LAST, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1095
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1096
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, PS_KNM, JE_SYN, PS_KNM,
1097
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1098
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1099
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_KNM, PS_EKY, PS_KNM,
1100
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1101
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1102
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, JE_SYN, PS_EKY, JE_SYN,
1103
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1104
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1105
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1106
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1107
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1108
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1109
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1110
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1111
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,
1112
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1113
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1114
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1115
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1116
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1117
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1118
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1119
            JE_SYN, JE_BAD_CHR},
1120
/* T */   { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1121
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1122
            JE_SYN, JE_BAD_CHR},
1123
};
1124
1125
1126
int json_path_setup(json_path_t *p,
1127
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1128
0
{
1129
0
 int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0,
1130
0
  prev_value=0, is_to= 0, *cur_val;
1131
0
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1132
0
  json_string_setup(&p->s, i_cs, str, end);
1133
1134
0
  p->steps[0].type= JSON_PATH_ARRAY_WILD;
1135
0
  p->last_step= p->steps;
1136
0
  p->mode_strict= FALSE;
1137
0
  p->types_used= JSON_PATH_KEY_NULL;
1138
1139
0
  do
1140
0
  {
1141
0
    if ((c_len= json_next_char(&p->s)) <= 0)
1142
0
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1143
0
    else
1144
0
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1145
1146
0
    if ((state= json_path_transitions[state][t_next]) < 0)
1147
0
      return p->s.error= state;
1148
1149
0
    p->s.c_str+= c_len;
1150
1151
0
    switch (state)
1152
0
    {
1153
0
    case PS_LAX:
1154
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1155
0
        return 1;
1156
0
      p->mode_strict= FALSE;
1157
0
      continue;
1158
0
    case PS_SCT:
1159
0
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1160
0
        return 1;
1161
0
      p->mode_strict= TRUE;
1162
0
      state= PS_LAX;
1163
0
      continue;
1164
0
    case PS_KWD:
1165
0
    case PS_AWD:
1166
0
      p->last_step->type|= JSON_PATH_WILD;
1167
0
      p->types_used|= JSON_PATH_WILD;
1168
0
      continue;
1169
0
    case PS_INT:
1170
0
      cur_val= is_to ? &(p->last_step->n_item_end) :
1171
0
                       &(p->last_step->n_item);
1172
0
      if (is_last)
1173
0
      {
1174
0
        prev_value*= 10;
1175
0
        prev_value-= p->s.c_next - '0';
1176
0
        *cur_val= -1 + prev_value;
1177
0
      }
1178
0
      else
1179
0
      {
1180
0
        (*cur_val)*= 10;
1181
0
        if (is_negative_index)
1182
0
          *cur_val-= p->s.c_next - '0';
1183
0
        else
1184
0
          *cur_val+= p->s.c_next - '0';
1185
0
      }
1186
0
      continue;
1187
0
    case PS_EKYX:
1188
0
      p->last_step->key_end= p->s.c_str - c_len;
1189
0
      state= PS_PT;
1190
0
      continue;
1191
0
    case PS_EKY:
1192
0
      p->last_step->key_end= p->s.c_str - c_len;
1193
0
      state= PS_KEY;
1194
      /* fall through */
1195
0
    case PS_KEY:
1196
0
      p->last_step++;
1197
0
      is_to= 0;
1198
0
      prev_value= 0;
1199
0
      is_negative_index= 0;
1200
0
      is_last= 0;
1201
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1202
0
        return p->s.error= JE_DEPTH;
1203
0
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1204
0
      double_wildcard= JSON_PATH_KEY_NULL;
1205
      /* fall through */
1206
0
    case PS_KEYX:
1207
0
      p->last_step->key= p->s.c_str;
1208
0
      continue;
1209
0
    case PS_EAR:
1210
0
      p->last_step->key_end= p->s.c_str - c_len;
1211
0
      state= PS_AR;
1212
      /* fall through */
1213
0
    case PS_AR:
1214
0
      p->last_step++;
1215
0
      is_last= 0;
1216
0
      is_to= 0;
1217
0
      prev_value= 0;
1218
0
      is_negative_index= 0;
1219
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1220
0
        return p->s.error= JE_DEPTH;
1221
0
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1222
0
      double_wildcard= JSON_PATH_KEY_NULL;
1223
0
      p->last_step->n_item= 0;
1224
0
      continue;
1225
0
    case PS_ESC:
1226
0
      if (json_handle_esc(&p->s))
1227
0
        return 1;
1228
0
      state= PS_KNM;
1229
0
      continue;
1230
0
    case PS_ESCX:
1231
0
      if (json_handle_esc(&p->s))
1232
0
        return 1;
1233
0
      state= PS_KNMX;
1234
0
      continue;
1235
0
    case PS_KOK:
1236
0
      p->last_step->key_end= p->s.c_str - c_len;
1237
0
      state= PS_OK;
1238
0
      break; /* 'break' as the loop supposed to end after that. */
1239
0
    case PS_DWD:
1240
0
      double_wildcard= JSON_PATH_DOUBLE_WILD;
1241
0
      continue;
1242
0
    case PS_NEG:
1243
0
       p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1244
0
       is_negative_index= 1;
1245
0
       continue;
1246
0
    case PS_LAST:
1247
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
1248
0
       return 1;
1249
0
      p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1250
0
      is_last= 1;
1251
0
      if (is_to)
1252
0
        p->last_step->n_item_end= -1;
1253
0
      else
1254
0
        p->last_step->n_item= -1;
1255
0
      continue;
1256
0
    case PS_T:
1257
0
      if ((p->s.error= skip_string_verbatim(&p->s, "o")))
1258
0
        return 1;
1259
0
      is_to= 1;
1260
0
      is_negative_index= 0;
1261
0
      is_last= 0;
1262
0
      prev_value= 0;
1263
0
      p->last_step->n_item_end= 0;
1264
0
      p->last_step->type|= JSON_PATH_ARRAY_RANGE;
1265
0
      p->types_used|= JSON_PATH_ARRAY_RANGE;
1266
0
      continue;
1267
0
    };
1268
0
  } while (state != PS_OK);
1269
1270
0
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
1271
0
}
1272
1273
1274
/*
  Scan forward until the engine's stack depth drops below 'level'.

  @return  0 once j->stack_p < level,
           1 if json_scan_next() reported a failure (or the end of the
             scan) before that happened.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;

    if (json_scan_next(j) != 0)
      return 1;
  }
}
1283
1284
1285
/*
1286
  works as json_skip_level() but also counts items on the current
1287
  level skipped.
1288
*/
1289
int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1290
0
{
1291
0
  int level= j->stack_p;
1292
1293
0
  *n_items_skipped= 0;
1294
0
  while (json_scan_next(j) == 0)
1295
0
  {
1296
0
    if (j->stack_p < level)
1297
0
      return 0;
1298
0
    if (j->stack_p == level && j->state == JST_VALUE)
1299
0
      (*n_items_skipped)++;
1300
0
  }
1301
1302
0
  return 1;
1303
0
}
1304
1305
1306
/*
  Count the items of the level 'je' is positioned in without moving
  'je' itself: the scan is done on a private copy of the engine.

  @return  result of json_skip_level_and_count() for the copy;
           the number of items is returned in *n_items.
*/
int json_skip_array_and_count(json_engine_t *je, int *n_items)
{
  json_engine_t scratch= *je;

  *n_items= 0;
  return json_skip_level_and_count(&scratch, n_items);
}
1313
1314
1315
/*
  Skip the value the engine is positioned at (after a key or as an
  array item).

  The value is read first; a scalar needs nothing more, anything else
  is skipped with json_skip_level().

  @return  1 if the value could not be read, 0 for a scalar value,
           otherwise the result of json_skip_level().
*/
int json_skip_key(json_engine_t *j)
{
  int rc= 1;

  if (json_read_value(j) == 0)
  {
    if (json_value_scalar(j))
      rc= 0;
    else
      rc= json_skip_level(j);
  }

  return rc;
}
1325
1326
1327
0
/*
  Value stored in array_counters[] by handle_match() for a "[0]" path
  step that was skipped over because the JSON value is an object.
  json_find_path() pops such steps together with the enclosing step when
  the object ends.
*/
#define SKIPPED_STEP_MARK INT_MAX32
1328
1329
/*
1330
  Current step of the patch matches the JSON construction.
1331
  Now we should either stop the search or go to the next
1332
  step of the path.
1333
*/
1334
static int handle_match(json_engine_t *je, json_path_t *p,
1335
                        json_path_step_t **p_cur_step, int *array_counters)
1336
0
{
1337
0
  json_path_step_t *next_step= *p_cur_step + 1;
1338
1339
0
  DBUG_ASSERT(*p_cur_step < p->last_step);
1340
1341
0
  if (json_read_value(je))
1342
0
    return 1;
1343
1344
0
  if (json_value_scalar(je))
1345
0
  {
1346
0
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1347
0
    {
1348
0
      if (++next_step > p->last_step)
1349
0
      {
1350
0
        je->s.c_str= je->value_begin;
1351
0
        return 1;
1352
0
      }
1353
0
    }
1354
0
    return 0;
1355
0
  }
1356
1357
0
  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1358
0
      je->value_type & JSON_VALUE_OBJECT)
1359
0
  {
1360
0
    do
1361
0
    {
1362
0
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1363
0
      if (++next_step > p->last_step)
1364
0
      {
1365
0
        je->s.c_str= je->value_begin;
1366
0
        je->stack_p--;
1367
0
        return 1;
1368
0
      }
1369
0
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1370
0
  }
1371
1372
0
  if ((int) je->value_type !=
1373
0
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1374
0
    return json_skip_level(je);
1375
1376
0
  array_counters[next_step - p->steps]= 0;
1377
0
  if (next_step->type & JSON_PATH_ARRAY)
1378
0
  {
1379
0
    int array_size;
1380
0
    if (next_step->n_item >= 0)
1381
0
      array_size= 0;
1382
0
    else
1383
0
    {
1384
0
      json_engine_t j2= *je;
1385
0
      if (json_skip_array_and_count(&j2, &array_size))
1386
0
      {
1387
0
        *je= j2;
1388
0
        return 1;
1389
0
      }
1390
0
      array_size= -array_size;
1391
0
    }
1392
0
    array_counters[next_step - p->steps]= array_size;
1393
0
  }
1394
1395
0
  *p_cur_step= next_step;
1396
0
  return 0;
1397
0
}
1398
1399
1400
/*
1401
  Check if the name of the current JSON key matches
1402
  the step of the path.
1403
*/
1404
int json_key_matches(json_engine_t *je, json_string_t *k)
1405
15.3k
{
1406
16.9k
  while (json_read_keyname_chr(je) == 0)
1407
8.55k
  {
1408
8.55k
    if (json_read_string_const_chr(k) ||
1409
8.55k
        je->s.c_next != k->c_next)
1410
6.94k
      return 0;
1411
8.55k
  }
1412
1413
8.37k
  return json_read_string_const_chr(k);
1414
15.3k
}
1415
1416
1417
int json_find_path(json_engine_t *je,
1418
                   json_path_t *p, json_path_step_t **p_cur_step,
1419
                   int *array_counters)
1420
0
{
1421
0
  json_string_t key_name;
1422
0
  int res= 0;
1423
1424
0
  json_string_set_cs(&key_name, p->s.cs);
1425
1426
0
  do
1427
0
  {
1428
0
    json_path_step_t *cur_step= *p_cur_step;
1429
0
    switch (je->state)
1430
0
    {
1431
0
    case JST_KEY:
1432
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
1433
0
      if (!(cur_step->type & JSON_PATH_WILD))
1434
0
      {
1435
0
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
1436
0
        if (!json_key_matches(je, &key_name))
1437
0
        {
1438
0
          if (json_skip_key(je))
1439
0
            goto exit;
1440
0
          continue;
1441
0
        }
1442
0
      }
1443
0
      if (cur_step == p->last_step ||
1444
0
          handle_match(je, p, p_cur_step, array_counters))
1445
0
        goto exit;
1446
0
      break;
1447
0
    case JST_VALUE:
1448
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
1449
0
      if (cur_step->type & JSON_PATH_ARRAY_RANGE)
1450
0
      {
1451
0
        res= (cur_step->n_item <= array_counters[cur_step - p->steps] &&
1452
0
              cur_step->n_item_end >= array_counters[cur_step - p->steps]);
1453
0
        array_counters[cur_step - p->steps]++;
1454
0
      }
1455
0
      else
1456
0
        res= cur_step->n_item == array_counters[cur_step - p->steps]++;
1457
0
      if ((cur_step->type & JSON_PATH_WILD) || res)
1458
0
      {
1459
        /* Array item matches. */
1460
0
        if (cur_step == p->last_step ||
1461
0
            handle_match(je, p, p_cur_step, array_counters))
1462
0
          goto exit;
1463
0
      }
1464
0
      else
1465
0
        json_skip_array_item(je);
1466
0
      break;
1467
0
    case JST_OBJ_END:
1468
0
      do
1469
0
      {
1470
0
        (*p_cur_step)--;
1471
0
      } while (*p_cur_step > p->steps &&
1472
0
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
1473
0
      break;
1474
0
    case JST_ARRAY_END:
1475
0
      (*p_cur_step)--;
1476
0
      break;
1477
0
    default:
1478
0
      DBUG_ASSERT(0);
1479
0
      break;
1480
0
    };
1481
0
  } while (json_scan_next(je) == 0);
1482
1483
  /* No luck. */
1484
0
  return 1;
1485
1486
0
exit:
1487
0
  return je->s.error;
1488
0
}
1489
1490
1491
/*
  Start a search for several paths at once: store the paths and their
  depth array in 'state', reset the current depth to 0 and look for
  the first match with json_find_paths_next().
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;
  state->cur_depth= 0;

  return json_find_paths_next(je, state);
}
1500
1501
1502
/*
  Continue a multi-path search started by json_find_paths_first().

  For every state the engine stops in, each path that has not failed yet
  (its path_depths[] entry is not below the current depth) is checked
  against the step at the current depth.

  @return  je->s.error when a path is reported as found or skipping a
           level failed; 1 when json_scan_next() ended the scan.

  NOTE(review): the "is this the last step" test compares the step with
  last_step + cur_depth, which looks unusual; kept exactly as it was.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint idx;
  int found, nothing_pending;

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      found= FALSE;
      nothing_pending= TRUE;
      for (idx= 0; idx < state->n_paths; idx++)
      {
        json_path_t *path= state->paths + idx;
        json_path_step_t *step;

        if (state->path_depths[idx] < state->cur_depth)
          continue;                           /* Path already failed. */
        step= path->steps + state->cur_depth;
        if (!(step->type & JSON_PATH_KEY))
          continue;

        if (!(step->type & JSON_PATH_WILD))
        {
          json_string_t wanted_key;
          json_string_setup(&wanted_key, path->s.cs,
                            step->key, step->key_end);
          if (!json_key_matches(je, &wanted_key))
            continue;
        }

        if (step == path->last_step + state->cur_depth)
          found= TRUE;
        else
        {
          nothing_pending= FALSE;
          state->path_depths[idx]= state->cur_depth + 1;
        }
      }

      if (found)
        return je->s.error;                   /* Return the result. */

      /* No possible paths left to check. Just skip the level. */
      if (nothing_pending && json_skip_level(je))
        return je->s.error;
      break;

    case JST_VALUE:
      found= FALSE;
      nothing_pending= TRUE;
      for (idx= 0; idx < state->n_paths; idx++)
      {
        json_path_t *path= state->paths + idx;
        json_path_step_t *step;

        if (state->path_depths[idx] < state->cur_depth)
          continue;                           /* Path already failed. */
        step= path->steps + state->cur_depth;
        if (!(step->type & JSON_PATH_ARRAY))
          continue;

        if (!(step->type & JSON_PATH_WILD) &&
            step->n_item != state->array_counters[state->cur_depth])
          continue;

        /* Array item matches. */
        if (step == path->last_step + state->cur_depth)
          found= TRUE;
        else
        {
          nothing_pending= FALSE;
          state->path_depths[idx]= state->cur_depth + 1;
        }
      }

      if (found)
        return je->s.error;

      if (nothing_pending)
        json_skip_array_item(je);

      state->array_counters[state->cur_depth]++;
      break;

    case JST_OBJ_START:
    case JST_ARRAY_START:
    {
      /* Kind of step that can descend into this container. */
      const int wanted_type= (je->state == JST_OBJ_START) ? JSON_PATH_KEY :
                                                            JSON_PATH_ARRAY;
      for (idx= 0; idx < state->n_paths; idx++)
      {
        if (state->path_depths[idx] < state->cur_depth)
          continue;                           /* Path already failed. */
        if (state->paths[idx].steps[state->cur_depth].type & wanted_type)
          state->path_depths[idx]++;
      }
      state->cur_depth++;
      break;
    }

    case JST_OBJ_END:
    case JST_ARRAY_END:
      for (idx= 0; idx < state->n_paths; idx++)
      {
        if (state->path_depths[idx] >= state->cur_depth)
          state->path_depths[idx]--;
      }
      state->cur_depth--;
      break;

    default:
      DBUG_ASSERT(0);
      break;
    }
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;
}
1616
1617
1618
/*
  Write the bytes [ascii, ascii_end) into the buffer [json, json_end),
  converting each byte to the json_cs charset.

  @return  number of bytes written, or the non-positive my_ci_wc_mb()
           result for the first byte that could not be stored.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *src;

  for (src= ascii; src < ascii_end; src++)
  {
    int n= my_ci_wc_mb(json_cs, (my_wc_t) *src, out, json_end);
    if (n <= 0)
      return n;                 /* Error return. */
    out+= n;
  }

  return (int)(out - json);
}
1639
1640
1641
/*
  Decode the JSON string constant [json_str, json_end) (charset json_cs)
  into the buffer [res, res_end) using charset res_cs.

  A character that res_cs cannot represent (MY_CS_ILUNI) is replaced
  with '?'.

  @return  number of bytes written, or -1 if the result buffer is too
           small or the source did not end with JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t src;
  uchar *out= res;

  json_string_setup(&src, json_cs, json_str, json_end);

  for (;;)
  {
    int n;

    if (json_read_string_const_chr(&src) != 0)
      break;

    n= my_ci_wc_mb(res_cs, src.c_next, out, res_end);
    if (n <= 0)
    {
      if (n != MY_CS_ILUNI)
        return -1;              /* Result buffer is too small. */

      /*
        Result charset doesn't support the json's character.
        Let's replace it with the '?' symbol.
      */
      n= my_ci_wc_mb(res_cs, '?', out, res_end);
      if (n <= 0)
        return -1;              /* Result buffer is too small. */
    }
    out+= n;
  }

  if (src.error != JE_EOS)
    return -1;

  return (int)(out - res);
}
1675
1676
1677
/*
  How a character has to be escaped when it is written into JSON.
  Apart from ESC_, the value of each class is the character that
  json_escape() writes after the backslash (for ESC_BS the original
  character itself is written instead).
*/
1678
enum json_esc_char_classes {
1679
  ESC_= 0,    /* No need to escape. */
1680
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
1681
  ESC_B= 'b', /* Backspace. Escape as \b */
1682
  ESC_F= 'f', /* Formfeed. Escape as \f */
1683
  ESC_N= 'n', /* Newline. Escape as \n */
1684
  ESC_R= 'r', /* Return. Escape as \r */
1685
  ESC_T= 't', /* Tab. Escape as \t */
1686
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
1687
};
1688
1689
1690
/*
  This specifies how we should escape the character.
  Indexed by the character code; only codes below 0x60 are listed,
  json_escape() treats everything from 0x60 up as "no need to escape".
  Each row covers 8 consecutive codes.
*/
1691
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
1692
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1693
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
1694
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1695
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1696
1697
  /* 0x20: the '"' (0x22) needs a backslash prefix. */
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1698
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1699
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1700
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1701
1702
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1703
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1704
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1705
  /* 0x58: the backslash (0x5C) needs a backslash prefix. */
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
1706
};
1707
1708
1709
/*
  Upper-case hexadecimal digit for a value 0..15, used by json_escape()
  to build \uXXXX sequences. Exactly 16 characters, no terminating '\0'.
*/
static const char hexconv[16] = "0123456789ABCDEF";
1710
1711
1712
/*
  Convert the string [str, str_end) in charset str_cs into the body of
  a JSON string constant, written into [json, json_end) in charset
  json_cs.

  Characters listed in json_escape_chr_map are escaped as \b \f \n \r \t
  \" \\ or \uXXXX. A character that json_cs cannot represent is written
  as \uXXXX too; if it needs two UTF-16 code units it is written as a
  surrogate pair, \uXXXX\uYYYY.

  @return  number of bytes written to json on success,
           JSON_ERROR_OUT_OF_SPACE if the output buffer is too small,
           JSON_ERROR_ILLEGAL_SYMBOL if the source contains a byte
           sequence that cannot be decoded, or a character that cannot
           be expressed as UTF-16 for the \u escape.
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping needed: store the character as it is. */
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return JSON_ERROR_OUT_OF_SPACE;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /* The backslash, then the character that names the escape. */
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
          (c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
                                       json+= c_len, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return JSON_ERROR_OUT_OF_SPACE;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /* We have to use \uXXXX escaping; "\u" is already written. */
        uchar utf16buf[4];
        uchar code_str[10];
        int code_len= 4;
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);

        if (u_len <= 0)
        {
          /*
            The character has no UTF-16 form, so utf16buf holds nothing
            usable and no amount of extra output space would help.
          */
          return JSON_ERROR_ILLEGAL_SYMBOL;
        }

        code_str[0]= hexconv[utf16buf[0] >> 4];
        code_str[1]= hexconv[utf16buf[0] & 15];
        code_str[2]= hexconv[utf16buf[1] >> 4];
        code_str[3]= hexconv[utf16buf[1] & 15];

        if (u_len > 2)
        {
          /* Second code unit of a surrogate pair needs its own "\u". */
          code_str[4]= '\\';
          code_str[5]= 'u';
          code_str[6]= hexconv[utf16buf[2] >> 4];
          code_str[7]= hexconv[utf16buf[2] & 15];
          code_str[8]= hexconv[utf16buf[3] >> 4];
          code_str[9]= hexconv[utf16buf[3] & 15];
          code_len= 10;
        }

        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_str + code_len)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return JSON_ERROR_OUT_OF_SPACE;
      }
    }
    else /* c_len <= 0, an illegal symbol. */
      return JSON_ERROR_ILLEGAL_SYMBOL;
  }

  return (int)(json - json_start);
}
1791
1792
1793
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
1794
                        const uchar *str, const uchar *end,
1795
                        json_path_t *p)
1796
0
{
1797
0
  json_scan_start(je, i_cs, str, end);
1798
0
  p->last_step= p->steps - 1; 
1799
0
  return 0;
1800
0
}
1801
1802
1803
/*
  Move to the next value of the JSON text and keep 'p' describing the
  path that leads to it.

  The first call (p->last_step before p->steps) reads the top-level
  value and makes step 0 the only step. Later calls first account for
  the value returned last time (count it as an array item if it was a
  scalar, open a new step otherwise) and then scan to the next value.

  @return  0 when a value has been read, 1 on error or when
           json_scan_next() ended the scan.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: the top-level value. */
    if (json_read_value(je))
      return 1;

    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    p->last_step= p->steps;
    return 0;
  }

  if (json_value_scalar(je))
  {
    if (p->last_step->type & JSON_PATH_ARRAY)
      p->last_step->n_item++;
  }
  else
  {
    /* The previous value was a container: descend into it. */
    p->last_step++;
    p->last_step->type= (enum json_path_step_types) je->value_type;
    p->last_step->n_item= 0;
  }

  if (json_scan_next(je))
    return 1;

  for (;;)
  {
    switch (je->state)
    {
    case JST_KEY:
      /* Remember where the key name starts and ends. */
      p->last_step->key= je->s.c_str;
      for (;;)
      {
        p->last_step->key_end= je->s.c_str;
        if (json_read_keyname_chr(je) != 0)
          break;
      }
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      return json_read_value(je) ? 1 : 0;

    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Back to the parent step. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;

    default:
      break;
    }

    if (json_scan_next(je) != 0)
      return 1;
  }
}
1865
1866
1867
/*
  Read the value at the current position and report where it is.

  *value is set to je->value. *value_len is je->value_len for a scalar;
  for anything else the level is skipped and the length is the distance
  from *value to the position reached.

  @return  the value's type converted to enum json_types, or
           JSV_BAD_JSON if the value could not be read or skipped.
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  if (json_read_value(je))
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (json_value_scalar(je))
  {
    *value_len= je->value_len;
  }
  else
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }

  /* The cast below relies on both enums using the same numbers. */
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);

  return (enum json_types) je->value_type;
}
1898
1899
1900
enum json_types json_type(const char *js, const char *js_end,
1901
                          const char **value, int *value_len)
1902
0
{
1903
0
  json_engine_t je;
1904
1905
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1906
0
                  (const uchar *) js_end);
1907
1908
0
  return smart_read_value(&je, value, value_len);
1909
0
}
1910
1911
1912
enum json_types json_get_array_item(const char *js, const char *js_end,
1913
                                    int n_item,
1914
                                    const char **value, int *value_len)
1915
0
{
1916
0
  json_engine_t je;
1917
0
  int c_item= 0;
1918
1919
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1920
0
                  (const uchar *) js_end);
1921
1922
0
  if (json_read_value(&je) ||
1923
0
      je.value_type != JSON_VALUE_ARRAY)
1924
0
    goto err_return;
1925
1926
0
  while (!json_scan_next(&je))
1927
0
  {
1928
0
    switch (je.state)
1929
0
    {
1930
0
    case JST_VALUE:
1931
0
      if (c_item == n_item)
1932
0
        return smart_read_value(&je, value, value_len);
1933
1934
0
      if (json_skip_key(&je))
1935
0
        goto err_return;
1936
1937
0
      c_item++;
1938
0
      break;
1939
1940
0
    case JST_ARRAY_END:
1941
0
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1942
0
      *value_len= c_item;
1943
0
      return JSV_NOTHING;
1944
0
    }
1945
0
  }
1946
1947
0
err_return:
1948
0
  return JSV_BAD_JSON;
1949
0
}
1950
1951
1952
/** Simple json lookup for a value by the key.
1953
1954
  Expects JSON object.
1955
  Only scans the 'first level' of the object, not
1956
  the nested structures.
1957
1958
  @param js          [in]       json object to search in
1959
  @param js_end      [in]       end of json string
1960
  @param key         [in]       key to search for
1961
  @param key_end     [in]         - " -
1962
  @param value_start [out]      pointer into js (value or closing })
1963
  @param value_len   [out]      length of the value found or number of keys
1964
1965
  @retval the type of the key value
1966
  @retval JSV_BAD_JSON - syntax error found reading JSON.
1967
                         or not JSON object.
1968
  @retval JSV_NOTHING - no such key found.
1969
*/
1970
enum json_types json_get_object_key(const char *js, const char *js_end,
1971
                                    const char *key,
1972
                                    const char **value, int *value_len)
1973
2.13k
{
1974
2.13k
  const char *key_end= key + strlen(key);
1975
2.13k
  json_engine_t je;
1976
2.13k
  json_string_t key_name;
1977
2.13k
  int n_keys= 0;
1978
1979
2.13k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1980
1981
2.13k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1982
2.13k
                  (const uchar *) js_end);
1983
1984
2.13k
  if (json_read_value(&je) ||
1985
2.13k
      je.value_type != JSON_VALUE_OBJECT)
1986
326
    goto err_return;
1987
1988
7.83k
  while (!json_scan_next(&je))
1989
7.67k
  {
1990
7.67k
    switch (je.state)
1991
7.67k
    {
1992
7.66k
    case JST_KEY:
1993
7.66k
      n_keys++;
1994
7.66k
      json_string_set_str(&key_name, (const uchar *) key,
1995
7.66k
                          (const uchar *) key_end);
1996
7.66k
      if (json_key_matches(&je, &key_name))
1997
897
        return smart_read_value(&je, value, value_len);
1998
1999
6.76k
      if (json_skip_key(&je))
2000
732
        goto err_return;
2001
2002
6.03k
      break;
2003
2004
6.03k
    case JST_OBJ_END:
2005
10
      *value= (const char *) (je.s.c_str - je.sav_c_len);
2006
10
      *value_len= n_keys;
2007
10
      return JSV_NOTHING;
2008
7.67k
    }
2009
7.67k
  }
2010
2011
1.22k
err_return:
2012
1.22k
  return JSV_BAD_JSON;
2013
1.80k
}
2014
2015
2016
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2017
                                     const char *js_end __attribute__((unused)),
2018
                                     int nkey __attribute__((unused)),
2019
                                     const char **keyname __attribute__((unused)),
2020
                                     const char **keyname_end __attribute__((unused)),
2021
                                     const char **value __attribute__((unused)),
2022
                                     int *value_len __attribute__((unused)))
2023
0
{
2024
0
  return JSV_NOTHING;
2025
0
}
2026
2027
2028
/** Check if json is valid (well-formed)
2029
2030
  @retval 0 - success, json is well-formed
2031
  @retval 1 - error, json is invalid
2032
*/
2033
int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2034
0
{
2035
0
  json_engine_t je;
2036
0
  json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2037
0
  while (json_scan_next(&je) == 0) /* no-op */ ;
2038
0
  return je.s.error == 0;
2039
0
}
2040
2041
2042
/*
2043
  Expects the JSON object as an js argument, and the key name.
2044
  Looks for this key in the object and returns
2045
  the location of all the text related to it.
2046
  The text includes the comma, separating this key.
2047
2048
  comma_pos - the hint where the comma is. It is important
2049
       if you plan to replace the key rather than just cut.
2050
    1  - comma is on the left
2051
    2  - comma is on the right.
2052
    0  - no comma at all (the object has just this single key)
2053
 
2054
  if no such key found *key_start is set to NULL.
2055
*/
2056
int json_locate_key(const char *js, const char *js_end,
2057
                    const char *kname,
2058
                    const char **key_start, const char **key_end,
2059
                    int *comma_pos)
2060
2.13k
{
2061
2.13k
  const char *kname_end= kname + strlen(kname);
2062
2.13k
  json_engine_t je;
2063
2.13k
  json_string_t key_name;
2064
2.13k
  int t_next, c_len, match_result;
2065
2066
2.13k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2067
2068
2.13k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2069
2.13k
                  (const uchar *) js_end);
2070
2071
2.13k
  if (json_read_value(&je) ||
2072
2.13k
      je.value_type != JSON_VALUE_OBJECT)
2073
326
    goto err_return;
2074
2075
1.80k
  *key_start= (const char *) je.s.c_str;
2076
1.80k
  *comma_pos= 0;
2077
2078
7.83k
  while (!json_scan_next(&je))
2079
7.67k
  {
2080
7.67k
    switch (je.state)
2081
7.67k
    {
2082
7.66k
    case JST_KEY:
2083
7.66k
      json_string_set_str(&key_name, (const uchar *) kname,
2084
7.66k
                          (const uchar *) kname_end);
2085
7.66k
      match_result= json_key_matches(&je, &key_name);
2086
7.66k
      if (json_skip_key(&je))
2087
1.56k
        goto err_return;
2088
6.09k
      get_first_nonspace(&je.s, &t_next, &c_len);
2089
6.09k
      je.s.c_str-= c_len;
2090
2091
6.09k
      if (match_result)
2092
63
      {
2093
63
        *key_end= (const char *) je.s.c_str;
2094
2095
63
        if (*comma_pos == 1)
2096
2
          return 0;
2097
2098
61
        DBUG_ASSERT(*comma_pos == 0);
2099
2100
61
        if (t_next == C_COMMA)
2101
1
        {
2102
1
          *key_end+= c_len;
2103
1
          *comma_pos= 2;
2104
1
        }
2105
60
        else if (t_next == C_RCURB)
2106
1
          *comma_pos= 0;
2107
59
        else
2108
59
          goto err_return;
2109
2
        return 0;
2110
61
      }
2111
2112
6.03k
      *key_start= (const char *) je.s.c_str;
2113
6.03k
      *comma_pos= 1;
2114
6.03k
      break;
2115
2116
10
    case JST_OBJ_END:
2117
10
      *key_start= NULL;
2118
10
      return 0;
2119
7.67k
    }
2120
7.67k
  }
2121
2122
2.11k
err_return:
2123
2.11k
  return 1;
2124
2125
1.80k
}