Coverage Report

Created: 2025-08-28 07:34

/src/server/strings/json_lib.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2016, 2022, MariaDB Corporation.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
15
16
#include <my_global.h>
17
#include <string.h>
18
#include <m_ctype.h>
19
#include "json_lib.h"
20
21
/*
22
  JSON escaping lets user specify UTF16 codes of characters.
23
  So we're going to need the UTF16 charset capabilities. Let's import
24
  them from the utf16 charset.
25
*/
26
int my_utf16_uni(CHARSET_INFO *cs,
27
                 my_wc_t *pwc, const uchar *s, const uchar *e);
28
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29
30
31
void json_string_set_str(json_string_t *s,
32
                         const uchar *str, const uchar *end)
33
19.3k
{
34
19.3k
  s->c_str= str;
35
19.3k
  s->str_end= end;
36
19.3k
}
37
38
39
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
40
8.44k
{
41
8.44k
  s->cs= i_cs;
42
8.44k
  s->error= 0;
43
8.44k
  s->wc= i_cs->cset->mb_wc;
44
8.44k
}
45
46
47
static void json_string_setup(json_string_t *s,
48
                              CHARSET_INFO *i_cs, const uchar *str,
49
                              const uchar *end)
50
4.22k
{
51
4.22k
  json_string_set_cs(s, i_cs);
52
4.22k
  json_string_set_str(s, str, end);
53
4.22k
}
54
55
56
enum json_char_classes {
57
  C_EOS,    /* end of string */
58
  C_LCURB,  /* {  */
59
  C_RCURB,  /* } */
60
  C_LSQRB,  /* [ */
61
  C_RSQRB,  /* ] */
62
  C_COLON,  /* : */
63
  C_COMMA,  /* , */
64
  C_QUOTE,  /* " */
65
  C_DIGIT,  /* -0123456789 */
66
  C_LOW_F,  /* 'f' (for "false") */
67
  C_LOW_N,  /* 'n' (for "null") */
68
  C_LOW_T,  /* 't' (for "true") */
69
  C_ETC,    /* everything else */
70
  C_ERR,    /* character disallowed in JSON */
71
  C_BAD,    /* invalid character, charset handler cannot read it */
72
  NR_C_CLASSES, /* Counter for classes that handled with functions. */
73
  C_SPACE   /* space. Doesn't need specific handlers, so after the counter.*/
74
};
75
76
77
/*
78
  This array maps first 128 Unicode Code Points into classes.
79
  The remaining Unicode characters should be mapped to C_ETC.
80
*/
81
82
static enum json_char_classes json_chr_map[128] = {
83
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
84
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
85
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
86
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
87
88
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
89
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
90
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
91
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
92
93
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
94
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
95
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,
97
98
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
99
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
100
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
101
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
102
};
103
104
105
/*
106
  JSON parser actually has more states than the 'enum json_states'
107
  declares. But the rest of the states aren't seen to the user so let's
108
  specify them here to avoid confusion.
109
*/
110
111
enum json_all_states {
112
  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
113
  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
114
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
115
  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
116
  NR_JSON_STATES= NR_JSON_USER_STATES+4
117
};
118
119
120
typedef int (*json_state_handler)(json_engine_t *);
121
122
123
/* The string is broken. */
124
static int unexpected_eos(json_engine_t *j)
125
524
{
126
524
  j->s.error= JE_EOS;
127
524
  return 1;
128
524
}
129
130
131
/* This symbol here breaks the JSON syntax. */
132
static int syntax_error(json_engine_t *j)
133
466
{
134
466
  j->s.error= JE_SYN;
135
466
  return 1;
136
466
}
137
138
139
/* Value of object. */
140
static int mark_object(json_engine_t *j)
141
1.58k
{
142
1.58k
  j->state= JST_OBJ_START;
143
1.58k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
144
1.58k
  {
145
1.58k
    j->stack[j->stack_p]= JST_OBJ_CONT;
146
1.58k
    return 0;
147
1.58k
  }
148
4
  j->s.error= JE_DEPTH;
149
4
  return 1;
150
1.58k
}
151
152
153
/* Read value of object. */
154
static int read_obj(json_engine_t *j)
155
4.34k
{
156
4.34k
  j->state= JST_OBJ_START;
157
4.34k
  j->value_type= JSON_VALUE_OBJECT;
158
4.34k
  j->value= j->value_begin;
159
4.34k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
160
4.34k
  {
161
4.34k
    j->stack[j->stack_p]= JST_OBJ_CONT;
162
4.34k
    return 0;
163
4.34k
  }
164
0
  j->s.error= JE_DEPTH;
165
0
  return 1;
166
4.34k
}
167
168
169
/* Value of array. */
170
static int mark_array(json_engine_t *j)
171
1.41k
{
172
1.41k
  j->state= JST_ARRAY_START;
173
1.41k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
174
1.41k
  {
175
1.41k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
176
1.41k
    j->value= j->value_begin;
177
1.41k
    return 0;
178
1.41k
  }
179
4
  j->s.error= JE_DEPTH;
180
4
  return 1;
181
1.41k
}
182
183
/* Read value of object. */
184
static int read_array(json_engine_t *j)
185
1.44k
{
186
1.44k
  j->state= JST_ARRAY_START;
187
1.44k
  j->value_type= JSON_VALUE_ARRAY;
188
1.44k
  j->value= j->value_begin;
189
1.44k
  if (++j->stack_p < JSON_DEPTH_LIMIT)
190
1.44k
  {
191
1.44k
    j->stack[j->stack_p]= JST_ARRAY_CONT;
192
1.44k
    return 0;
193
1.44k
  }
194
0
  j->s.error= JE_DEPTH;
195
0
  return 1;
196
1.44k
}
197
198
199
200
/*
201
  Character classes inside the JSON string constant.
202
  We mostly need this to parse escaping properly.
203
  Escapings available in JSON are:
204
  \" - quotation mark
205
  \\ - backslash
206
  \b - backspace UNICODE 8
207
  \f - formfeed UNICODE 12
208
  \n - newline UNICODE 10
209
  \r - carriage return UNICODE 13
210
  \t - horizontal tab UNICODE 9
211
  \u{four-hex-digits} - code in UCS16 character set
212
*/
213
enum json_string_char_classes {
214
  S_0= 0,
215
  S_1= 1,
216
  S_2= 2,
217
  S_3= 3,
218
  S_4= 4,
219
  S_5= 5,
220
  S_6= 6,
221
  S_7= 7,
222
  S_8= 8,
223
  S_9= 9,
224
  S_A= 10,
225
  S_B= 11,
226
  S_C= 12,
227
  S_D= 13,
228
  S_E= 14,
229
  S_F= 15,
230
  S_ETC= 36,    /* rest of characters. */
231
  S_QUOTE= 37,
232
  S_BKSL= 38, /* \ */
233
  S_ERR= 100,   /* disallowed */
234
};
235
236
237
/* This maps characters to their types inside a string constant. */
238
static enum json_string_char_classes json_instr_chr_map[128] = {
239
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
240
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
241
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
242
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
243
244
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
245
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
246
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
247
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
248
249
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
250
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
251
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
252
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,
253
254
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
255
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
256
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
257
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
258
};
259
260
261
static int read_4_hexdigits(json_string_t *s, uchar *dest)
262
8.64k
{
263
8.64k
  int i, t, c_len;
264
31.3k
  for (i=0; i<4; i++)
265
27.4k
  {
266
27.4k
    if ((c_len= json_next_char(s)) <= 0)
267
2.64k
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
268
269
24.7k
    if (s->c_next >= 128 || (t= json_instr_chr_map[s->c_next]) > S_F)
270
2.02k
      return s->error= JE_SYN;
271
272
22.7k
    s->c_str+= c_len;
273
22.7k
    dest[i/2]+= (i % 2) ? t : t*16;
274
22.7k
  }
275
3.97k
  return 0;
276
8.64k
}
277
278
279
static int json_handle_esc(json_string_t *s)
280
12.9k
{
281
12.9k
  int t, c_len;
282
  
283
12.9k
  if ((c_len= json_next_char(s)) <= 0)
284
540
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
285
286
12.4k
  s->c_str+= c_len;
287
12.4k
  switch (s->c_next)
288
12.4k
  {
289
698
    case 'b':
290
698
      s->c_next= 8;
291
698
      return 0;
292
216
    case 'f':
293
216
      s->c_next= 12;
294
216
      return 0;
295
844
    case 'n':
296
844
      s->c_next= 10;
297
844
      return 0;
298
234
    case 'r':
299
234
      s->c_next= 13;
300
234
      return 0;
301
208
    case 't':
302
208
      s->c_next= 9;
303
208
      return 0;
304
12.4k
  }
305
306
10.2k
  if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR)
307
236
  {
308
236
    s->c_str-= c_len;
309
236
    return s->error= JE_ESCAPING;
310
236
  }
311
312
313
9.99k
  if (s->c_next != 'u')
314
2.35k
    return 0;
315
316
7.63k
  {
317
    /*
318
      Read the four-hex-digits code.
319
      If symbol is not in the Basic Multilingual Plane, we're reading
320
      the string for the next four digits to compose the UTF-16 surrogate pair.
321
    */
322
7.63k
    uchar code[4]= {0,0,0,0};
323
324
7.63k
    if (read_4_hexdigits(s, code))
325
4.22k
      return 1;
326
327
3.41k
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
328
684
      return 0;
329
330
2.73k
    if (c_len != MY_CS_TOOSMALL4)
331
194
      return s->error= JE_BAD_CHR;
332
333
2.54k
    if ((c_len= json_next_char(s)) <= 0)
334
702
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
335
1.83k
    if (s->c_next != '\\')
336
276
      return s->error= JE_SYN;
337
338
1.56k
    s->c_str+= c_len;
339
1.56k
    if ((c_len= json_next_char(s)) <= 0)
340
294
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
341
1.26k
    if (s->c_next != 'u')
342
260
      return s->error= JE_SYN;
343
1.00k
    s->c_str+= c_len;
344
345
1.00k
    if (read_4_hexdigits(s, code+2))
346
450
      return 1;
347
348
558
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 4)
349
346
      return 0;
350
558
  }
351
212
  return s->error= JE_BAD_CHR;
352
558
}
353
354
355
int json_read_string_const_chr(json_string_t *js)
356
16.7k
{
357
16.7k
  int c_len;
358
359
16.7k
  if ((c_len= json_next_char(js)) > 0)
360
15.3k
  {
361
15.3k
    js->c_str+= c_len;
362
15.3k
    return (js->c_next == '\\') ? json_handle_esc(js) : 0;
363
15.3k
  }
364
1.38k
  js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR; 
365
1.38k
  return 1;
366
16.7k
}
367
368
369
static int skip_str_constant(json_engine_t *j)
370
1.03k
{
371
1.03k
  int t, c_len;
372
1.03k
  for (;;)
373
1.88k
  {
374
1.88k
    if ((c_len= json_next_char(&j->s)) > 0)
375
1.75k
    {
376
1.75k
      j->s.c_str+= c_len;
377
1.75k
      if (j->s.c_next >= 128 || ((t=json_instr_chr_map[j->s.c_next]) <= S_ETC))
378
658
        continue;
379
380
1.10k
      if (j->s.c_next == '"')
381
876
        break;
382
224
      if (j->s.c_next == '\\')
383
204
      {
384
204
        j->value_escaped= 1;
385
204
        if (json_handle_esc(&j->s))
386
14
          return 1;
387
190
        continue;
388
204
      }
389
      /* Symbol not allowed in JSON. */
390
20
      return j->s.error= JE_NOT_JSON_CHR;
391
224
    }
392
124
    else
393
124
      return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; 
394
1.88k
  }
395
396
876
  j->state= j->stack[j->stack_p];
397
876
  return 0;
398
1.03k
}
399
400
401
/* Scalar string. */
402
static int v_string(json_engine_t *j)
403
610
{
404
610
  return skip_str_constant(j) || json_scan_next(j);
405
610
}
406
407
408
/* Read scalar string. */
409
static int read_strn(json_engine_t *j)
410
424
{
411
424
  j->value= j->s.c_str;
412
424
  j->value_type= JSON_VALUE_STRING;
413
424
  j->value_escaped= 0;
414
415
424
  if (skip_str_constant(j))
416
140
    return 1;
417
418
284
  j->state= j->stack[j->stack_p];
419
284
  j->value_len= (int)(j->s.c_str - j->value) - j->s.c_next_len;
420
284
  return 0;
421
424
}
422
423
424
/*
425
  We have dedicated parser for numeric constants. It's similar
426
  to the main JSON parser, we similarly define character classes,
427
  map characters to classes and implement the state-per-class
428
  table. Though we don't create functions that handle
429
  particular classes, just specify what new state should parser
430
  get in this case.
431
*/
432
enum json_num_char_classes {
433
  N_MINUS,
434
  N_PLUS,
435
  N_ZERO,
436
  N_DIGIT,
437
  N_POINT,
438
  N_E,
439
  N_END,
440
  N_EEND,
441
  N_ERR,
442
  N_NUM_CLASSES
443
};
444
445
446
static enum json_num_char_classes json_num_chr_map[128] = {
447
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
448
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
449
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
450
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
451
452
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
453
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
454
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
455
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
456
457
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
458
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
459
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
460
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
461
462
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
463
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
464
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
465
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
466
};
467
468
469
enum json_num_states {
470
  NS_OK,  /* Number ended. */
471
  NS_GO,  /* Initial state. */
472
  NS_GO1, /* If the number starts with '-'. */
473
  NS_Z,   /* If the number starts with '0'. */
474
  NS_Z1,  /* If the numbers starts with '-0'. */
475
  NS_INT, /* Integer part. */
476
  NS_FRAC,/* Fractional part. */
477
  NS_EX,  /* Exponential part begins. */
478
  NS_EX1, /* Exponential part continues. */
479
  NS_NUM_STATES
480
};
481
482
483
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
484
{
485
/*         -        +       0         1..9    POINT    E       END_OK   ERROR */
486
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
487
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
488
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
489
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
490
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
491
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
492
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
493
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR }, 
494
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
495
};
496
497
498
static uint json_num_state_flags[NS_NUM_STATES]=
499
{
500
/*OK*/   0,
501
/*GO*/   0,
502
/*GO1*/  JSON_NUM_NEG,
503
/*ZERO*/ 0,
504
/*ZE1*/  0,
505
/*INT*/  0,
506
/*FRAC*/ JSON_NUM_FRAC_PART,
507
/*EX*/   JSON_NUM_EXP,
508
/*EX1*/  0,
509
};
510
511
512
static int skip_num_constant(json_engine_t *j)
513
11.1k
{
514
11.1k
  int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
515
11.1k
  int c_len;
516
517
11.1k
  j->num_flags= 0;
518
11.1k
  for (;;)
519
11.5k
  {
520
11.5k
    j->num_flags|= json_num_state_flags[state];
521
11.5k
    if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
522
11.4k
    {
523
11.4k
      if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
524
406
      {
525
406
        j->s.c_str+= c_len;
526
406
        continue;
527
406
      }
528
11.0k
      break;
529
11.4k
    }
530
531
128
    if ((j->s.error=
532
128
          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
533
64
      return 1;
534
64
    else
535
64
      break;
536
128
  }
537
538
11.1k
  j->state= j->stack[j->stack_p];
539
11.1k
  return 0;
540
11.1k
}
541
542
543
/* Scalar numeric. */
544
static int v_number(json_engine_t *j)
545
1.07k
{
546
1.07k
  return skip_num_constant(j) || json_scan_next(j);
547
1.07k
}
548
549
550
/* Read numeric constant. */
551
static int read_num(json_engine_t *j)
552
10.1k
{
553
10.1k
  j->value= j->value_begin;
554
10.1k
  if (skip_num_constant(j) == 0)
555
10.0k
  {
556
10.0k
    j->value_type= JSON_VALUE_NUMBER;
557
10.0k
    j->value_len= (int)(j->s.c_str - j->value_begin);
558
10.0k
    return 0;
559
10.0k
  }
560
34
  return 1;
561
10.1k
}
562
563
564
/* Check that the JSON string matches the argument and skip it. */
565
static int skip_string_verbatim(json_string_t *s, const char *str)
566
3.52k
{
567
3.52k
  int c_len;
568
13.1k
  while (*str)
569
10.5k
  {
570
10.5k
    if ((c_len= json_next_char(s)) > 0)
571
10.3k
    {
572
10.3k
      if (s->c_next == (my_wc_t) *(str++))
573
9.62k
      {
574
9.62k
        s->c_str+= c_len;
575
9.62k
        continue;
576
9.62k
      }
577
726
      return s->error= JE_SYN;
578
10.3k
    }
579
210
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; 
580
10.5k
  }
581
582
2.58k
  return 0;
583
3.52k
}
584
585
586
/* Scalar false. */
587
static int v_false(json_engine_t *j)
588
672
{
589
672
  if (skip_string_verbatim(&j->s, "alse"))
590
222
   return 1;
591
450
  j->state= j->stack[j->stack_p];
592
450
  return json_scan_next(j);
593
672
}
594
595
596
/* Scalar null. */
597
static int v_null(json_engine_t *j)
598
744
{
599
744
  if (skip_string_verbatim(&j->s, "ull"))
600
178
   return 1;
601
566
  j->state= j->stack[j->stack_p];
602
566
  return json_scan_next(j);
603
744
}
604
605
606
/* Scalar true. */
607
static int v_true(json_engine_t *j)
608
642
{
609
642
  if (skip_string_verbatim(&j->s, "rue"))
610
176
   return 1;
611
466
  j->state= j->stack[j->stack_p];
612
466
  return json_scan_next(j);
613
642
}
614
615
616
/* Read false. */
617
static int read_false(json_engine_t *j)
618
550
{
619
550
  j->value_type= JSON_VALUE_FALSE;
620
550
  j->value= j->value_begin;
621
550
  j->state= j->stack[j->stack_p];
622
550
  j->value_len= 5;
623
550
  return skip_string_verbatim(&j->s, "alse");
624
550
}
625
626
627
/* Read null. */
628
static int read_null(json_engine_t *j)
629
462
{
630
462
  j->value_type= JSON_VALUE_NULL;
631
462
  j->value= j->value_begin;
632
462
  j->state= j->stack[j->stack_p];
633
462
  j->value_len= 4;
634
462
  return skip_string_verbatim(&j->s, "ull");
635
462
}
636
637
638
/* Read true. */
639
static int read_true(json_engine_t *j)
640
454
{
641
454
  j->value_type= JSON_VALUE_TRUE;
642
454
  j->value= j->value_begin;
643
454
  j->state= j->stack[j->stack_p];
644
454
  j->value_len= 4;
645
454
  return skip_string_verbatim(&j->s, "rue");
646
454
}
647
648
649
/* Disallowed character. */
650
static int not_json_chr(json_engine_t *j)
651
38
{
652
38
  j->s.error= JE_NOT_JSON_CHR;
653
38
  return 1;
654
38
}
655
656
657
/* Bad character. */
658
static int bad_chr(json_engine_t *j)
659
156
{
660
156
  j->s.error= JE_BAD_CHR;
661
156
  return 1;
662
156
}
663
664
665
/* Correct finish. */
666
static int done(json_engine_t *j  __attribute__((unused)))
667
0
{
668
0
  return 1;
669
0
}
670
671
672
/* End of the object. */
673
static int end_object(json_engine_t *j)
674
976
{
675
976
  j->stack_p--;
676
976
  j->state= JST_OBJ_END;
677
976
  return 0;
678
976
}
679
680
681
/* End of the array. */
682
static int end_array(json_engine_t *j)
683
852
{
684
852
  j->stack_p--;
685
852
  j->state= JST_ARRAY_END;
686
852
  return 0;
687
852
}
688
689
690
/* Start reading key name. */
691
static int read_keyname(json_engine_t *j)
692
5.05k
{
693
5.05k
  j->state= JST_KEY;
694
5.05k
  return 0;
695
5.05k
}
696
697
698
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
699
71.3k
{
700
71.3k
  do
701
76.5k
  {
702
76.5k
    if ((*c_len= json_next_char(js)) <= 0)
703
846
      *t_next= json_eos(js) ? C_EOS : C_BAD;
704
75.7k
    else
705
75.7k
    {
706
75.7k
      *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
707
75.7k
      js->c_str+= *c_len;
708
75.7k
    }
709
76.5k
  } while (*t_next == C_SPACE);
710
71.3k
}
711
712
713
/* Next key name. */
714
static int next_key(json_engine_t *j)
715
12.4k
{
716
12.4k
  int t_next, c_len;
717
12.4k
  get_first_nonspace(&j->s, &t_next, &c_len);
718
719
12.4k
  if (t_next == C_QUOTE)
720
12.3k
  {
721
12.3k
    j->state= JST_KEY;
722
12.3k
    return 0;
723
12.3k
  }
724
725
90
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
726
90
              ((t_next == C_BAD) ? JE_BAD_CHR :
727
52
                                   JE_SYN);
728
90
  return 1;
729
12.4k
}
730
731
732
/* Forward declarations. */
733
static int skip_colon(json_engine_t *j);
734
static int skip_key(json_engine_t *j);
735
static int struct_end_cb(json_engine_t *j);
736
static int struct_end_qb(json_engine_t *j);
737
static int struct_end_cm(json_engine_t *j);
738
static int struct_end_eos(json_engine_t *j);
739
740
741
static int next_item(json_engine_t *j)
742
2.40k
{
743
2.40k
  j->state= JST_VALUE;
744
2.40k
  return 0;
745
2.40k
}
746
747
748
static int array_item(json_engine_t *j)
749
2.45k
{
750
2.45k
  j->state= JST_VALUE;
751
2.45k
  j->s.c_str-= j->sav_c_len;
752
2.45k
  return 0;
753
2.45k
}
754
755
756
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
757
/*
758
   EOS              {            }             [             ]
759
   :                ,            "             -0..9         f
760
   n                t              ETC          ERR           BAD
761
*/
762
{
763
  {/*VALUE*/
764
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
765
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
766
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
767
  {/*KEY*/
768
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
769
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
770
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
771
  {/*OBJ_START*/
772
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
773
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
774
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
775
  {/*OBJ_END*/
776
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
777
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
778
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
779
  {/*ARRAY_START*/
780
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
781
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
782
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
783
  {/*ARRAY_END*/
784
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
785
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
786
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
787
  {/*DONE*/
788
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
789
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
790
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
791
  {/*OBJ_CONT*/
792
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
793
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
794
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
795
  {/*ARRAY_CONT*/
796
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
797
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
798
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
799
  {/*READ_VALUE*/
800
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
801
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
802
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
803
};
804
805
806
807
int json_scan_start(json_engine_t *je,
808
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
809
4.22k
{
810
4.22k
  static const uchar no_time_to_die= 0;
811
812
4.22k
  json_string_setup(&je->s, i_cs, str, end);
813
4.22k
  je->stack[0]= JST_DONE;
814
4.22k
  je->stack_p= 0;
815
4.22k
  je->state= JST_VALUE;
816
4.22k
  je->killed_ptr = (uchar*)&no_time_to_die;
817
4.22k
  return 0;
818
4.22k
}
819
820
821
/* Skip colon and the value. */
822
static int skip_colon(json_engine_t *j)
823
658
{
824
658
  int t_next, c_len;
825
826
658
  get_first_nonspace(&j->s, &t_next, &c_len);
827
828
658
  if (t_next == C_COLON)
829
570
  {
830
570
    get_first_nonspace(&j->s, &t_next, &c_len);
831
570
    return json_actions[JST_VALUE][t_next](j);
832
570
 }
833
834
88
  j->s.error= (t_next == C_EOS)  ? JE_EOS :
835
88
              ((t_next == C_BAD) ? JE_BAD_CHR:
836
66
                                   JE_SYN);
837
838
88
  return 1;
839
658
}
840
841
842
/* Skip colon and the value. */
843
static int skip_key(json_engine_t *j)
844
1.59k
{
845
1.59k
  int t_next, c_len;
846
847
1.59k
  if (j->s.c_next<128 && json_instr_chr_map[j->s.c_next] == S_BKSL &&
848
1.59k
      json_handle_esc(&j->s))
849
8
    return 1;
850
851
2.26k
  while (json_read_keyname_chr(j) == 0) {}
852
853
1.59k
  if (j->s.error)
854
100
    return 1;
855
856
1.49k
  get_first_nonspace(&j->s, &t_next, &c_len);
857
1.49k
  return json_actions[JST_VALUE][t_next](j);
858
1.59k
}
859
860
861
/*
862
  Handle EOS after the end of an object or array.
863
  To do that we should pop the stack to see if
864
  we are inside an object, or an array, and
865
  run our 'state machine' accordingly.
866
*/
867
static int struct_end_eos(json_engine_t *j)
868
39
{ return json_actions[j->stack[j->stack_p]][C_EOS](j); }
869
870
871
/*
872
  Handle '}' after the end of an object or array.
873
  To do that we should pop the stack to see if
874
  we are inside an object, or an array, and
875
  run our 'state machine' accordingly.
876
*/
877
static int struct_end_cb(json_engine_t *j)
878
256
{ return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
879
880
881
/*
882
  Handle ']' after the end of an object or array.
883
  To do that we should pop the stack to see if
884
  we are inside an object, or an array, and
885
  run our 'state machine' accordingly.
886
*/
887
static int struct_end_qb(json_engine_t *j)
888
542
{ return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
889
890
891
/*
892
  Handle ',' after the end of an object or array.
893
  To do that we should pop the stack to see if
894
  we are inside an object, or an array, and
895
  run our 'state machine' accordingly.
896
*/
897
static int struct_end_cm(json_engine_t *j)
898
932
{ return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
899
900
901
int json_read_keyname_chr(json_engine_t *j)
902
31.4k
{
903
31.4k
  int c_len, t;
904
905
31.4k
  if ((c_len= json_next_char(&j->s)) > 0)
906
28.9k
  {
907
28.9k
    j->s.c_str+= c_len;
908
28.9k
    if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC)
909
12.2k
      return 0;
910
911
16.7k
    switch (t)
912
16.7k
    {
913
15.3k
    case S_QUOTE:
914
15.3k
      for (;;)  /* Skip spaces until ':'. */
915
15.6k
      {
916
15.6k
        if ((c_len= json_next_char(&j->s)) > 0)
917
15.6k
        {
918
15.6k
          if (j->s.c_next == ':')
919
15.2k
          {
920
15.2k
            j->s.c_str+= c_len;
921
15.2k
            j->state= JST_VALUE;
922
15.2k
            return 1;
923
15.2k
          }
924
925
378
          if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE)
926
296
          {
927
296
            j->s.c_str+= c_len;
928
296
            continue;
929
296
          }
930
82
          j->s.error= JE_SYN;
931
82
          break;
932
378
        }
933
24
        j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
934
24
        break;
935
15.6k
      }
936
106
      return 1;
937
1.33k
    case S_BKSL:
938
1.33k
      return json_handle_esc(&j->s);
939
36
    case S_ERR:
940
36
      j->s.c_str-= c_len;
941
36
      j->s.error= JE_STRING_CONST;
942
36
      return 1;
943
16.7k
    }
944
16.7k
  }
945
2.46k
  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; 
946
2.46k
  return 1;
947
31.4k
}
948
949
950
int json_read_value(json_engine_t *j)
951
19.3k
{
952
19.3k
  int t_next, c_len, res;
953
954
19.3k
  j->value_type= JSON_VALUE_UNINITIALIZED;
955
19.3k
  if (j->state == JST_KEY)
956
8.65k
  {
957
12.4k
    while (json_read_keyname_chr(j) == 0) {}
958
959
8.65k
    if (j->s.error)
960
1.42k
      return 1;
961
8.65k
  }
962
963
17.9k
  get_first_nonspace(&j->s, &t_next, &c_len);
964
965
17.9k
  j->value_begin= j->s.c_str-c_len;
966
17.9k
  res= json_actions[JST_READ_VALUE][t_next](j);
967
17.9k
  j->value_end= j->s.c_str;
968
17.9k
  return res;
969
19.3k
}
970
971
972
int json_scan_next(json_engine_t *j)
973
32.1k
{
974
32.1k
  int t_next;
975
976
32.1k
  get_first_nonspace(&j->s, &t_next, &j->sav_c_len);
977
32.1k
  return *j->killed_ptr || json_actions[j->state][t_next](j);
978
32.1k
}
979
980
981
enum json_path_chr_classes {
982
  P_EOS,    /* end of string */
983
  P_USD,    /* $ */
984
  P_ASTER,  /* * */
985
  P_LSQRB,  /* [ */
986
  P_T,      /* t (for to) */
987
  P_RSQRB,  /* ] */
988
  P_POINT,  /* . */
989
  P_NEG,    /* hyphen (for negative index in path) */
990
  P_ZERO,   /* 0 */
991
  P_DIGIT,  /* 123456789 */
992
  P_L,      /* l (for "lax") */
993
  P_S,      /* s (for "strict") */
994
  P_SPACE,  /* space */
995
  P_BKSL,   /* \ */
996
  P_QUOTE,  /* " */
997
  P_ETC,    /* everything else */
998
  P_ERR,    /* character disallowed in JSON*/
999
  P_BAD,    /* invalid character */
1000
  N_PATH_CLASSES,
1001
};
1002
1003
1004
static enum json_path_chr_classes json_path_chr_map[128] = {
1005
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1006
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
1007
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1008
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
1009
1010
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
1011
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_NEG,   P_POINT, P_ETC,
1012
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
1013
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1014
1015
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1016
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1017
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1018
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,
1019
1020
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
1021
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
1022
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_T,   P_ETC,   P_ETC,   P_ETC,
1023
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
1024
};
1025
1026
1027
enum json_path_states {
1028
  PS_GO,  /* Initial state. */
1029
  PS_LAX, /* Parse the 'lax' keyword. */
1030
  PS_PT,  /* New path's step begins. */
1031
  PS_AR,  /* Parse array step. */
1032
  PS_SAR, /* space after the '['. */
1033
  PS_AWD, /* Array wildcard. */
1034
  PS_NEG,  /*  Parse '-' (hyphen) */
1035
  PS_Z,   /* '0' (as an array item number). */
1036
  PS_INT, /* Parse integer (as an array item number). */
1037
  PS_AS,  /* Space. */
1038
  PS_KEY, /* Key. */
1039
  PS_KNM, /* Parse key name. */
1040
  PS_KWD, /* Key wildcard. */
1041
  PS_AST, /* Asterisk. */
1042
  PS_DWD, /* Double wildcard. */
1043
  PS_KEYX, /* Key started with quote ("). */
1044
  PS_KNMX, /* Parse quoted key name. */
1045
  PS_LAST, /* Parse 'last' keyword */
1046
  PS_T,    /* Parse 'to' keyword. */
1047
  N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1048
  PS_SCT,  /* Parse the 'strict' keyword. */
1049
  PS_EKY,  /* '.' after the keyname so next step is the key. */
1050
  PS_EKYX, /* Closing " for the quoted keyname. */
1051
  PS_EAR,  /* '[' after the keyname so next step is the array. */
1052
  PS_ESC,  /* Escaping in the keyname. */
1053
  PS_ESCX, /* Escaping in the quoted keyname. */
1054
  PS_OK,   /* Path normally ended. */
1055
  PS_KOK   /* EOS after the keyname so end the path normally. */
1056
};
1057
1058
1059
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1060
{
1061
/*
1062
            EOS       $,      *       [       to       ]       .       -
1063
            0       1..9    L       S       SPACE   \       "       ETC
1064
            ERR              BAD
1065
*/
1066
/* GO  */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1067
            JE_SYN, JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1068
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1069
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1070
            JE_SYN, JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1071
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1072
/* PT */  { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1073
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1074
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1075
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_NEG,
1076
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1077
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1078
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1079
            PS_Z, PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1080
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1081
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1082
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1083
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1084
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1085
            PS_INT, PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1086
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1087
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1088
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1089
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1090
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1091
            PS_INT, PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1092
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1093
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_T, PS_PT, JE_SYN, PS_NEG,
1094
            PS_Z, PS_INT, PS_LAST, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1095
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1096
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, PS_KNM, JE_SYN, PS_KNM,
1097
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1098
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1099
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_KNM, PS_EKY, PS_KNM,
1100
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1101
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1102
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, JE_SYN, PS_EKY, JE_SYN,
1103
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1104
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1105
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1106
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1107
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1108
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, JE_SYN, PS_KEY, JE_SYN,
1109
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1110
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1111
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,
1112
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1113
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1114
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1115
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1116
            JE_NOT_JSON_CHR, JE_BAD_CHR},
1117
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1118
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1119
            JE_SYN, JE_BAD_CHR},
1120
/* T */   { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG,
1121
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1122
            JE_SYN, JE_BAD_CHR},
1123
};
1124
1125
1126
int json_path_setup(json_path_t *p,
1127
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1128
0
{
1129
0
 int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0,
1130
0
  prev_value=0, is_to= 0, *cur_val;
1131
0
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1132
0
  json_string_setup(&p->s, i_cs, str, end);
1133
1134
0
  p->steps[0].type= JSON_PATH_ARRAY_WILD;
1135
0
  p->last_step= p->steps;
1136
0
  p->mode_strict= FALSE;
1137
0
  p->types_used= JSON_PATH_KEY_NULL;
1138
1139
0
  do
1140
0
  {
1141
0
    if ((c_len= json_next_char(&p->s)) <= 0)
1142
0
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1143
0
    else
1144
0
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1145
1146
0
    if ((state= json_path_transitions[state][t_next]) < 0)
1147
0
      return p->s.error= state;
1148
1149
0
    p->s.c_str+= c_len;
1150
1151
0
    switch (state)
1152
0
    {
1153
0
    case PS_LAX:
1154
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1155
0
        return 1;
1156
0
      p->mode_strict= FALSE;
1157
0
      continue;
1158
0
    case PS_SCT:
1159
0
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1160
0
        return 1;
1161
0
      p->mode_strict= TRUE;
1162
0
      state= PS_LAX;
1163
0
      continue;
1164
0
    case PS_KWD:
1165
0
    case PS_AWD:
1166
0
      p->last_step->type|= JSON_PATH_WILD;
1167
0
      p->types_used|= JSON_PATH_WILD;
1168
0
      continue;
1169
0
    case PS_INT:
1170
0
      cur_val= is_to ? &(p->last_step->n_item_end) :
1171
0
                       &(p->last_step->n_item);
1172
0
      if (is_last)
1173
0
      {
1174
0
        prev_value*= 10;
1175
0
        prev_value-= p->s.c_next - '0';
1176
0
        *cur_val= -1 + prev_value;
1177
0
      }
1178
0
      else
1179
0
      {
1180
0
        (*cur_val)*= 10;
1181
0
        if (is_negative_index)
1182
0
          *cur_val-= p->s.c_next - '0';
1183
0
        else
1184
0
          *cur_val+= p->s.c_next - '0';
1185
0
      }
1186
0
      continue;
1187
0
    case PS_EKYX:
1188
0
      p->last_step->key_end= p->s.c_str - c_len;
1189
0
      state= PS_PT;
1190
0
      continue;
1191
0
    case PS_EKY:
1192
0
      p->last_step->key_end= p->s.c_str - c_len;
1193
0
      state= PS_KEY;
1194
      /* fall through */
1195
0
    case PS_KEY:
1196
0
      p->last_step++;
1197
0
      is_to= 0;
1198
0
      prev_value= 0;
1199
0
      is_negative_index= 0;
1200
0
      is_last= 0;
1201
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1202
0
        return p->s.error= JE_DEPTH;
1203
0
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1204
0
      double_wildcard= JSON_PATH_KEY_NULL;
1205
      /* fall through */
1206
0
    case PS_KEYX:
1207
0
      p->last_step->key= p->s.c_str;
1208
0
      continue;
1209
0
    case PS_EAR:
1210
0
      p->last_step->key_end= p->s.c_str - c_len;
1211
0
      state= PS_AR;
1212
      /* fall through */
1213
0
    case PS_AR:
1214
0
      p->last_step++;
1215
0
      is_last= 0;
1216
0
      is_to= 0;
1217
0
      prev_value= 0;
1218
0
      is_negative_index= 0;
1219
0
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1220
0
        return p->s.error= JE_DEPTH;
1221
0
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1222
0
      double_wildcard= JSON_PATH_KEY_NULL;
1223
0
      p->last_step->n_item= 0;
1224
0
      continue;
1225
0
    case PS_ESC:
1226
0
      if (json_handle_esc(&p->s))
1227
0
        return 1;
1228
0
      state= PS_KNM;
1229
0
      continue;
1230
0
    case PS_ESCX:
1231
0
      if (json_handle_esc(&p->s))
1232
0
        return 1;
1233
0
      state= PS_KNMX;
1234
0
      continue;
1235
0
    case PS_KOK:
1236
0
      p->last_step->key_end= p->s.c_str - c_len;
1237
0
      state= PS_OK;
1238
0
      break; /* 'break' as the loop supposed to end after that. */
1239
0
    case PS_DWD:
1240
0
      double_wildcard= JSON_PATH_DOUBLE_WILD;
1241
0
      continue;
1242
0
    case PS_NEG:
1243
0
       p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1244
0
       is_negative_index= 1;
1245
0
       continue;
1246
0
    case PS_LAST:
1247
0
      if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
1248
0
       return 1;
1249
0
      p->types_used|= JSON_PATH_NEGATIVE_INDEX;
1250
0
      is_last= 1;
1251
0
      if (is_to)
1252
0
        p->last_step->n_item_end= -1;
1253
0
      else
1254
0
        p->last_step->n_item= -1;
1255
0
      continue;
1256
0
    case PS_T:
1257
0
      if ((p->s.error= skip_string_verbatim(&p->s, "o")))
1258
0
        return 1;
1259
0
      is_to= 1;
1260
0
      is_negative_index= 0;
1261
0
      is_last= 0;
1262
0
      prev_value= 0;
1263
0
      p->last_step->n_item_end= 0;
1264
0
      p->last_step->type|= JSON_PATH_ARRAY_RANGE;
1265
0
      p->types_used|= JSON_PATH_ARRAY_RANGE;
1266
0
      continue;
1267
0
    };
1268
0
  } while (state != PS_OK);
1269
1270
0
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
1271
0
}
1272
1273
1274
int json_skip_to_level(json_engine_t *j, int level)
1275
2.23k
{
1276
14.1k
  do {
1277
14.1k
    if (j->stack_p < level)
1278
628
      return 0;
1279
14.1k
  } while (json_scan_next(j) == 0);
1280
1281
1.60k
  return 1;
1282
2.23k
}
1283
1284
1285
/*
1286
  works as json_skip_level() but also counts items on the current
1287
  level skipped.
1288
*/
1289
int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1290
0
{
1291
0
  int level= j->stack_p;
1292
1293
0
  *n_items_skipped= 0;
1294
0
  while (json_scan_next(j) == 0)
1295
0
  {
1296
0
    if (j->stack_p < level)
1297
0
      return 0;
1298
0
    if (j->stack_p == level && j->state == JST_VALUE)
1299
0
      (*n_items_skipped)++;
1300
0
  }
1301
1302
0
  return 1;
1303
0
}
1304
1305
1306
int json_skip_array_and_count(json_engine_t *je, int *n_items)
1307
0
{
1308
0
  json_engine_t j= *je;
1309
0
  *n_items= 0;
1310
1311
0
  return json_skip_level_and_count(&j, n_items); 
1312
0
}
1313
1314
1315
int json_skip_key(json_engine_t *j)
1316
14.2k
{
1317
14.2k
  if (json_read_value(j))
1318
1.06k
    return 1;
1319
1320
13.1k
  if (json_value_scalar(j))
1321
11.3k
    return 0;
1322
1323
1.79k
  return json_skip_level(j);
1324
13.1k
}
1325
1326
1327
0
#define SKIPPED_STEP_MARK INT_MAX32
1328
1329
/*
1330
  Current step of the patch matches the JSON construction.
1331
  Now we should either stop the search or go to the next
1332
  step of the path.
1333
*/
1334
static int handle_match(json_engine_t *je, json_path_t *p,
1335
                        json_path_step_t **p_cur_step, int *array_counters)
1336
0
{
1337
0
  json_path_step_t *next_step= *p_cur_step + 1;
1338
1339
0
  DBUG_ASSERT(*p_cur_step < p->last_step);
1340
1341
0
  if (json_read_value(je))
1342
0
    return 1;
1343
1344
0
  if (json_value_scalar(je))
1345
0
  {
1346
0
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1347
0
    {
1348
0
      if (++next_step > p->last_step)
1349
0
      {
1350
0
        je->s.c_str= je->value_begin;
1351
0
        return 1;
1352
0
      }
1353
0
    }
1354
0
    return 0;
1355
0
  }
1356
1357
0
  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1358
0
      je->value_type & JSON_VALUE_OBJECT)
1359
0
  {
1360
0
    do
1361
0
    {
1362
0
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1363
0
      if (++next_step > p->last_step)
1364
0
      {
1365
0
        je->s.c_str= je->value_begin;
1366
0
        je->stack_p--;
1367
0
        return 1;
1368
0
      }
1369
0
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1370
0
  }
1371
1372
0
  if ((int) je->value_type !=
1373
0
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1374
0
    return json_skip_level(je);
1375
1376
0
  array_counters[next_step - p->steps]= 0;
1377
0
  if (next_step->type & JSON_PATH_ARRAY)
1378
0
  {
1379
0
    int array_size;
1380
0
    if (next_step->n_item >= 0)
1381
0
      array_size= 0;
1382
0
    else
1383
0
    {
1384
0
      json_engine_t j2= *je;
1385
0
      if (json_skip_array_and_count(&j2, &array_size))
1386
0
      {
1387
0
        *je= j2;
1388
0
        return 1;
1389
0
      }
1390
0
      array_size= -array_size;
1391
0
    }
1392
0
    array_counters[next_step - p->steps]= array_size;
1393
0
  }
1394
1395
0
  *p_cur_step= next_step;
1396
0
  return 0;
1397
0
}
1398
1399
1400
/*
1401
  Check if the name of the current JSON key matches
1402
  the step of the path.
1403
*/
1404
int json_key_matches(json_engine_t *je, json_string_t *k)
1405
15.1k
{
1406
16.7k
  while (json_read_keyname_chr(je) == 0)
1407
9.03k
  {
1408
9.03k
    if (json_read_string_const_chr(k) ||
1409
9.03k
        je->s.c_next != k->c_next)
1410
7.44k
      return 0;
1411
9.03k
  }
1412
1413
7.72k
  return json_read_string_const_chr(k);
1414
15.1k
}
1415
1416
1417
int json_find_path(json_engine_t *je,
1418
                   json_path_t *p, json_path_step_t **p_cur_step,
1419
                   int *array_counters)
1420
0
{
1421
0
  json_string_t key_name;
1422
0
  int res= 0;
1423
1424
0
  json_string_set_cs(&key_name, p->s.cs);
1425
1426
0
  do
1427
0
  {
1428
0
    json_path_step_t *cur_step= *p_cur_step;
1429
0
    switch (je->state)
1430
0
    {
1431
0
    case JST_KEY:
1432
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
1433
0
      if (!(cur_step->type & JSON_PATH_WILD))
1434
0
      {
1435
0
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
1436
0
        if (!json_key_matches(je, &key_name))
1437
0
        {
1438
0
          if (json_skip_key(je))
1439
0
            goto exit;
1440
0
          continue;
1441
0
        }
1442
0
      }
1443
0
      if (cur_step == p->last_step ||
1444
0
          handle_match(je, p, p_cur_step, array_counters))
1445
0
        goto exit;
1446
0
      break;
1447
0
    case JST_VALUE:
1448
0
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
1449
0
      if (cur_step->type & JSON_PATH_ARRAY_RANGE)
1450
0
      {
1451
0
        res= (cur_step->n_item <= array_counters[cur_step - p->steps] &&
1452
0
              cur_step->n_item_end >= array_counters[cur_step - p->steps]);
1453
0
        array_counters[cur_step - p->steps]++;
1454
0
      }
1455
0
      else
1456
0
        res= cur_step->n_item == array_counters[cur_step - p->steps]++;
1457
0
      if ((cur_step->type & JSON_PATH_WILD) || res)
1458
0
      {
1459
        /* Array item matches. */
1460
0
        if (cur_step == p->last_step ||
1461
0
            handle_match(je, p, p_cur_step, array_counters))
1462
0
          goto exit;
1463
0
      }
1464
0
      else
1465
0
        json_skip_array_item(je);
1466
0
      break;
1467
0
    case JST_OBJ_END:
1468
0
      do
1469
0
      {
1470
0
        (*p_cur_step)--;
1471
0
      } while (*p_cur_step > p->steps &&
1472
0
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
1473
0
      break;
1474
0
    case JST_ARRAY_END:
1475
0
      (*p_cur_step)--;
1476
0
      break;
1477
0
    default:
1478
0
      DBUG_ASSERT(0);
1479
0
      break;
1480
0
    };
1481
0
  } while (json_scan_next(je) == 0);
1482
1483
  /* No luck. */
1484
0
  return 1;
1485
1486
0
exit:
1487
0
  return je->s.error;
1488
0
}
1489
1490
1491
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
1492
                          uint n_paths, json_path_t *paths, uint *path_depths)
1493
0
{
1494
0
  state->n_paths= n_paths;
1495
0
  state->paths= paths;
1496
0
  state->cur_depth= 0;
1497
0
  state->path_depths= path_depths;
1498
0
  return json_find_paths_next(je, state);
1499
0
}
1500
1501
1502
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
1503
0
{
1504
0
  uint p_c;
1505
0
  int path_found, no_match_found;
1506
0
  do
1507
0
  {
1508
0
    switch (je->state)
1509
0
    {
1510
0
    case JST_KEY:
1511
0
      path_found= FALSE;
1512
0
      no_match_found= TRUE;
1513
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1514
0
      {
1515
0
        json_path_step_t *cur_step;
1516
0
        if (state->path_depths[p_c] <
1517
0
              state->cur_depth /* Path already failed. */ ||
1518
0
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1519
0
              JSON_PATH_KEY))
1520
0
          continue;
1521
1522
0
        if (!(cur_step->type & JSON_PATH_WILD))
1523
0
        {
1524
0
          json_string_t key_name;
1525
0
          json_string_setup(&key_name, state->paths[p_c].s.cs,
1526
0
                            cur_step->key, cur_step->key_end);
1527
0
          if (!json_key_matches(je, &key_name))
1528
0
            continue;
1529
0
        }
1530
0
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
1531
0
          path_found= TRUE;
1532
0
        else
1533
0
        {
1534
0
          no_match_found= FALSE;
1535
0
          state->path_depths[p_c]= state->cur_depth + 1;
1536
0
        }
1537
0
      }
1538
0
      if (path_found)
1539
        /* Return the result. */
1540
0
        goto exit;
1541
0
      if (no_match_found)
1542
0
      {
1543
        /* No possible paths left to check. Just skip the level. */
1544
0
        if (json_skip_level(je))
1545
0
          goto exit;
1546
0
      }
1547
1548
0
      break;
1549
0
    case JST_VALUE:
1550
0
      path_found= FALSE;
1551
0
      no_match_found= TRUE;
1552
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1553
0
      {
1554
0
        json_path_step_t *cur_step;
1555
0
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
1556
0
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1557
0
              JSON_PATH_ARRAY))
1558
0
          continue;
1559
0
        if (cur_step->type & JSON_PATH_WILD ||
1560
0
            cur_step->n_item == state->array_counters[state->cur_depth])
1561
0
        {
1562
          /* Array item matches. */
1563
0
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
1564
0
            path_found= TRUE;
1565
0
          else
1566
0
          {
1567
0
            no_match_found= FALSE;
1568
0
            state->path_depths[p_c]= state->cur_depth + 1;
1569
0
          }
1570
0
        }
1571
0
      }
1572
1573
0
      if (path_found)
1574
0
        goto exit;
1575
1576
0
      if (no_match_found)
1577
0
        json_skip_array_item(je);
1578
1579
0
      state->array_counters[state->cur_depth]++;
1580
0
      break;
1581
0
    case JST_OBJ_START:
1582
0
    case JST_ARRAY_START:
1583
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1584
0
      {
1585
0
        if (state->path_depths[p_c] < state->cur_depth)
1586
          /* Path already failed. */
1587
0
          continue;
1588
0
        if (state->paths[p_c].steps[state->cur_depth].type &
1589
0
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
1590
0
          state->path_depths[p_c]++;
1591
0
      }
1592
0
      state->cur_depth++;
1593
0
      break;
1594
0
    case JST_OBJ_END:
1595
0
    case JST_ARRAY_END:
1596
0
      for (p_c=0; p_c < state->n_paths; p_c++)
1597
0
      {
1598
0
        if (state->path_depths[p_c] < state->cur_depth)
1599
0
          continue;
1600
0
        state->path_depths[p_c]--;
1601
0
      }
1602
0
      state->cur_depth--;
1603
0
      break;
1604
0
    default:
1605
0
      DBUG_ASSERT(0);
1606
0
      break;
1607
0
    };
1608
0
  } while (json_scan_next(je) == 0);
1609
1610
  /* No luck. */
1611
0
  return 1;
1612
1613
0
exit:
1614
0
  return je->s.error;
1615
0
}
1616
1617
1618
int json_append_ascii(CHARSET_INFO *json_cs,
1619
                      uchar *json, uchar *json_end,
1620
                      const uchar *ascii, const uchar *ascii_end)
1621
0
{
1622
0
  const uchar *json_start= json;
1623
0
  while (ascii < ascii_end)
1624
0
  {
1625
0
    int c_len;
1626
0
    if ((c_len= my_ci_wc_mb(json_cs, (my_wc_t) *ascii, json, json_end)) > 0)
1627
0
    {
1628
0
      json+= c_len;
1629
0
      ascii++;
1630
0
      continue;
1631
0
    }
1632
1633
    /* Error return. */
1634
0
    return c_len;
1635
0
  }
1636
1637
0
  return (int)(json - json_start);
1638
0
}
1639
1640
1641
int json_unescape(CHARSET_INFO *json_cs,
1642
                  const uchar *json_str, const uchar *json_end,
1643
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
1644
0
{
1645
0
  json_string_t s;
1646
0
  const uchar *res_b= res;
1647
1648
0
  json_string_setup(&s, json_cs, json_str, json_end);
1649
0
  while (json_read_string_const_chr(&s) == 0)
1650
0
  {
1651
0
    int c_len;
1652
0
    if ((c_len= my_ci_wc_mb(res_cs, s.c_next, res, res_end)) > 0)
1653
0
    {
1654
0
      res+= c_len;
1655
0
      continue;
1656
0
    }
1657
0
    if (c_len == MY_CS_ILUNI)
1658
0
    {
1659
0
      return JSON_ERROR_ILLEGAL_SYMBOL;
1660
0
    }
1661
    /* Result buffer is too small. */
1662
0
    return JSON_ERROR_OUT_OF_SPACE;
1663
0
  }
1664
1665
0
  return s.error==JE_EOS ? (int)(res - res_b) : JSON_ERROR_OUT_OF_SPACE;
1666
0
}
1667
1668
1669
/* When we need to replace a character with the escaping. */
1670
enum json_esc_char_classes {
1671
  ESC_= 0,    /* No need to escape. */
1672
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
1673
  ESC_B= 'b', /* Backspace. Escape as \b */
1674
  ESC_F= 'f', /* Formfeed. Escape as \f */
1675
  ESC_N= 'n', /* Newline. Escape as \n */
1676
  ESC_R= 'r', /* Return. Escape as \r */
1677
  ESC_T= 't', /* Tab. Escape as \s */
1678
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
1679
};
1680
1681
1682
/* This specifies how we should escape the character. */
1683
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
1684
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1685
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
1686
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1687
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
1688
1689
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1690
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1691
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1692
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1693
1694
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1695
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1696
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
1697
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
1698
};
1699
1700
1701
static const char hexconv[17] = "0123456789ABCDEF";
1702
1703
1704
int json_escape(CHARSET_INFO *str_cs,
1705
                const uchar *str, const uchar *str_end,
1706
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
1707
0
{
1708
0
  const uchar *json_start= json;
1709
1710
0
  while (str < str_end)
1711
0
  {
1712
0
    my_wc_t c_chr;
1713
0
    int c_len;
1714
0
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
1715
0
    {
1716
0
      enum json_esc_char_classes c_class;
1717
      
1718
0
      str+= c_len;
1719
0
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
1720
0
      {
1721
0
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
1722
0
        {
1723
0
          json+= c_len;
1724
0
          continue;
1725
0
        }
1726
0
        if (c_len < 0)
1727
0
        {
1728
          /* JSON buffer is depleted. */
1729
0
          return JSON_ERROR_OUT_OF_SPACE;
1730
0
        }
1731
1732
        /* JSON charset cannot convert this character. */
1733
0
        c_class= ESC_U;
1734
0
      }
1735
1736
0
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
1737
0
          (c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
1738
0
                                       json+= c_len, json_end)) <= 0)
1739
0
      {
1740
        /* JSON buffer is depleted. */
1741
0
        return JSON_ERROR_OUT_OF_SPACE;
1742
0
      }
1743
0
      json+= c_len;
1744
1745
0
      if (c_class != ESC_U)
1746
0
        continue;
1747
1748
0
      {
1749
        /* We have to use /uXXXX escaping. */
1750
0
        uchar utf16buf[4];
1751
0
        uchar code_str[8];
1752
0
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);
1753
1754
0
        code_str[0]= hexconv[utf16buf[0] >> 4];
1755
0
        code_str[1]= hexconv[utf16buf[0] & 15];
1756
0
        code_str[2]= hexconv[utf16buf[1] >> 4];
1757
0
        code_str[3]= hexconv[utf16buf[1] & 15];
1758
1759
0
        if (u_len > 2)
1760
0
        {
1761
0
          code_str[4]= hexconv[utf16buf[2] >> 4];
1762
0
          code_str[5]= hexconv[utf16buf[2] & 15];
1763
0
          code_str[6]= hexconv[utf16buf[3] >> 4];
1764
0
          code_str[7]= hexconv[utf16buf[3] & 15];
1765
0
        }
1766
        
1767
0
        if ((c_len= json_append_ascii(json_cs, json, json_end,
1768
0
                                      code_str, code_str+u_len*2)) > 0)
1769
0
        {
1770
0
          json+= c_len;
1771
0
          continue;
1772
0
        }
1773
        /* JSON buffer is depleted. */
1774
0
        return JSON_ERROR_OUT_OF_SPACE;
1775
0
      }
1776
0
    }
1777
0
    else /* c_len == 0, an illegal symbol. */
1778
0
      return JSON_ERROR_ILLEGAL_SYMBOL;
1779
0
  }
1780
1781
0
  return (int)(json - json_start);
1782
0
}
1783
1784
1785
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
1786
                        const uchar *str, const uchar *end,
1787
                        json_path_t *p)
1788
0
{
1789
0
  json_scan_start(je, i_cs, str, end);
1790
0
  p->last_step= p->steps - 1; 
1791
0
  return 0;
1792
0
}
1793
1794
1795
int json_get_path_next(json_engine_t *je, json_path_t *p)
1796
0
{
1797
0
  if (p->last_step < p->steps)
1798
0
  {
1799
0
    if (json_read_value(je))
1800
0
      return 1;
1801
1802
0
    p->last_step= p->steps;
1803
0
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
1804
0
    p->steps[0].n_item= 0;
1805
0
    return 0;
1806
0
  }
1807
0
  else
1808
0
  {
1809
0
    if (json_value_scalar(je))
1810
0
    {
1811
0
      if (p->last_step->type & JSON_PATH_ARRAY)
1812
0
        p->last_step->n_item++;
1813
0
    }
1814
0
    else
1815
0
    {
1816
0
      p->last_step++;
1817
0
      p->last_step->type= (enum json_path_step_types) je->value_type;
1818
0
      p->last_step->n_item= 0;
1819
0
    }
1820
1821
0
    if (json_scan_next(je))
1822
0
      return 1;
1823
0
  }
1824
1825
0
  do
1826
0
  {
1827
0
    switch (je->state)
1828
0
    {
1829
0
    case JST_KEY:
1830
0
      p->last_step->key= je->s.c_str;
1831
0
      do
1832
0
      {
1833
0
        p->last_step->key_end= je->s.c_str;
1834
0
      } while (json_read_keyname_chr(je) == 0);
1835
0
      if (je->s.error)
1836
0
        return 1;
1837
      /* Now we have je.state == JST_VALUE, so let's handle it. */
1838
1839
      /* fall through */
1840
0
    case JST_VALUE:
1841
0
      if (json_read_value(je))
1842
0
        return 1;
1843
0
      return 0;
1844
0
    case JST_OBJ_END:
1845
0
    case JST_ARRAY_END:
1846
0
      p->last_step--;
1847
0
      if (p->last_step->type & JSON_PATH_ARRAY)
1848
0
        p->last_step->n_item++;
1849
0
      break;
1850
0
    default:
1851
0
      break;
1852
0
    }
1853
0
  } while (json_scan_next(je) == 0);
1854
1855
0
  return 1;
1856
0
}
1857
1858
1859
static enum json_types smart_read_value(json_engine_t *je,
1860
                                        const char **value, int *value_len)
1861
908
{
1862
908
  if (json_read_value(je))
1863
435
    goto err_return;
1864
1865
473
  *value= (char *) je->value;
1866
1867
473
  if (json_value_scalar(je))
1868
37
    *value_len= je->value_len;
1869
436
  else
1870
436
  {
1871
436
    if (json_skip_level(je))
1872
421
      goto err_return;
1873
1874
15
    *value_len= (int) ((char *) je->s.c_str - *value);
1875
15
  }
1876
1877
52
  compile_time_assert((enum json_types)JSON_VALUE_OBJECT == JSV_OBJECT);
1878
52
  compile_time_assert((enum json_types)JSON_VALUE_ARRAY == JSV_ARRAY);
1879
52
  compile_time_assert((enum json_types)JSON_VALUE_STRING == JSV_STRING);
1880
52
  compile_time_assert((enum json_types)JSON_VALUE_NUMBER == JSV_NUMBER);
1881
52
  compile_time_assert((enum json_types)JSON_VALUE_TRUE == JSV_TRUE);
1882
52
  compile_time_assert((enum json_types)JSON_VALUE_FALSE == JSV_FALSE);
1883
52
  compile_time_assert((enum json_types)JSON_VALUE_NULL == JSV_NULL);
1884
1885
52
  return (enum json_types) je->value_type;
1886
1887
856
err_return:
1888
856
  return JSV_BAD_JSON;
1889
473
}
1890
1891
1892
enum json_types json_type(const char *js, const char *js_end,
1893
                          const char **value, int *value_len)
1894
0
{
1895
0
  json_engine_t je;
1896
1897
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1898
0
                  (const uchar *) js_end);
1899
1900
0
  return smart_read_value(&je, value, value_len);
1901
0
}
1902
1903
1904
enum json_types json_get_array_item(const char *js, const char *js_end,
1905
                                    int n_item,
1906
                                    const char **value, int *value_len)
1907
0
{
1908
0
  json_engine_t je;
1909
0
  int c_item= 0;
1910
1911
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1912
0
                  (const uchar *) js_end);
1913
1914
0
  if (json_read_value(&je) ||
1915
0
      je.value_type != JSON_VALUE_ARRAY)
1916
0
    goto err_return;
1917
1918
0
  while (!json_scan_next(&je))
1919
0
  {
1920
0
    switch (je.state)
1921
0
    {
1922
0
    case JST_VALUE:
1923
0
      if (c_item == n_item)
1924
0
        return smart_read_value(&je, value, value_len);
1925
1926
0
      if (json_skip_key(&je))
1927
0
        goto err_return;
1928
1929
0
      c_item++;
1930
0
      break;
1931
1932
0
    case JST_ARRAY_END:
1933
0
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1934
0
      *value_len= c_item;
1935
0
      return JSV_NOTHING;
1936
0
    }
1937
0
  }
1938
1939
0
err_return:
1940
0
  return JSV_BAD_JSON;
1941
0
}
1942
1943
1944
/** Simple json lookup for a value by the key.
1945
1946
  Expects JSON object.
1947
  Only scans the 'first level' of the object, not
1948
  the nested structures.
1949
1950
  @param js          [in]       json object to search in
1951
  @param js_end      [in]       end of json string
1952
  @param key         [in]       key to search for
1953
  @param key_end     [in]         - " -
1954
  @param value_start [out]      pointer into js (value or closing })
1955
  @param value_len   [out]      length of the value found or number of keys
1956
1957
  @retval the type of the key value
1958
  @retval JSV_BAD_JSON - syntax error found reading JSON.
1959
                         or not JSON object.
1960
  @retval JSV_NOTHING - no such key found.
1961
*/
1962
enum json_types json_get_object_key(const char *js, const char *js_end,
1963
                                    const char *key,
1964
                                    const char **value, int *value_len)
1965
2.11k
{
1966
2.11k
  const char *key_end= key + strlen(key);
1967
2.11k
  json_engine_t je;
1968
2.11k
  json_string_t key_name;
1969
2.11k
  int n_keys= 0;
1970
1971
2.11k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1972
1973
2.11k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1974
2.11k
                  (const uchar *) js_end);
1975
1976
2.11k
  if (json_read_value(&je) ||
1977
2.11k
      je.value_type != JSON_VALUE_OBJECT)
1978
333
    goto err_return;
1979
1980
7.75k
  while (!json_scan_next(&je))
1981
7.58k
  {
1982
7.58k
    switch (je.state)
1983
7.58k
    {
1984
7.58k
    case JST_KEY:
1985
7.58k
      n_keys++;
1986
7.58k
      json_string_set_str(&key_name, (const uchar *) key,
1987
7.58k
                          (const uchar *) key_end);
1988
7.58k
      if (json_key_matches(&je, &key_name))
1989
908
        return smart_read_value(&je, value, value_len);
1990
1991
6.67k
      if (json_skip_key(&je))
1992
698
        goto err_return;
1993
1994
5.97k
      break;
1995
1996
5.97k
    case JST_OBJ_END:
1997
7
      *value= (const char *) (je.s.c_str - je.sav_c_len);
1998
7
      *value_len= n_keys;
1999
7
      return JSV_NOTHING;
2000
7.58k
    }
2001
7.58k
  }
2002
2003
1.19k
err_return:
2004
1.19k
  return JSV_BAD_JSON;
2005
1.77k
}
2006
2007
2008
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2009
                                     const char *js_end __attribute__((unused)),
2010
                                     int nkey __attribute__((unused)),
2011
                                     const char **keyname __attribute__((unused)),
2012
                                     const char **keyname_end __attribute__((unused)),
2013
                                     const char **value __attribute__((unused)),
2014
                                     int *value_len __attribute__((unused)))
2015
0
{
2016
0
  json_engine_t je;
2017
0
  int keys_found= 0;
2018
2019
0
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2020
0
                  (const uchar *) js_end);
2021
2022
0
  if (json_read_value(&je) ||
2023
0
      je.value_type != JSON_VALUE_OBJECT)
2024
0
    goto err_return;
2025
2026
0
  while (!json_scan_next(&je))
2027
0
  {
2028
0
    switch (je.state)
2029
0
    {
2030
0
    case JST_KEY:
2031
0
      if (nkey == keys_found)
2032
0
      {
2033
0
        *keyname= (char *) je.s.c_str;
2034
0
        while (json_read_keyname_chr(&je) == 0)
2035
0
          *keyname_end= (char *) je.s.c_str;
2036
2037
0
        return smart_read_value(&je, value, value_len);
2038
0
      }
2039
2040
0
      keys_found++;
2041
0
      if (json_skip_key(&je))
2042
0
        goto err_return;
2043
2044
0
      break;
2045
2046
0
    case JST_OBJ_END:
2047
0
      return JSV_NOTHING;
2048
0
    }
2049
0
  }
2050
2051
0
err_return:
2052
0
  return JSV_BAD_JSON;
2053
0
}
2054
2055
2056
/** Check if json is valid (well-formed)
2057
2058
  @retval 0 - success, json is well-formed
2059
  @retval 1 - error, json is invalid
2060
*/
2061
int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2062
0
{
2063
0
  json_engine_t je;
2064
0
  json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2065
0
  while (json_scan_next(&je) == 0) /* no-op */ ;
2066
0
  return je.s.error == 0;
2067
0
}
2068
2069
2070
/*
2071
  Expects the JSON object as an js argument, and the key name.
2072
  Looks for this key in the object and returns
2073
  the location of all the text related to it.
2074
  The text includes the comma, separating this key.
2075
2076
  comma_pos - the hint where the comma is. It is important
2077
       if you plan to replace the key rather than just cut.
2078
    1  - comma is on the left
2079
    2  - comma is on the right.
2080
    0  - no comma at all (the object has just this single key)
2081
 
2082
  if no such key found *key_start is set to NULL.
2083
*/
2084
int json_locate_key(const char *js, const char *js_end,
2085
                    const char *kname,
2086
                    const char **key_start, const char **key_end,
2087
                    int *comma_pos)
2088
2.11k
{
2089
2.11k
  const char *kname_end= kname + strlen(kname);
2090
2.11k
  json_engine_t je;
2091
2.11k
  json_string_t key_name;
2092
2.11k
  int t_next, c_len, match_result;
2093
2094
2.11k
  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2095
2096
2.11k
  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2097
2.11k
                  (const uchar *) js_end);
2098
2099
2.11k
  if (json_read_value(&je) ||
2100
2.11k
      je.value_type != JSON_VALUE_OBJECT)
2101
333
    goto err_return;
2102
2103
1.77k
  *key_start= (const char *) je.s.c_str;
2104
1.77k
  *comma_pos= 0;
2105
2106
7.75k
  while (!json_scan_next(&je))
2107
7.58k
  {
2108
7.58k
    switch (je.state)
2109
7.58k
    {
2110
7.58k
    case JST_KEY:
2111
7.58k
      json_string_set_str(&key_name, (const uchar *) kname,
2112
7.58k
                          (const uchar *) kname_end);
2113
7.58k
      match_result= json_key_matches(&je, &key_name);
2114
7.58k
      if (json_skip_key(&je))
2115
1.55k
        goto err_return;
2116
6.02k
      get_first_nonspace(&je.s, &t_next, &c_len);
2117
6.02k
      je.s.c_str-= c_len;
2118
2119
6.02k
      if (match_result)
2120
52
      {
2121
52
        *key_end= (const char *) je.s.c_str;
2122
2123
52
        if (*comma_pos == 1)
2124
2
          return 0;
2125
2126
50
        DBUG_ASSERT(*comma_pos == 0);
2127
2128
50
        if (t_next == C_COMMA)
2129
1
        {
2130
1
          *key_end+= c_len;
2131
1
          *comma_pos= 2;
2132
1
        }
2133
49
        else if (t_next == C_RCURB)
2134
1
          *comma_pos= 0;
2135
48
        else
2136
48
          goto err_return;
2137
2
        return 0;
2138
50
      }
2139
2140
5.97k
      *key_start= (const char *) je.s.c_str;
2141
5.97k
      *comma_pos= 1;
2142
5.97k
      break;
2143
2144
7
    case JST_OBJ_END:
2145
7
      *key_start= NULL;
2146
7
      return 0;
2147
7.58k
    }
2148
7.58k
  }
2149
2150
2.10k
err_return:
2151
2.10k
  return 1;
2152
2153
1.77k
}