Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/common/jsonapi.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * jsonapi.c
4
 *    JSON parser and lexer interfaces
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 * IDENTIFICATION
10
 *    src/common/jsonapi.c
11
 *
12
 *-------------------------------------------------------------------------
13
 */
14
#ifndef FRONTEND
15
#include "postgres.h"
16
#else
17
#include "postgres_fe.h"
18
#endif
19
20
#include "common/jsonapi.h"
21
#include "mb/pg_wchar.h"
22
#include "port/pg_lfind.h"
23
24
#ifdef JSONAPI_USE_PQEXPBUFFER
25
#include "pqexpbuffer.h"
26
#else
27
#include "lib/stringinfo.h"
28
#include "miscadmin.h"
29
#endif
30
31
/*
32
 * By default, we will use palloc/pfree along with StringInfo.  In libpq,
33
 * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
34
 */
35
#ifdef JSONAPI_USE_PQEXPBUFFER
36
37
#define STRDUP(s) strdup(s)
38
#define ALLOC(size) malloc(size)
39
#define ALLOC0(size) calloc(1, size)
40
#define REALLOC realloc
41
#define FREE(s) free(s)
42
43
#define jsonapi_appendStringInfo      appendPQExpBuffer
44
#define jsonapi_appendBinaryStringInfo    appendBinaryPQExpBuffer
45
#define jsonapi_appendStringInfoChar    appendPQExpBufferChar
46
/* XXX should we add a macro version to PQExpBuffer? */
47
#define jsonapi_appendStringInfoCharMacro appendPQExpBufferChar
48
#define jsonapi_makeStringInfo        createPQExpBuffer
49
#define jsonapi_initStringInfo        initPQExpBuffer
50
#define jsonapi_resetStringInfo       resetPQExpBuffer
51
#define jsonapi_termStringInfo        termPQExpBuffer
52
#define jsonapi_destroyStringInfo     destroyPQExpBuffer
53
54
#else             /* !JSONAPI_USE_PQEXPBUFFER */
55
56
0
#define STRDUP(s) pstrdup(s)
57
0
#define ALLOC(size) palloc(size)
58
1.53k
#define ALLOC0(size) palloc0(size)
59
0
#define REALLOC repalloc
60
61
#ifdef FRONTEND
62
#define FREE pfree
63
#else
64
/*
65
 * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
66
 * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
67
 * argument to avoid future hair-pulling.
68
 */
69
2
#define FREE(s) do { \
70
2
  void *__v = (s);  \
71
2
  if (__v)     \
72
2
    pfree(__v);   \
73
2
} while (0)
74
#endif
75
76
0
#define jsonapi_appendStringInfo      appendStringInfo
77
13.2k
#define jsonapi_appendBinaryStringInfo    appendBinaryStringInfo
78
29.4k
#define jsonapi_appendStringInfoChar    appendStringInfoChar
79
0
#define jsonapi_appendStringInfoCharMacro appendStringInfoCharMacro
80
1.53k
#define jsonapi_makeStringInfo        makeStringInfo
81
0
#define jsonapi_initStringInfo        initStringInfo
82
1.43k
#define jsonapi_resetStringInfo       resetStringInfo
83
0
#define jsonapi_termStringInfo(s)     pfree((s)->data)
84
0
#define jsonapi_destroyStringInfo     destroyStringInfo
85
86
#endif              /* JSONAPI_USE_PQEXPBUFFER */
87
88
/*
89
 * The context of the parser is maintained by the recursive descent
90
 * mechanism, but is passed explicitly to the error reporting routine
91
 * for better diagnostics.
92
 */
93
typedef enum          /* contexts of JSON parser */
94
{
95
  JSON_PARSE_VALUE,     /* expecting a value */
96
  JSON_PARSE_STRING,      /* expecting a string (for a field name) */
97
  JSON_PARSE_ARRAY_START,   /* saw '[', expecting value or ']' */
98
  JSON_PARSE_ARRAY_NEXT,    /* saw array element, expecting ',' or ']' */
99
  JSON_PARSE_OBJECT_START,  /* saw '{', expecting label or '}' */
100
  JSON_PARSE_OBJECT_LABEL,  /* saw object label, expecting ':' */
101
  JSON_PARSE_OBJECT_NEXT,   /* saw object value, expecting ',' or '}' */
102
  JSON_PARSE_OBJECT_COMMA,  /* saw object ',', expecting next label */
103
  JSON_PARSE_END,       /* saw the end of a document, expect nothing */
104
} JsonParseContext;
105
106
/*
107
 * Setup for table-driven parser.
108
 * These enums need to be separate from the JsonTokenType and from each other
109
 * so we can have all of them on the prediction stack, which consists of
110
 * tokens, non-terminals, and semantic action markers.
111
 */
112
113
enum JsonNonTerminal
114
{
115
  JSON_NT_JSON = 32,
116
  JSON_NT_ARRAY_ELEMENTS,
117
  JSON_NT_MORE_ARRAY_ELEMENTS,
118
  JSON_NT_KEY_PAIRS,
119
  JSON_NT_MORE_KEY_PAIRS,
120
};
121
122
enum JsonParserSem
123
{
124
  JSON_SEM_OSTART = 64,
125
  JSON_SEM_OEND,
126
  JSON_SEM_ASTART,
127
  JSON_SEM_AEND,
128
  JSON_SEM_OFIELD_INIT,
129
  JSON_SEM_OFIELD_START,
130
  JSON_SEM_OFIELD_END,
131
  JSON_SEM_AELEM_START,
132
  JSON_SEM_AELEM_END,
133
  JSON_SEM_SCALAR_INIT,
134
  JSON_SEM_SCALAR_CALL,
135
};
136
137
/*
138
 * struct containing the 3 stacks used in non-recursive parsing,
139
 * and the token and value for scalars that need to be preserved
140
 * across calls.
141
 *
142
 * typedef appears in jsonapi.h
143
 */
144
struct JsonParserStack
145
{
146
  int     stack_size;
147
  char     *prediction;
148
  size_t    pred_index;
149
  /* these two are indexed by lex_level */
150
  char    **fnames;
151
  bool     *fnull;
152
  JsonTokenType scalar_tok;
153
  char     *scalar_val;
154
};
155
156
/*
157
 * struct containing state used when there is a possible partial token at the
158
 * end of a json chunk when we are doing incremental parsing.
159
 *
160
 * typedef appears in jsonapi.h
161
 */
162
struct JsonIncrementalState
163
{
164
  bool    started;
165
  bool    is_last_chunk;
166
  bool    partial_completed;
167
  jsonapi_StrValType partial_token;
168
};
169
170
/*
171
 * constants and macros used in the nonrecursive parser
172
 */
173
#define JSON_NUM_TERMINALS 13
174
#define JSON_NUM_NONTERMINALS 5
175
0
#define JSON_NT_OFFSET JSON_NT_JSON
176
/* for indexing the table */
177
0
#define OFS(NT) (NT) - JSON_NT_OFFSET
178
/* classify items we get off the stack */
179
0
#define IS_SEM(x) ((x) & 0x40)
180
0
#define IS_NT(x)  ((x) & 0x20)
181
182
/*
183
 * These productions are stored in reverse order right to left so that when
184
 * they are pushed on the stack what we expect next is at the top of the stack.
185
 */
186
static char JSON_PROD_EPSILON[] = {0};  /* epsilon - an empty production */
187
188
/* JSON -> string */
189
static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
190
191
/* JSON -> number */
192
static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
193
194
/* JSON -> 'true' */
195
static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
196
197
/* JSON -> 'false' */
198
static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
199
200
/* JSON -> 'null' */
201
static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
202
203
/* JSON -> '{' KEY_PAIRS '}' */
204
static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
205
206
/* JSON -> '[' ARRAY_ELEMENTS ']' */
207
static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
208
209
/* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
210
static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
211
212
/* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
213
static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
214
215
/* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
216
static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
217
218
/* MORE_KEY_PAIRS -> ',' string ':'  JSON MORE_KEY_PAIRS */
219
static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
220
221
/*
222
 * Note: there are also epsilon productions for ARRAY_ELEMENTS,
223
 * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
224
 * They are all the same as none require any semantic actions.
225
 */
226
227
/*
228
 * Table connecting the productions with their director sets of
229
 * terminal symbols.
230
 * Any combination not specified here represents an error.
231
 */
232
233
typedef struct
234
{
235
  size_t    len;
236
  char     *prod;
237
} td_entry;
238
239
0
#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
240
241
static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
242
{
243
  /* JSON */
244
  [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
245
  [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
246
  [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
247
  [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
248
  [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
249
  [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
250
  [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
251
  /* ARRAY_ELEMENTS */
252
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
253
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
254
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
255
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
256
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
257
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
258
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
259
  [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
260
  /* MORE_ARRAY_ELEMENTS */
261
  [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
262
  [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
263
  /* KEY_PAIRS */
264
  [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
265
  [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
266
  /* MORE_KEY_PAIRS */
267
  [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
268
  [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
269
};
270
271
/* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
272
static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
273
274
static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
275
static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
276
                         bool *num_err, size_t *total_len);
277
static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
278
static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
279
static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
280
static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
281
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
282
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
283
static bool allocate_incremental_state(JsonLexContext *lex);
284
static inline void set_fname(JsonLexContext *lex, char *fname);
285
286
/* the null action object used for pure validation */
287
const JsonSemAction nullSemAction =
288
{
289
  NULL, NULL, NULL, NULL, NULL,
290
  NULL, NULL, NULL, NULL, NULL
291
};
292
293
/* sentinels used for out-of-memory conditions */
294
static JsonLexContext failed_oom;
295
static JsonIncrementalState failed_inc_oom;
296
297
/* Parser support routines */
298
299
/*
300
 * lex_peek
301
 *
302
 * what is the current look_ahead token?
303
*/
304
static inline JsonTokenType
305
lex_peek(JsonLexContext *lex)
306
10.5k
{
307
10.5k
  return lex->token_type;
308
10.5k
}
309
310
/*
311
 * lex_expect
312
 *
313
 * move the lexer to the next token if the current look_ahead token matches
314
 * the parameter token. Otherwise, report an error.
315
 */
316
static inline JsonParseErrorType
317
lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
318
3.17k
{
319
3.17k
  if (lex_peek(lex) == token)
320
3.13k
    return json_lex(lex);
321
45
  else
322
45
    return report_parse_error(ctx, lex);
323
3.17k
}
324
325
/* chars to consider as part of an alphanumeric token */
326
#define JSON_ALPHANUMERIC_CHAR(c)  \
327
4.75M
  (((c) >= 'a' && (c) <= 'z') || \
328
4.75M
   ((c) >= 'A' && (c) <= 'Z') || \
329
4.75M
   ((c) >= '0' && (c) <= '9') || \
330
4.75M
   (c) == '_' || \
331
4.75M
   IS_HIGHBIT_SET(c))
332
333
/*
334
 * Utility function to check if a string is a valid JSON number.
335
 *
336
 * str is of length len, and need not be null-terminated.
337
 */
338
bool
339
IsValidJsonNumber(const char *str, size_t len)
340
0
{
341
0
  bool    numeric_error;
342
0
  size_t    total_len;
343
0
  JsonLexContext dummy_lex = {0};
344
345
0
  if (len <= 0)
346
0
    return false;
347
348
  /*
349
   * json_lex_number expects a leading  '-' to have been eaten already.
350
   *
351
   * having to cast away the constness of str is ugly, but there's not much
352
   * easy alternative.
353
   */
354
0
  if (*str == '-')
355
0
  {
356
0
    dummy_lex.input = str + 1;
357
0
    dummy_lex.input_length = len - 1;
358
0
  }
359
0
  else
360
0
  {
361
0
    dummy_lex.input = str;
362
0
    dummy_lex.input_length = len;
363
0
  }
364
365
0
  dummy_lex.token_start = dummy_lex.input;
366
367
0
  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
368
369
0
  return (!numeric_error) && (total_len == dummy_lex.input_length);
370
0
}
371
372
/*
373
 * makeJsonLexContextCstringLen
374
 *    Initialize the given JsonLexContext object, or create one
375
 *
376
 * If a valid 'lex' pointer is given, it is initialized.  This can
377
 * be used for stack-allocated structs, saving overhead.  If NULL is
378
 * given, a new struct is allocated.
379
 *
380
 * If need_escapes is true, ->strval stores the unescaped lexemes.
381
 * Unescaping is expensive, so only request it when necessary.
382
 *
383
 * If need_escapes is true or lex was given as NULL, then caller is
384
 * responsible for freeing the returned struct, either by calling
385
 * freeJsonLexContext() or (in backend environment) via memory context
386
 * cleanup.
387
 *
388
 * In shlib code, any out-of-memory failures will be deferred to time
389
 * of use; this function is guaranteed to return a valid JsonLexContext.
390
 */
391
JsonLexContext *
392
makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
393
               size_t len, int encoding, bool need_escapes)
394
1.53k
{
395
1.53k
  if (lex == NULL)
396
1.53k
  {
397
1.53k
    lex = ALLOC0(sizeof(JsonLexContext));
398
1.53k
    if (!lex)
399
0
      return &failed_oom;
400
1.53k
    lex->flags |= JSONLEX_FREE_STRUCT;
401
1.53k
  }
402
0
  else
403
0
    memset(lex, 0, sizeof(JsonLexContext));
404
405
1.53k
  lex->errormsg = NULL;
406
1.53k
  lex->input = lex->token_terminator = lex->line_start = json;
407
1.53k
  lex->line_number = 1;
408
1.53k
  lex->input_length = len;
409
1.53k
  lex->input_encoding = encoding;
410
1.53k
  lex->need_escapes = need_escapes;
411
1.53k
  if (need_escapes)
412
1.53k
  {
413
    /*
414
     * This call can fail in shlib code. We defer error handling to time
415
     * of use (json_lex_string()) since we might not need to parse any
416
     * strings anyway.
417
     */
418
1.53k
    lex->strval = jsonapi_makeStringInfo();
419
1.53k
    lex->flags |= JSONLEX_FREE_STRVAL;
420
1.53k
  }
421
422
1.53k
  return lex;
423
1.53k
}
424
425
/*
426
 * Allocates the internal bookkeeping structures for incremental parsing. This
427
 * can only fail in-band with shlib code.
428
 */
429
0
#define JS_STACK_CHUNK_SIZE 64
430
0
#define JS_MAX_PROD_LEN 10    /* more than we need */
431
0
#define JSON_TD_MAX_STACK 6400  /* hard coded for now - this is a REALLY high
432
                 * number */
433
static bool
434
allocate_incremental_state(JsonLexContext *lex)
435
0
{
436
0
  void     *pstack,
437
0
         *prediction,
438
0
         *fnames,
439
0
         *fnull;
440
441
0
  lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
442
0
  pstack = ALLOC0(sizeof(JsonParserStack));
443
0
  prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
444
0
  fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
445
0
  fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
446
447
#ifdef JSONAPI_USE_PQEXPBUFFER
448
  if (!lex->inc_state
449
    || !pstack
450
    || !prediction
451
    || !fnames
452
    || !fnull)
453
  {
454
    FREE(lex->inc_state);
455
    FREE(pstack);
456
    FREE(prediction);
457
    FREE(fnames);
458
    FREE(fnull);
459
460
    lex->inc_state = &failed_inc_oom;
461
    return false;
462
  }
463
#endif
464
465
0
  jsonapi_initStringInfo(&(lex->inc_state->partial_token));
466
0
  lex->pstack = pstack;
467
0
  lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
468
0
  lex->pstack->prediction = prediction;
469
0
  lex->pstack->fnames = fnames;
470
0
  lex->pstack->fnull = fnull;
471
472
  /*
473
   * fnames between 0 and lex_level must always be defined so that
474
   * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
475
   * the rest.
476
   */
477
0
  Assert(lex->lex_level == 0);
478
0
  lex->pstack->fnames[0] = NULL;
479
480
0
  lex->incremental = true;
481
0
  return true;
482
0
}
483
484
485
/*
486
 * makeJsonLexContextIncremental
487
 *
488
 * Similar to above but set up for use in incremental parsing. That means we
489
 * need explicit stacks for predictions, field names and null indicators, but
490
 * we don't need the input, that will be handed in bit by bit to the
491
 * parse routine. We also need an accumulator for partial tokens in case
492
 * the boundary between chunks happens to fall in the middle of a token.
493
 *
494
 * In shlib code, any out-of-memory failures will be deferred to time of use;
495
 * this function is guaranteed to return a valid JsonLexContext.
496
 */
497
JsonLexContext *
498
makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
499
                bool need_escapes)
500
0
{
501
0
  if (lex == NULL)
502
0
  {
503
0
    lex = ALLOC0(sizeof(JsonLexContext));
504
0
    if (!lex)
505
0
      return &failed_oom;
506
507
0
    lex->flags |= JSONLEX_FREE_STRUCT;
508
0
  }
509
0
  else
510
0
    memset(lex, 0, sizeof(JsonLexContext));
511
512
0
  lex->line_number = 1;
513
0
  lex->input_encoding = encoding;
514
515
0
  if (!allocate_incremental_state(lex))
516
0
  {
517
0
    if (lex->flags & JSONLEX_FREE_STRUCT)
518
0
    {
519
0
      FREE(lex);
520
0
      return &failed_oom;
521
0
    }
522
523
    /* lex->inc_state tracks the OOM failure; we can return here. */
524
0
    return lex;
525
0
  }
526
527
0
  lex->need_escapes = need_escapes;
528
0
  if (need_escapes)
529
0
  {
530
    /*
531
     * This call can fail in shlib code. We defer error handling to time
532
     * of use (json_lex_string()) since we might not need to parse any
533
     * strings anyway.
534
     */
535
0
    lex->strval = jsonapi_makeStringInfo();
536
0
    lex->flags |= JSONLEX_FREE_STRVAL;
537
0
  }
538
539
0
  return lex;
540
0
}
541
542
void
543
setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
544
0
{
545
0
  if (lex->incremental && lex->inc_state->started)
546
0
  {
547
    /*
548
     * Switching this flag after parsing has already started is a
549
     * programming error.
550
     */
551
0
    Assert(false);
552
0
    return;
553
0
  }
554
555
0
  if (owned_by_context)
556
0
    lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
557
0
  else
558
0
    lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
559
0
}
560
561
static inline bool
562
inc_lex_level(JsonLexContext *lex)
563
0
{
564
0
  if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
565
0
  {
566
0
    size_t    new_stack_size;
567
0
    char     *new_prediction;
568
0
    char    **new_fnames;
569
0
    bool     *new_fnull;
570
571
0
    new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;
572
573
0
    new_prediction = REALLOC(lex->pstack->prediction,
574
0
                 new_stack_size * JS_MAX_PROD_LEN);
575
#ifdef JSONAPI_USE_PQEXPBUFFER
576
    if (!new_prediction)
577
      return false;
578
#endif
579
0
    lex->pstack->prediction = new_prediction;
580
581
0
    new_fnames = REALLOC(lex->pstack->fnames,
582
0
               new_stack_size * sizeof(char *));
583
#ifdef JSONAPI_USE_PQEXPBUFFER
584
    if (!new_fnames)
585
      return false;
586
#endif
587
0
    lex->pstack->fnames = new_fnames;
588
589
0
    new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
590
#ifdef JSONAPI_USE_PQEXPBUFFER
591
    if (!new_fnull)
592
      return false;
593
#endif
594
0
    lex->pstack->fnull = new_fnull;
595
596
0
    lex->pstack->stack_size = new_stack_size;
597
0
  }
598
599
0
  lex->lex_level += 1;
600
601
0
  if (lex->incremental)
602
0
  {
603
    /*
604
     * Ensure freeJsonLexContext() remains safe even if no fname is
605
     * assigned at this level.
606
     */
607
0
    lex->pstack->fnames[lex->lex_level] = NULL;
608
0
  }
609
610
0
  return true;
611
0
}
612
613
static inline void
614
dec_lex_level(JsonLexContext *lex)
615
0
{
616
0
  set_fname(lex, NULL);   /* free the current level's fname, if needed */
617
0
  lex->lex_level -= 1;
618
0
}
619
620
static inline void
621
push_prediction(JsonParserStack *pstack, td_entry entry)
622
0
{
623
0
  memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
624
0
  pstack->pred_index += entry.len;
625
0
}
626
627
static inline char
628
pop_prediction(JsonParserStack *pstack)
629
0
{
630
0
  Assert(pstack->pred_index > 0);
631
0
  return pstack->prediction[--pstack->pred_index];
632
0
}
633
634
static inline char
635
next_prediction(JsonParserStack *pstack)
636
0
{
637
0
  Assert(pstack->pred_index > 0);
638
0
  return pstack->prediction[pstack->pred_index - 1];
639
0
}
640
641
static inline bool
642
have_prediction(JsonParserStack *pstack)
643
0
{
644
0
  return pstack->pred_index > 0;
645
0
}
646
647
static inline void
648
set_fname(JsonLexContext *lex, char *fname)
649
0
{
650
0
  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
651
0
  {
652
    /*
653
     * Don't leak prior fnames. If one hasn't been assigned yet,
654
     * inc_lex_level ensured that it's NULL (and therefore safe to free).
655
     */
656
0
    FREE(lex->pstack->fnames[lex->lex_level]);
657
0
  }
658
659
0
  lex->pstack->fnames[lex->lex_level] = fname;
660
0
}
661
662
static inline char *
663
get_fname(JsonLexContext *lex)
664
0
{
665
0
  return lex->pstack->fnames[lex->lex_level];
666
0
}
667
668
static inline void
669
set_fnull(JsonLexContext *lex, bool fnull)
670
0
{
671
0
  lex->pstack->fnull[lex->lex_level] = fnull;
672
0
}
673
674
static inline bool
675
get_fnull(JsonLexContext *lex)
676
0
{
677
0
  return lex->pstack->fnull[lex->lex_level];
678
0
}
679
680
/*
681
 * Free memory in a JsonLexContext.
682
 *
683
 * There's no need for this if a *lex pointer was given when the object was
684
 * made, need_escapes was false, and json_errdetail() was not called; or if (in
685
 * backend environment) a memory context delete/reset is imminent.
686
 */
687
void
688
freeJsonLexContext(JsonLexContext *lex)
689
0
{
690
0
  static const JsonLexContext empty = {0};
691
692
0
  if (!lex || lex == &failed_oom)
693
0
    return;
694
695
0
  if (lex->flags & JSONLEX_FREE_STRVAL)
696
0
    jsonapi_destroyStringInfo(lex->strval);
697
698
0
  if (lex->errormsg)
699
0
    jsonapi_destroyStringInfo(lex->errormsg);
700
701
0
  if (lex->incremental)
702
0
  {
703
0
    jsonapi_termStringInfo(&lex->inc_state->partial_token);
704
0
    FREE(lex->inc_state);
705
0
    FREE(lex->pstack->prediction);
706
707
0
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
708
0
    {
709
0
      int     i;
710
711
      /* Clean up any tokens that were left behind. */
712
0
      for (i = 0; i <= lex->lex_level; i++)
713
0
        FREE(lex->pstack->fnames[i]);
714
0
    }
715
716
0
    FREE(lex->pstack->fnames);
717
0
    FREE(lex->pstack->fnull);
718
0
    FREE(lex->pstack->scalar_val);
719
0
    FREE(lex->pstack);
720
0
  }
721
722
0
  if (lex->flags & JSONLEX_FREE_STRUCT)
723
0
    FREE(lex);
724
0
  else
725
0
    *lex = empty;
726
0
}
727
728
/*
729
 * pg_parse_json
730
 *
731
 * Publicly visible entry point for the JSON parser.
732
 *
733
 * lex is a lexing context, set up for the json to be processed by calling
734
 * makeJsonLexContext(). sem is a structure of function pointers to semantic
735
 * action routines to be called at appropriate spots during parsing, and a
736
 * pointer to a state object to be passed to those routines.
737
 *
738
 * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
739
 * JSON parser. This is a useful way to validate that it's doing the right
740
 * thing at least for non-incremental cases. If this is on we expect to see
741
 * regression diffs relating to error messages about stack depth, but no
742
 * other differences.
743
 */
744
JsonParseErrorType
pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
{
#ifdef FORCE_JSON_PSTACK
	/*
	 * We don't need partial token processing, there is only one chunk. But we
	 * still need to init the partial token string so that freeJsonLexContext
	 * works, so perform the full incremental initialization.
	 */
	if (!allocate_incremental_state(lex))
		return JSON_OUT_OF_MEMORY;

	return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);

#else

	JsonTokenType tok;
	JsonParseErrorType result;

	/* the shared failed_oom sentinel signals an earlier allocation failure */
	if (lex == &failed_oom)
		return JSON_OUT_OF_MEMORY;
	/* incremental contexts must go through pg_parse_json_incremental() */
	if (lex->incremental)
		return JSON_INVALID_LEXER_TYPE;

	/* get the initial token */
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
		return result;

	tok = lex_peek(lex);

	/* parse by recursive descent */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			result = parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			result = parse_array(lex, sem);
			break;
		default:
			result = parse_scalar(lex, sem);	/* json can be a bare scalar */
	}

	/* after the top-level value, only end-of-input may follow */
	if (result == JSON_SUCCESS)
		result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);

	return result;
#endif
}
794
795
/*
796
 * json_count_array_elements
797
 *
798
 * Returns number of array elements in lex context at start of array token
799
 * until end of array token at same nesting level.
800
 *
801
 * Designed to be called from array_start routines.
802
 */
803
JsonParseErrorType
804
json_count_array_elements(JsonLexContext *lex, int *elements)
805
0
{
806
0
  JsonLexContext copylex;
807
0
  int     count;
808
0
  JsonParseErrorType result;
809
810
0
  if (lex == &failed_oom)
811
0
    return JSON_OUT_OF_MEMORY;
812
813
  /*
814
   * It's safe to do this with a shallow copy because the lexical routines
815
   * don't scribble on the input. They do scribble on the other pointers
816
   * etc, so doing this with a copy makes that safe.
817
   */
818
0
  memcpy(&copylex, lex, sizeof(JsonLexContext));
819
0
  copylex.need_escapes = false; /* not interested in values here */
820
0
  copylex.lex_level++;
821
822
0
  count = 0;
823
0
  result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
824
0
            JSON_TOKEN_ARRAY_START);
825
0
  if (result != JSON_SUCCESS)
826
0
    return result;
827
0
  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
828
0
  {
829
0
    while (1)
830
0
    {
831
0
      count++;
832
0
      result = parse_array_element(&copylex, &nullSemAction);
833
0
      if (result != JSON_SUCCESS)
834
0
        return result;
835
0
      if (copylex.token_type != JSON_TOKEN_COMMA)
836
0
        break;
837
0
      result = json_lex(&copylex);
838
0
      if (result != JSON_SUCCESS)
839
0
        return result;
840
0
    }
841
0
  }
842
0
  result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
843
0
            JSON_TOKEN_ARRAY_END);
844
0
  if (result != JSON_SUCCESS)
845
0
    return result;
846
847
0
  *elements = count;
848
0
  return JSON_SUCCESS;
849
0
}
850
851
/*
852
 * pg_parse_json_incremental
853
 *
854
 * Routine for incremental parsing of json. This uses the non-recursive top
855
 * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
856
 * the Recursive Descent pattern used above, so we only use it for incremental
857
 * parsing of JSON.
858
 *
859
 * The lexing context needs to be set up by a call to
860
 * makeJsonLexContextIncremental(). sem is a structure of function pointers
861
 * to semantic action routines, which should function exactly as those used
862
 * in the recursive descent parser.
863
 *
864
 * This routine can be called repeatedly with chunks of JSON. On the final
865
 * chunk is_last must be set to true. len is the length of the json chunk,
866
 * which does not need to be null terminated.
867
 */
868
JsonParseErrorType
pg_parse_json_incremental(JsonLexContext *lex,
						  const JsonSemAction *sem,
						  const char *json,
						  size_t len,
						  bool is_last)
{
	JsonTokenType tok;
	JsonParseErrorType result;
	JsonParseContext ctx = JSON_PARSE_VALUE;
	JsonParserStack *pstack = lex->pstack;

	/* sentinel contexts signal an allocation failure during setup */
	if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
		return JSON_OUT_OF_MEMORY;
	if (!lex->incremental)
		return JSON_INVALID_LEXER_TYPE;

	/* point the lexer at the new input chunk */
	lex->input = lex->token_terminator = lex->line_start = json;
	lex->input_length = len;
	lex->inc_state->is_last_chunk = is_last;
	lex->inc_state->started = true;

	/* get the initial token */
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
		return result;

	tok = lex_peek(lex);

	/* use prediction stack for incremental parsing */

	if (!have_prediction(pstack))
	{
		td_entry	goal = TD_ENTRY(JSON_PROD_GOAL);

		push_prediction(pstack, goal);
	}

	while (have_prediction(pstack))
	{
		char		top = pop_prediction(pstack);
		td_entry	entry;

		/*
		 * these first two branches are the guts of the Table Driven method
		 */
		if (top == tok)
		{
			/*
			 * tok can only be a terminal symbol, so top must be too. the
			 * token matches the top of the stack, so get the next token.
			 */
			if (tok < JSON_TOKEN_END)
			{
				result = json_lex(lex);
				if (result != JSON_SUCCESS)
					return result;
				tok = lex_peek(lex);
			}
		}
		else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
		{
			/*
			 * the token is in the director set for a production of the
			 * non-terminal at the top of the stack, so push the reversed RHS
			 * of the production onto the stack.
			 */
			push_prediction(pstack, entry);
		}
		else if (IS_SEM(top))
		{
			/*
			 * top is a semantic action marker, so take action accordingly.
			 * It's important to have these markers in the prediction stack
			 * before any token they might need so we don't advance the token
			 * prematurely. Note in a couple of cases we need to do something
			 * both before and after the token.
			 */
			switch (top)
			{
				case JSON_SEM_OSTART:
					{
						json_struct_action ostart = sem->object_start;

						/* bound nesting before growing the lex level */
						if (lex->lex_level >= JSON_TD_MAX_STACK)
							return JSON_NESTING_TOO_DEEP;

						if (ostart != NULL)
						{
							result = (*ostart) (sem->semstate);
							if (result != JSON_SUCCESS)
								return result;
						}

						if (!inc_lex_level(lex))
							return JSON_OUT_OF_MEMORY;
					}
					break;
				case JSON_SEM_OEND:
					{
						json_struct_action oend = sem->object_end;

						/* restore nesting level before the end callback */
						dec_lex_level(lex);
						if (oend != NULL)
						{
							result = (*oend) (sem->semstate);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_ASTART:
					{
						json_struct_action astart = sem->array_start;

						if (lex->lex_level >= JSON_TD_MAX_STACK)
							return JSON_NESTING_TOO_DEEP;

						if (astart != NULL)
						{
							result = (*astart) (sem->semstate);
							if (result != JSON_SUCCESS)
								return result;
						}

						if (!inc_lex_level(lex))
							return JSON_OUT_OF_MEMORY;
					}
					break;
				case JSON_SEM_AEND:
					{
						json_struct_action aend = sem->array_end;

						dec_lex_level(lex);
						if (aend != NULL)
						{
							result = (*aend) (sem->semstate);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_OFIELD_INIT:
					{
						/*
						 * all we do here is save out the field name. We have
						 * to wait to get past the ':' to see if the next
						 * value is null so we can call the semantic routine
						 */
						char	   *fname = NULL;
						json_ofield_action ostart = sem->object_field_start;
						json_ofield_action oend = sem->object_field_end;

						/* fname stays NULL unless a callback will need it */
						if ((ostart != NULL || oend != NULL) && lex->need_escapes)
						{
							fname = STRDUP(lex->strval->data);
							if (fname == NULL)
								return JSON_OUT_OF_MEMORY;
						}
						set_fname(lex, fname);
					}
					break;
				case JSON_SEM_OFIELD_START:
					{
						/*
						 * the current token should be the first token of the
						 * value
						 */
						bool		isnull = tok == JSON_TOKEN_NULL;
						json_ofield_action ostart = sem->object_field_start;

						set_fnull(lex, isnull);

						if (ostart != NULL)
						{
							char	   *fname = get_fname(lex);

							result = (*ostart) (sem->semstate, fname, isnull);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_OFIELD_END:
					{
						json_ofield_action oend = sem->object_field_end;

						if (oend != NULL)
						{
							char	   *fname = get_fname(lex);
							bool		isnull = get_fnull(lex);

							result = (*oend) (sem->semstate, fname, isnull);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_AELEM_START:
					{
						json_aelem_action astart = sem->array_element_start;
						bool		isnull = tok == JSON_TOKEN_NULL;

						set_fnull(lex, isnull);

						if (astart != NULL)
						{
							result = (*astart) (sem->semstate, isnull);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_AELEM_END:
					{
						json_aelem_action aend = sem->array_element_end;

						if (aend != NULL)
						{
							bool		isnull = get_fnull(lex);

							result = (*aend) (sem->semstate, isnull);
							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				case JSON_SEM_SCALAR_INIT:
					{
						json_scalar_action sfunc = sem->scalar;

						pstack->scalar_val = NULL;

						if (sfunc != NULL)
						{
							/*
							 * extract the de-escaped string value, or the raw
							 * lexeme
							 */
							/*
							 * XXX copied from RD parser but looks like a
							 * buglet
							 */
							if (tok == JSON_TOKEN_STRING)
							{
								/* scalar_val stays NULL if !need_escapes */
								if (lex->need_escapes)
								{
									pstack->scalar_val = STRDUP(lex->strval->data);
									if (pstack->scalar_val == NULL)
										return JSON_OUT_OF_MEMORY;
								}
							}
							else
							{
								ptrdiff_t	tlen = (lex->token_terminator - lex->token_start);

								pstack->scalar_val = ALLOC(tlen + 1);
								if (pstack->scalar_val == NULL)
									return JSON_OUT_OF_MEMORY;

								memcpy(pstack->scalar_val, lex->token_start, tlen);
								pstack->scalar_val[tlen] = '\0';
							}
							pstack->scalar_tok = tok;
						}
					}
					break;
				case JSON_SEM_SCALAR_CALL:
					{
						/*
						 * We'd like to be able to get rid of this business of
						 * two bits of scalar action, but we can't. It breaks
						 * certain semantic actions which expect that when
						 * called the lexer has consumed the item. See for
						 * example get_scalar() in jsonfuncs.c.
						 */
						json_scalar_action sfunc = sem->scalar;

						if (sfunc != NULL)
						{
							result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);

							/*
							 * Either ownership of the token passed to the
							 * callback, or we need to free it now. Either
							 * way, clear our pointer to it so it doesn't get
							 * freed in the future.
							 */
							if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
								FREE(pstack->scalar_val);
							pstack->scalar_val = NULL;

							if (result != JSON_SUCCESS)
								return result;
						}
					}
					break;
				default:
					/* should not happen */
					break;
			}
		}
		else
		{
			/*
			 * The token didn't match the stack top if it's a terminal nor a
			 * production for the stack top if it's a non-terminal.
			 *
			 * Various cases here are Asserted to be not possible, as the
			 * token would not appear at the top of the prediction stack
			 * unless the lookahead matched.
			 *
			 * The switch only selects the parse context used to phrase the
			 * error message; report_parse_error() does the reporting.
			 */
			switch (top)
			{
				case JSON_TOKEN_STRING:
					if (next_prediction(pstack) == JSON_TOKEN_COLON)
						ctx = JSON_PARSE_STRING;
					else
					{
						Assert(false);
						ctx = JSON_PARSE_VALUE;
					}
					break;
				case JSON_TOKEN_NUMBER:
				case JSON_TOKEN_TRUE:
				case JSON_TOKEN_FALSE:
				case JSON_TOKEN_NULL:
				case JSON_TOKEN_ARRAY_START:
				case JSON_TOKEN_OBJECT_START:
					Assert(false);
					ctx = JSON_PARSE_VALUE;
					break;
				case JSON_TOKEN_ARRAY_END:
					Assert(false);
					ctx = JSON_PARSE_ARRAY_NEXT;
					break;
				case JSON_TOKEN_OBJECT_END:
					Assert(false);
					ctx = JSON_PARSE_OBJECT_NEXT;
					break;
				case JSON_TOKEN_COMMA:
					Assert(false);
					if (next_prediction(pstack) == JSON_TOKEN_STRING)
						ctx = JSON_PARSE_OBJECT_NEXT;
					else
						ctx = JSON_PARSE_ARRAY_NEXT;
					break;
				case JSON_TOKEN_COLON:
					ctx = JSON_PARSE_OBJECT_LABEL;
					break;
				case JSON_TOKEN_END:
					ctx = JSON_PARSE_END;
					break;
				case JSON_NT_MORE_ARRAY_ELEMENTS:
					ctx = JSON_PARSE_ARRAY_NEXT;
					break;
				case JSON_NT_ARRAY_ELEMENTS:
					ctx = JSON_PARSE_ARRAY_START;
					break;
				case JSON_NT_MORE_KEY_PAIRS:
					ctx = JSON_PARSE_OBJECT_NEXT;
					break;
				case JSON_NT_KEY_PAIRS:
					ctx = JSON_PARSE_OBJECT_START;
					break;
				default:
					ctx = JSON_PARSE_VALUE;
			}
			return report_parse_error(ctx, lex);
		}
	}

	return JSON_SUCCESS;
}
1242
1243
/*
1244
 *  Recursive Descent parse routines. There is one for each structural
1245
 *  element in a json document:
1246
 *    - scalar (string, number, true, false, null)
1247
 *    - array  ( [ ] )
1248
 *    - array element
1249
 *    - object ( { } )
1250
 *    - object field
1251
 */
1252
static inline JsonParseErrorType
1253
parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
1254
281
{
1255
281
  char     *val = NULL;
1256
281
  json_scalar_action sfunc = sem->scalar;
1257
281
  JsonTokenType tok = lex_peek(lex);
1258
281
  JsonParseErrorType result;
1259
1260
  /* a scalar must be a string, a number, true, false, or null */
1261
281
  if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
1262
281
    tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
1263
281
    tok != JSON_TOKEN_NULL)
1264
5
    return report_parse_error(JSON_PARSE_VALUE, lex);
1265
1266
  /* if no semantic function, just consume the token */
1267
276
  if (sfunc == NULL)
1268
276
    return json_lex(lex);
1269
1270
  /* extract the de-escaped string value, or the raw lexeme */
1271
0
  if (lex_peek(lex) == JSON_TOKEN_STRING)
1272
0
  {
1273
0
    if (lex->need_escapes)
1274
0
    {
1275
0
      val = STRDUP(lex->strval->data);
1276
0
      if (val == NULL)
1277
0
        return JSON_OUT_OF_MEMORY;
1278
0
    }
1279
0
  }
1280
0
  else
1281
0
  {
1282
0
    int     len = (lex->token_terminator - lex->token_start);
1283
1284
0
    val = ALLOC(len + 1);
1285
0
    if (val == NULL)
1286
0
      return JSON_OUT_OF_MEMORY;
1287
1288
0
    memcpy(val, lex->token_start, len);
1289
0
    val[len] = '\0';
1290
0
  }
1291
1292
  /* consume the token */
1293
0
  result = json_lex(lex);
1294
0
  if (result != JSON_SUCCESS)
1295
0
  {
1296
0
    FREE(val);
1297
0
    return result;
1298
0
  }
1299
1300
  /*
1301
   * invoke the callback, which may take ownership of val. For string
1302
   * values, val is NULL if need_escapes is false.
1303
   */
1304
0
  result = (*sfunc) (sem->semstate, val, tok);
1305
1306
0
  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1307
0
    FREE(val);
1308
1309
0
  return result;
1310
0
}
1311
1312
static JsonParseErrorType
parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
{
	/*
	 * An object field is "fieldname" : value where value can be a scalar,
	 * object or array.  Note: in user-facing docs and error messages, we
	 * generally call a field name a "key".
	 */

	char	   *fname = NULL;
	json_ofield_action ostart = sem->object_field_start;
	json_ofield_action oend = sem->object_field_end;
	bool		isnull;
	JsonTokenType tok;
	JsonParseErrorType result;

	if (lex_peek(lex) != JSON_TOKEN_STRING)
		return report_parse_error(JSON_PARSE_STRING, lex);
	if ((ostart != NULL || oend != NULL) && lex->need_escapes)
	{
		/* fname is NULL if need_escapes is false */
		fname = STRDUP(lex->strval->data);
		if (fname == NULL)
			return JSON_OUT_OF_MEMORY;
	}
	/* consume the field-name string token */
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
	{
		/* no callback has seen fname yet, so we must free it ourselves */
		FREE(fname);
		return result;
	}

	result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
	if (result != JSON_SUCCESS)
	{
		FREE(fname);
		return result;
	}

	/* peek at the value's first token to learn whether it is null */
	tok = lex_peek(lex);
	isnull = tok == JSON_TOKEN_NULL;

	if (ostart != NULL)
	{
		result = (*ostart) (sem->semstate, fname, isnull);
		if (result != JSON_SUCCESS)
			goto ofield_cleanup;
	}

	/* the field value is any object, array, or scalar */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			result = parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			result = parse_array(lex, sem);
			break;
		default:
			result = parse_scalar(lex, sem);
	}
	if (result != JSON_SUCCESS)
		goto ofield_cleanup;

	if (oend != NULL)
	{
		result = (*oend) (sem->semstate, fname, isnull);
		if (result != JSON_SUCCESS)
			goto ofield_cleanup;
	}

ofield_cleanup:
	/*
	 * Only free fname if this context owns its tokens; otherwise the
	 * callbacks presumably retain it (mirrors the ownership note on the
	 * scalar value in parse_scalar).
	 */
	if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
		FREE(fname);
	return result;
}
1387
1388
static JsonParseErrorType
parse_object(JsonLexContext *lex, const JsonSemAction *sem)
{
	/*
	 * an object is a possibly empty sequence of object fields, separated by
	 * commas and surrounded by curly braces.
	 */
	json_struct_action ostart = sem->object_start;
	json_struct_action oend = sem->object_end;
	JsonTokenType tok;
	JsonParseErrorType result;

#ifndef FRONTEND

	/*
	 * TODO: clients need some way to put a bound on stack growth. Parse level
	 * limits maybe?
	 */
	check_stack_depth();
#endif

	if (ostart != NULL)
	{
		result = (*ostart) (sem->semstate);
		if (result != JSON_SUCCESS)
			return result;
	}

	/*
	 * Data inside an object is at a higher nesting level than the object
	 * itself. Note that we increment this after we call the semantic routine
	 * for the object start and restore it before we call the routine for the
	 * object end.
	 *
	 * Error paths below return without restoring lex_level; the parse is
	 * abandoned at that point, so the stale level is never consulted again.
	 */
	lex->lex_level++;

	Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
		return result;

	tok = lex_peek(lex);
	switch (tok)
	{
		case JSON_TOKEN_STRING:
			/* one or more comma-separated fields */
			result = parse_object_field(lex, sem);
			while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
			{
				result = json_lex(lex);
				if (result != JSON_SUCCESS)
					break;
				result = parse_object_field(lex, sem);
			}
			break;
		case JSON_TOKEN_OBJECT_END:
			/* empty object: fall through to consume the closing brace */
			break;
		default:
			/* case of an invalid initial token inside the object */
			result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
	}
	if (result != JSON_SUCCESS)
		return result;

	result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
	if (result != JSON_SUCCESS)
		return result;

	/* restore the nesting level before the object-end callback */
	lex->lex_level--;

	if (oend != NULL)
	{
		result = (*oend) (sem->semstate);
		if (result != JSON_SUCCESS)
			return result;
	}

	return JSON_SUCCESS;
}
1466
1467
static JsonParseErrorType
1468
parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
1469
2.57k
{
1470
2.57k
  json_aelem_action astart = sem->array_element_start;
1471
2.57k
  json_aelem_action aend = sem->array_element_end;
1472
2.57k
  JsonTokenType tok = lex_peek(lex);
1473
2.57k
  JsonParseErrorType result;
1474
2.57k
  bool    isnull;
1475
1476
2.57k
  isnull = tok == JSON_TOKEN_NULL;
1477
1478
2.57k
  if (astart != NULL)
1479
0
  {
1480
0
    result = (*astart) (sem->semstate, isnull);
1481
0
    if (result != JSON_SUCCESS)
1482
0
      return result;
1483
0
  }
1484
1485
  /* an array element is any object, array or scalar */
1486
2.57k
  switch (tok)
1487
2.57k
  {
1488
400
    case JSON_TOKEN_OBJECT_START:
1489
400
      result = parse_object(lex, sem);
1490
400
      break;
1491
2.15k
    case JSON_TOKEN_ARRAY_START:
1492
2.15k
      result = parse_array(lex, sem);
1493
2.15k
      break;
1494
13
    default:
1495
13
      result = parse_scalar(lex, sem);
1496
2.57k
  }
1497
1498
24
  if (result != JSON_SUCCESS)
1499
15
    return result;
1500
1501
9
  if (aend != NULL)
1502
0
  {
1503
0
    result = (*aend) (sem->semstate, isnull);
1504
0
    if (result != JSON_SUCCESS)
1505
0
      return result;
1506
0
  }
1507
1508
9
  return JSON_SUCCESS;
1509
9
}
1510
1511
static JsonParseErrorType
parse_array(JsonLexContext *lex, const JsonSemAction *sem)
{
	/*
	 * an array is a possibly empty sequence of array elements, separated by
	 * commas and surrounded by square brackets.
	 */
	json_struct_action astart = sem->array_start;
	json_struct_action aend = sem->array_end;
	JsonParseErrorType result;

#ifndef FRONTEND
	/* guard against runaway recursion on deeply nested input */
	check_stack_depth();
#endif

	if (astart != NULL)
	{
		result = (*astart) (sem->semstate);
		if (result != JSON_SUCCESS)
			return result;
	}

	/*
	 * Data inside an array is at a higher nesting level than the array
	 * itself. Note that we increment this after we call the semantic routine
	 * for the array start and restore it before we call the routine for the
	 * array end.
	 *
	 * Error paths below return without restoring lex_level; the parse is
	 * abandoned at that point, so the stale level is never consulted again.
	 */
	lex->lex_level++;

	result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
	if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
	{
		/* non-empty array: parse comma-separated elements */
		result = parse_array_element(lex, sem);

		while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
		{
			result = json_lex(lex);
			if (result != JSON_SUCCESS)
				break;
			result = parse_array_element(lex, sem);
		}
	}
	if (result != JSON_SUCCESS)
		return result;

	result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
	if (result != JSON_SUCCESS)
		return result;

	/* restore the nesting level before the array-end callback */
	lex->lex_level--;

	if (aend != NULL)
	{
		result = (*aend) (sem->semstate);
		if (result != JSON_SUCCESS)
			return result;
	}

	return JSON_SUCCESS;
}
1572
1573
/*
1574
 * Lex one token from the input stream.
1575
 *
1576
 * When doing incremental parsing, we can reach the end of the input string
1577
 * without having (or knowing we have) a complete token. If it's not the
1578
 * final chunk of input, the partial token is then saved to the lex
1579
 * structure's ptok StringInfo. On subsequent calls input is appended to this
1580
 * buffer until we have something that we think is a complete token,
1581
 * which is then lexed using a recursive call to json_lex. Processing then
1582
 * continues as normal on subsequent calls.
1583
 *
1584
 * Note that when doing incremental processing, the lex.prev_token_terminator
1585
 * should not be relied on. It could point into a previous input chunk or
1586
 * worse.
1587
 */
1588
JsonParseErrorType
1589
json_lex(JsonLexContext *lex)
1590
6.08k
{
1591
6.08k
  const char *s;
1592
6.08k
  const char *const end = lex->input + lex->input_length;
1593
6.08k
  JsonParseErrorType result;
1594
1595
6.08k
  if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
1596
0
    return JSON_OUT_OF_MEMORY;
1597
1598
6.08k
  if (lex->incremental)
1599
0
  {
1600
0
    if (lex->inc_state->partial_completed)
1601
0
    {
1602
      /*
1603
       * We just lexed a completed partial token on the last call, so
1604
       * reset everything
1605
       */
1606
0
      jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
1607
0
      lex->token_terminator = lex->input;
1608
0
      lex->inc_state->partial_completed = false;
1609
0
    }
1610
1611
#ifdef JSONAPI_USE_PQEXPBUFFER
1612
    /* Make sure our partial token buffer is valid before using it below. */
1613
    if (PQExpBufferDataBroken(lex->inc_state->partial_token))
1614
      return JSON_OUT_OF_MEMORY;
1615
#endif
1616
0
  }
1617
1618
6.08k
  s = lex->token_terminator;
1619
1620
6.08k
  if (lex->incremental && lex->inc_state->partial_token.len)
1621
0
  {
1622
    /*
1623
     * We have a partial token. Extend it and if completed lex it by a
1624
     * recursive call
1625
     */
1626
0
    jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
1627
0
    size_t    added = 0;
1628
0
    bool    tok_done = false;
1629
0
    JsonLexContext dummy_lex = {0};
1630
0
    JsonParseErrorType partial_result;
1631
1632
0
    if (ptok->data[0] == '"')
1633
0
    {
1634
      /*
1635
       * It's a string. Accumulate characters until we reach an
1636
       * unescaped '"'.
1637
       */
1638
0
      int     escapes = 0;
1639
1640
0
      for (int i = ptok->len - 1; i > 0; i--)
1641
0
      {
1642
        /* count the trailing backslashes on the partial token */
1643
0
        if (ptok->data[i] == '\\')
1644
0
          escapes++;
1645
0
        else
1646
0
          break;
1647
0
      }
1648
1649
0
      for (size_t i = 0; i < lex->input_length; i++)
1650
0
      {
1651
0
        char    c = lex->input[i];
1652
1653
0
        jsonapi_appendStringInfoCharMacro(ptok, c);
1654
0
        added++;
1655
0
        if (c == '"' && escapes % 2 == 0)
1656
0
        {
1657
0
          tok_done = true;
1658
0
          break;
1659
0
        }
1660
0
        if (c == '\\')
1661
0
          escapes++;
1662
0
        else
1663
0
          escapes = 0;
1664
0
      }
1665
0
    }
1666
0
    else
1667
0
    {
1668
      /* not a string */
1669
0
      char    c = ptok->data[0];
1670
1671
0
      if (c == '-' || (c >= '0' && c <= '9'))
1672
0
      {
1673
        /* for numbers look for possible numeric continuations */
1674
1675
0
        bool    numend = false;
1676
1677
0
        for (size_t i = 0; i < lex->input_length && !numend; i++)
1678
0
        {
1679
0
          char    cc = lex->input[i];
1680
1681
0
          switch (cc)
1682
0
          {
1683
0
            case '+':
1684
0
            case '-':
1685
0
            case 'e':
1686
0
            case 'E':
1687
0
            case '0':
1688
0
            case '1':
1689
0
            case '2':
1690
0
            case '3':
1691
0
            case '4':
1692
0
            case '5':
1693
0
            case '6':
1694
0
            case '7':
1695
0
            case '8':
1696
0
            case '9':
1697
0
              {
1698
0
                jsonapi_appendStringInfoCharMacro(ptok, cc);
1699
0
                added++;
1700
0
              }
1701
0
              break;
1702
0
            default:
1703
0
              numend = true;
1704
0
          }
1705
0
        }
1706
0
      }
1707
1708
      /*
1709
       * Add any remaining alphanumeric chars. This takes care of the
1710
       * {null, false, true} literals as well as any trailing
1711
       * alphanumeric junk on non-string tokens.
1712
       */
1713
0
      for (size_t i = added; i < lex->input_length; i++)
1714
0
      {
1715
0
        char    cc = lex->input[i];
1716
1717
0
        if (JSON_ALPHANUMERIC_CHAR(cc))
1718
0
        {
1719
0
          jsonapi_appendStringInfoCharMacro(ptok, cc);
1720
0
          added++;
1721
0
        }
1722
0
        else
1723
0
        {
1724
0
          tok_done = true;
1725
0
          break;
1726
0
        }
1727
0
      }
1728
0
      if (added == lex->input_length &&
1729
0
        lex->inc_state->is_last_chunk)
1730
0
      {
1731
0
        tok_done = true;
1732
0
      }
1733
0
    }
1734
1735
0
    if (!tok_done)
1736
0
    {
1737
      /* We should have consumed the whole chunk in this case. */
1738
0
      Assert(added == lex->input_length);
1739
1740
0
      if (!lex->inc_state->is_last_chunk)
1741
0
        return JSON_INCOMPLETE;
1742
1743
      /* json_errdetail() needs access to the accumulated token. */
1744
0
      lex->token_start = ptok->data;
1745
0
      lex->token_terminator = ptok->data + ptok->len;
1746
0
      return JSON_INVALID_TOKEN;
1747
0
    }
1748
1749
    /*
1750
     * Everything up to lex->input[added] has been added to the partial
1751
     * token, so move the input past it.
1752
     */
1753
0
    lex->input += added;
1754
0
    lex->input_length -= added;
1755
1756
0
    dummy_lex.input = dummy_lex.token_terminator =
1757
0
      dummy_lex.line_start = ptok->data;
1758
0
    dummy_lex.line_number = lex->line_number;
1759
0
    dummy_lex.input_length = ptok->len;
1760
0
    dummy_lex.input_encoding = lex->input_encoding;
1761
0
    dummy_lex.incremental = false;
1762
0
    dummy_lex.need_escapes = lex->need_escapes;
1763
0
    dummy_lex.strval = lex->strval;
1764
1765
0
    partial_result = json_lex(&dummy_lex);
1766
1767
    /*
1768
     * We either have a complete token or an error. In either case we need
1769
     * to point to the partial token data for the semantic or error
1770
     * routines. If it's not an error we'll readjust on the next call to
1771
     * json_lex.
1772
     */
1773
0
    lex->token_type = dummy_lex.token_type;
1774
0
    lex->line_number = dummy_lex.line_number;
1775
1776
    /*
1777
     * We know the prev_token_terminator must be back in some previous
1778
     * piece of input, so we just make it NULL.
1779
     */
1780
0
    lex->prev_token_terminator = NULL;
1781
1782
    /*
1783
     * Normally token_start would be ptok->data, but it could be later,
1784
     * see json_lex_string's handling of invalid escapes.
1785
     */
1786
0
    lex->token_start = dummy_lex.token_start;
1787
0
    lex->token_terminator = dummy_lex.token_terminator;
1788
0
    if (partial_result == JSON_SUCCESS)
1789
0
    {
1790
      /* make sure we've used all the input */
1791
0
      if (lex->token_terminator - lex->token_start != ptok->len)
1792
0
      {
1793
0
        Assert(false);
1794
0
        return JSON_INVALID_TOKEN;
1795
0
      }
1796
1797
0
      lex->inc_state->partial_completed = true;
1798
0
    }
1799
0
    return partial_result;
1800
    /* end of partial token processing */
1801
0
  }
1802
1803
  /* Skip leading whitespace. */
1804
7.64k
  while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
1805
1.55k
  {
1806
1.55k
    if (*s++ == '\n')
1807
198
    {
1808
198
      ++lex->line_number;
1809
198
      lex->line_start = s;
1810
198
    }
1811
1.55k
  }
1812
6.08k
  lex->token_start = s;
1813
1814
  /* Determine token type. */
1815
6.08k
  if (s >= end)
1816
0
  {
1817
0
    lex->token_start = NULL;
1818
0
    lex->prev_token_terminator = lex->token_terminator;
1819
0
    lex->token_terminator = s;
1820
0
    lex->token_type = JSON_TOKEN_END;
1821
0
  }
1822
6.08k
  else
1823
6.08k
  {
1824
6.08k
    switch (*s)
1825
6.08k
    {
1826
        /* Single-character token, some kind of punctuation mark. */
1827
572
      case '{':
1828
572
        lex->prev_token_terminator = lex->token_terminator;
1829
572
        lex->token_terminator = s + 1;
1830
572
        lex->token_type = JSON_TOKEN_OBJECT_START;
1831
572
        break;
1832
2
      case '}':
1833
2
        lex->prev_token_terminator = lex->token_terminator;
1834
2
        lex->token_terminator = s + 1;
1835
2
        lex->token_type = JSON_TOKEN_OBJECT_END;
1836
2
        break;
1837
2.57k
      case '[':
1838
2.57k
        lex->prev_token_terminator = lex->token_terminator;
1839
2.57k
        lex->token_terminator = s + 1;
1840
2.57k
        lex->token_type = JSON_TOKEN_ARRAY_START;
1841
2.57k
        break;
1842
2
      case ']':
1843
2
        lex->prev_token_terminator = lex->token_terminator;
1844
2
        lex->token_terminator = s + 1;
1845
2
        lex->token_type = JSON_TOKEN_ARRAY_END;
1846
2
        break;
1847
13
      case ',':
1848
13
        lex->prev_token_terminator = lex->token_terminator;
1849
13
        lex->token_terminator = s + 1;
1850
13
        lex->token_type = JSON_TOKEN_COMMA;
1851
13
        break;
1852
566
      case ':':
1853
566
        lex->prev_token_terminator = lex->token_terminator;
1854
566
        lex->token_terminator = s + 1;
1855
566
        lex->token_type = JSON_TOKEN_COLON;
1856
566
        break;
1857
1.43k
      case '"':
1858
        /* string */
1859
1.43k
        result = json_lex_string(lex);
1860
1.43k
        if (result != JSON_SUCCESS)
1861
843
          return result;
1862
596
        lex->token_type = JSON_TOKEN_STRING;
1863
596
        break;
1864
97
      case '-':
1865
        /* Negative number. */
1866
97
        result = json_lex_number(lex, s + 1, NULL, NULL);
1867
97
        if (result != JSON_SUCCESS)
1868
67
          return result;
1869
30
        lex->token_type = JSON_TOKEN_NUMBER;
1870
30
        break;
1871
59
      case '0':
1872
109
      case '1':
1873
161
      case '2':
1874
204
      case '3':
1875
229
      case '4':
1876
282
      case '5':
1877
334
      case '6':
1878
375
      case '7':
1879
415
      case '8':
1880
444
      case '9':
1881
        /* Positive number. */
1882
444
        result = json_lex_number(lex, s, NULL, NULL);
1883
444
        if (result != JSON_SUCCESS)
1884
213
          return result;
1885
231
        lex->token_type = JSON_TOKEN_NUMBER;
1886
231
        break;
1887
376
      default:
1888
376
        {
1889
376
          const char *p;
1890
1891
          /*
1892
           * We're not dealing with a string, number, legal
1893
           * punctuation mark, or end of string.  The only legal
1894
           * tokens we might find here are true, false, and null,
1895
           * but for error reporting purposes we scan until we see a
1896
           * non-alphanumeric character.  That way, we can report
1897
           * the whole word as an unexpected token, rather than just
1898
           * some unintuitive prefix thereof.
1899
           */
1900
4.19M
          for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
1901
4.19M
             /* skip */ ;
1902
1903
          /*
1904
           * We got some sort of unexpected punctuation or an
1905
           * otherwise unexpected character, so just complain about
1906
           * that one character.
1907
           */
1908
376
          if (p == s)
1909
202
          {
1910
202
            lex->prev_token_terminator = lex->token_terminator;
1911
202
            lex->token_terminator = s + 1;
1912
202
            return JSON_INVALID_TOKEN;
1913
202
          }
1914
1915
174
          if (lex->incremental && !lex->inc_state->is_last_chunk &&
1916
174
            p == lex->input + lex->input_length)
1917
0
          {
1918
0
            jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
1919
0
            return JSON_INCOMPLETE;
1920
0
          }
1921
1922
          /*
1923
           * We've got a real alphanumeric token here.  If it
1924
           * happens to be true, false, or null, all is well.  If
1925
           * not, error out.
1926
           */
1927
174
          lex->prev_token_terminator = lex->token_terminator;
1928
174
          lex->token_terminator = p;
1929
174
          if (p - s == 4)
1930
43
          {
1931
43
            if (memcmp(s, "true", 4) == 0)
1932
6
              lex->token_type = JSON_TOKEN_TRUE;
1933
37
            else if (memcmp(s, "null", 4) == 0)
1934
6
              lex->token_type = JSON_TOKEN_NULL;
1935
31
            else
1936
31
              return JSON_INVALID_TOKEN;
1937
43
          }
1938
131
          else if (p - s == 5 && memcmp(s, "false", 5) == 0)
1939
3
            lex->token_type = JSON_TOKEN_FALSE;
1940
128
          else
1941
128
            return JSON_INVALID_TOKEN;
1942
174
        }
1943
6.08k
    }           /* end of switch */
1944
6.08k
  }
1945
1946
4.60k
  if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
1947
0
    return JSON_INCOMPLETE;
1948
4.60k
  else
1949
4.60k
    return JSON_SUCCESS;
1950
4.60k
}
1951
1952
/*
 * The next token in the input stream is known to be a string; lex it.
 *
 * If lex->strval isn't NULL, fill it with the decoded string.
 * Set lex->token_terminator to the end of the decoded input, and in
 * success cases, transfer its previous value to lex->prev_token_terminator.
 * Return JSON_SUCCESS or an error code.
 *
 * Note: be careful that all error exits advance lex->token_terminator
 * to the point after the character we detected the error on.
 */
static inline JsonParseErrorType
json_lex_string(JsonLexContext *lex)
{
	const char *s;
	const char *const end = lex->input + lex->input_length;
	int			hi_surrogate = -1;	/* pending first half of a UTF-16 pair */

	/* Convenience macros for error exits */
#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
	do { \
		if (lex->incremental && !lex->inc_state->is_last_chunk) \
		{ \
			jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
										   lex->token_start, \
										   end - lex->token_start); \
			return JSON_INCOMPLETE; \
		} \
		lex->token_terminator = s; \
		return code; \
	} while (0)
#define FAIL_AT_CHAR_END(code) \
	do { \
		ptrdiff_t	remaining = end - s; \
		int			charlen; \
		charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \
												  s, remaining); \
		lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \
		return code; \
	} while (0)

	if (lex->need_escapes)
	{
#ifdef JSONAPI_USE_PQEXPBUFFER
		/* make sure initialization succeeded */
		if (lex->strval == NULL)
			return JSON_OUT_OF_MEMORY;
#endif
		jsonapi_resetStringInfo(lex->strval);
	}

	Assert(lex->input_length > 0);
	s = lex->token_start;
	for (;;)
	{
		/* each iteration consumes the byte after the one s points at */
		s++;
		/* Premature end of the string. */
		if (s >= end)
			FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
		else if (*s == '"')
			break;
		else if (*s == '\\')
		{
			/* OK, we have an escape character. */
			s++;
			if (s >= end)
				FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
			else if (*s == 'u')
			{
				int			i;
				int			ch = 0;

				/* read exactly four hex digits into ch */
				for (i = 1; i <= 4; i++)
				{
					s++;
					if (s >= end)
						FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
					else if (*s >= '0' && *s <= '9')
						ch = (ch * 16) + (*s - '0');
					else if (*s >= 'a' && *s <= 'f')
						ch = (ch * 16) + (*s - 'a') + 10;
					else if (*s >= 'A' && *s <= 'F')
						ch = (ch * 16) + (*s - 'A') + 10;
					else
						FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
				}
				if (lex->need_escapes)
				{
					/*
					 * Combine surrogate pairs.
					 */
					if (is_utf16_surrogate_first(ch))
					{
						if (hi_surrogate != -1)
							FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
						hi_surrogate = ch;
						continue;
					}
					else if (is_utf16_surrogate_second(ch))
					{
						if (hi_surrogate == -1)
							FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
						ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
						hi_surrogate = -1;
					}

					/* a stored high surrogate must be followed by a low one */
					if (hi_surrogate != -1)
						FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

					/*
					 * Reject invalid cases.  We can't have a value above
					 * 0xFFFF here (since we only accepted 4 hex digits
					 * above), so no need to test for out-of-range chars.
					 */
					if (ch == 0)
					{
						/* We can't allow this, since our TEXT type doesn't */
						FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
					}

					/*
					 * Add the represented character to lex->strval.  In the
					 * backend, we can let pg_unicode_to_server_noerror()
					 * handle any required character set conversion; in
					 * frontend, we can only deal with trivial conversions.
					 */
#ifndef FRONTEND
					{
						char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];

						if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
							FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
						appendStringInfoString(lex->strval, cbuf);
					}
#else
					if (lex->input_encoding == PG_UTF8)
					{
						/* OK, we can map the code point to UTF8 easily */
						char		utf8str[5];
						int			utf8len;

						unicode_to_utf8(ch, (unsigned char *) utf8str);
						utf8len = pg_utf_mblen((unsigned char *) utf8str);
						jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
					}
					else if (ch <= 0x007f)
					{
						/* The ASCII range is the same in all encodings */
						jsonapi_appendStringInfoChar(lex->strval, (char) ch);
					}
					else
						FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
#endif							/* FRONTEND */
				}
			}
			else if (lex->need_escapes)
			{
				if (hi_surrogate != -1)
					FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

				/* translate the single-character escapes */
				switch (*s)
				{
					case '"':
					case '\\':
					case '/':
						jsonapi_appendStringInfoChar(lex->strval, *s);
						break;
					case 'b':
						jsonapi_appendStringInfoChar(lex->strval, '\b');
						break;
					case 'f':
						jsonapi_appendStringInfoChar(lex->strval, '\f');
						break;
					case 'n':
						jsonapi_appendStringInfoChar(lex->strval, '\n');
						break;
					case 'r':
						jsonapi_appendStringInfoChar(lex->strval, '\r');
						break;
					case 't':
						jsonapi_appendStringInfoChar(lex->strval, '\t');
						break;
					default:

						/*
						 * Not a valid string escape, so signal error.  We
						 * adjust token_start so that just the escape sequence
						 * is reported, not the whole string.
						 */
						lex->token_start = s;
						FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
				}
			}
			else if (strchr("\"\\/bfnrt", *s) == NULL)
			{
				/*
				 * Simpler processing if we're not bothered about de-escaping
				 *
				 * It's very tempting to remove the strchr() call here and
				 * replace it with a switch statement, but testing so far has
				 * shown it's not a performance win.
				 */
				lex->token_start = s;
				FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
			}
		}
		else
		{
			const char *p = s;

			if (hi_surrogate != -1)
				FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

			/*
			 * Skip to the first byte that requires special handling, so we
			 * can batch calls to jsonapi_appendBinaryStringInfo.
			 */
			while (p < end - sizeof(Vector8) &&
				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
				p += sizeof(Vector8);

			/* byte-at-a-time scan for the remainder */
			for (; p < end; p++)
			{
				if (*p == '\\' || *p == '"')
					break;
				else if ((unsigned char) *p <= 31)
				{
					/* Per RFC4627, these characters MUST be escaped. */
					/*
					 * Since *p isn't printable, exclude it from the context
					 * string
					 */
					lex->token_terminator = p;
					return JSON_ESCAPING_REQUIRED;
				}
			}

			if (lex->need_escapes)
				jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);

			/*
			 * s will be incremented at the top of the loop, so set it to just
			 * behind our lookahead position
			 */
			s = p - 1;
		}
	}

	/* string ended with an unpaired high surrogate */
	if (hi_surrogate != -1)
	{
		lex->token_terminator = s + 1;
		return JSON_UNICODE_LOW_SURROGATE;
	}

#ifdef JSONAPI_USE_PQEXPBUFFER
	if (lex->need_escapes && PQExpBufferBroken(lex->strval))
		return JSON_OUT_OF_MEMORY;
#endif

	/* Hooray, we found the end of the string! */
	lex->prev_token_terminator = lex->token_terminator;
	lex->token_terminator = s + 1;
	return JSON_SUCCESS;

#undef FAIL_OR_INCOMPLETE_AT_CHAR_START
#undef FAIL_AT_CHAR_END
}
2221
2222
/*
2223
 * The next token in the input stream is known to be a number; lex it.
2224
 *
2225
 * In JSON, a number consists of four parts:
2226
 *
2227
 * (1) An optional minus sign ('-').
2228
 *
2229
 * (2) Either a single '0', or a string of one or more digits that does not
2230
 *     begin with a '0'.
2231
 *
2232
 * (3) An optional decimal part, consisting of a period ('.') followed by
2233
 *     one or more digits.  (Note: While this part can be omitted
2234
 *     completely, it's not OK to have only the decimal point without
2235
 *     any digits afterwards.)
2236
 *
2237
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
2238
 *     followed by '+' or '-', followed by one or more digits.  (Note:
2239
 *     As with the decimal part, if 'e' or 'E' is present, it must be
2240
 *     followed by at least one digit.)
2241
 *
2242
 * The 's' argument to this function points to the ostensible beginning
2243
 * of part 2 - i.e. the character after any optional minus sign, or the
2244
 * first character of the string if there is none.
2245
 *
2246
 * If num_err is not NULL, we return an error flag to *num_err rather than
2247
 * raising an error for a badly-formed number.  Also, if total_len is not NULL
2248
 * the distance from lex->input to the token end+1 is returned to *total_len.
2249
 */
2250
static inline JsonParseErrorType
2251
json_lex_number(JsonLexContext *lex, const char *s,
2252
        bool *num_err, size_t *total_len)
2253
541
{
2254
541
  bool    error = false;
2255
541
  int     len = s - lex->input;
2256
2257
  /* Part (1): leading sign indicator. */
2258
  /* Caller already did this for us; so do nothing. */
2259
2260
  /* Part (2): parse main digit string. */
2261
541
  if (len < lex->input_length && *s == '0')
2262
60
  {
2263
60
    s++;
2264
60
    len++;
2265
60
  }
2266
481
  else if (len < lex->input_length && *s >= '1' && *s <= '9')
2267
424
  {
2268
424
    do
2269
60.7k
    {
2270
60.7k
      s++;
2271
60.7k
      len++;
2272
60.7k
    } while (len < lex->input_length && *s >= '0' && *s <= '9');
2273
424
  }
2274
57
  else
2275
57
    error = true;
2276
2277
  /* Part (3): parse optional decimal portion. */
2278
541
  if (len < lex->input_length && *s == '.')
2279
137
  {
2280
137
    s++;
2281
137
    len++;
2282
137
    if (len == lex->input_length || *s < '0' || *s > '9')
2283
39
      error = true;
2284
98
    else
2285
98
    {
2286
98
      do
2287
16.8k
      {
2288
16.8k
        s++;
2289
16.8k
        len++;
2290
16.8k
      } while (len < lex->input_length && *s >= '0' && *s <= '9');
2291
98
    }
2292
137
  }
2293
2294
  /* Part (4): parse optional exponent. */
2295
541
  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
2296
166
  {
2297
166
    s++;
2298
166
    len++;
2299
166
    if (len < lex->input_length && (*s == '+' || *s == '-'))
2300
14
    {
2301
14
      s++;
2302
14
      len++;
2303
14
    }
2304
166
    if (len == lex->input_length || *s < '0' || *s > '9')
2305
59
      error = true;
2306
107
    else
2307
107
    {
2308
107
      do
2309
24.3k
      {
2310
24.3k
        s++;
2311
24.3k
        len++;
2312
24.3k
      } while (len < lex->input_length && *s >= '0' && *s <= '9');
2313
107
    }
2314
166
  }
2315
2316
  /*
2317
   * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
2318
   * here should be considered part of the token for error-reporting
2319
   * purposes.
2320
   */
2321
557k
  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
2322
556k
    error = true;
2323
2324
541
  if (total_len != NULL)
2325
0
    *total_len = len;
2326
2327
541
  if (lex->incremental && !lex->inc_state->is_last_chunk &&
2328
541
    len >= lex->input_length)
2329
0
  {
2330
0
    jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
2331
0
                     lex->token_start, s - lex->token_start);
2332
0
    if (num_err != NULL)
2333
0
      *num_err = error;
2334
2335
0
    return JSON_INCOMPLETE;
2336
0
  }
2337
541
  else if (num_err != NULL)
2338
0
  {
2339
    /* let the caller handle any error */
2340
0
    *num_err = error;
2341
0
  }
2342
541
  else
2343
541
  {
2344
    /* return token endpoint */
2345
541
    lex->prev_token_terminator = lex->token_terminator;
2346
541
    lex->token_terminator = s;
2347
    /* handle error if any */
2348
541
    if (error)
2349
280
      return JSON_INVALID_TOKEN;
2350
541
  }
2351
2352
261
  return JSON_SUCCESS;
2353
541
}
2354
2355
/*
2356
 * Report a parse error.
2357
 *
2358
 * lex->token_start and lex->token_terminator must identify the current token.
2359
 */
2360
static JsonParseErrorType
2361
report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
2362
52
{
2363
  /* Handle case where the input ended prematurely. */
2364
52
  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
2365
0
    return JSON_EXPECTED_MORE;
2366
2367
  /* Otherwise choose the error type based on the parsing context. */
2368
52
  switch (ctx)
2369
52
  {
2370
41
    case JSON_PARSE_END:
2371
41
      return JSON_EXPECTED_END;
2372
5
    case JSON_PARSE_VALUE:
2373
5
      return JSON_EXPECTED_JSON;
2374
0
    case JSON_PARSE_STRING:
2375
0
      return JSON_EXPECTED_STRING;
2376
0
    case JSON_PARSE_ARRAY_START:
2377
0
      return JSON_EXPECTED_ARRAY_FIRST;
2378
2
    case JSON_PARSE_ARRAY_NEXT:
2379
2
      return JSON_EXPECTED_ARRAY_NEXT;
2380
2
    case JSON_PARSE_OBJECT_START:
2381
2
      return JSON_EXPECTED_OBJECT_FIRST;
2382
2
    case JSON_PARSE_OBJECT_LABEL:
2383
2
      return JSON_EXPECTED_COLON;
2384
0
    case JSON_PARSE_OBJECT_NEXT:
2385
0
      return JSON_EXPECTED_OBJECT_NEXT;
2386
0
    case JSON_PARSE_OBJECT_COMMA:
2387
0
      return JSON_EXPECTED_STRING;
2388
52
  }
2389
2390
  /*
2391
   * We don't use a default: case, so that the compiler will warn about
2392
   * unhandled enum values.
2393
   */
2394
0
  Assert(false);
2395
0
  return JSON_SUCCESS;   /* silence stupider compilers */
2396
52
}
2397
2398
/*
 * Construct an (already translated) detail message for a JSON error.
 *
 * The returned pointer should not be freed, the allocation is either static
 * or owned by the JsonLexContext.
 */
char *
json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
{
	if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
	{
		/* Short circuit. Allocating anything for this case is unhelpful. */
		return _("out of memory");
	}

	/* Reuse the context's message buffer, creating it on first use. */
	if (lex->errormsg)
		jsonapi_resetStringInfo(lex->errormsg);
	else
		lex->errormsg = jsonapi_makeStringInfo();

	/*
	 * A helper for error messages that should print the current token. The
	 * format must contain exactly one %.*s specifier.
	 */
#define json_token_error(lex, format) \
	jsonapi_appendStringInfo((lex)->errormsg, _(format), \
							 (int) ((lex)->token_terminator - (lex)->token_start), \
							 (lex)->token_start);

	switch (error)
	{
		case JSON_INCOMPLETE:
		case JSON_SUCCESS:
			/* fall through to the error code after switch */
			break;
		case JSON_INVALID_LEXER_TYPE:
			if (lex->incremental)
				return _("Recursive descent parser cannot use incremental lexer.");
			else
				return _("Incremental parser requires incremental lexer.");
		case JSON_NESTING_TOO_DEEP:
			return (_("JSON nested too deep, maximum permitted depth is 6400."));
		case JSON_ESCAPING_INVALID:
			json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
			break;
		case JSON_ESCAPING_REQUIRED:
			jsonapi_appendStringInfo(lex->errormsg,
									 _("Character with value 0x%02x must be escaped."),
									 (unsigned char) *(lex->token_terminator));
			break;
		case JSON_EXPECTED_END:
			json_token_error(lex, "Expected end of input, but found \"%.*s\".");
			break;
		case JSON_EXPECTED_ARRAY_FIRST:
			json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_ARRAY_NEXT:
			json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_COLON:
			json_token_error(lex, "Expected \":\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_JSON:
			json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
			break;
		case JSON_EXPECTED_MORE:
			return _("The input string ended unexpectedly.");
		case JSON_EXPECTED_OBJECT_FIRST:
			json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_OBJECT_NEXT:
			json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_STRING:
			json_token_error(lex, "Expected string, but found \"%.*s\".");
			break;
		case JSON_INVALID_TOKEN:
			json_token_error(lex, "Token \"%.*s\" is invalid.");
			break;
		case JSON_OUT_OF_MEMORY:
			/* should have been handled above; use the error path */
			break;
		case JSON_UNICODE_CODE_POINT_ZERO:
			return _("\\u0000 cannot be converted to text.");
		case JSON_UNICODE_ESCAPE_FORMAT:
			return _("\"\\u\" must be followed by four hexadecimal digits.");
		case JSON_UNICODE_HIGH_ESCAPE:
			/* note: this case is only reachable in frontend not backend */
			return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
		case JSON_UNICODE_UNTRANSLATABLE:

			/*
			 * Note: this case is only reachable in backend and not frontend.
			 * #ifdef it away so the frontend doesn't try to link against
			 * backend functionality.
			 */
#ifndef FRONTEND
			return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
							GetDatabaseEncodingName());
#else
			Assert(false);
			break;
#endif
		case JSON_UNICODE_HIGH_SURROGATE:
			return _("Unicode high surrogate must not follow a high surrogate.");
		case JSON_UNICODE_LOW_SURROGATE:
			return _("Unicode low surrogate must follow a high surrogate.");
		case JSON_SEM_ACTION_FAILED:
			/* fall through to the error code after switch */
			break;
	}
#undef json_token_error

	/* Note that lex->errormsg can be NULL in shlib code. */
	if (lex->errormsg && lex->errormsg->len == 0)
	{
		/*
		 * We don't use a default: case, so that the compiler will warn about
		 * unhandled enum values.  But this needs to be here anyway to cover
		 * the possibility of an incorrect input.
		 */
		jsonapi_appendStringInfo(lex->errormsg,
								 "unexpected json parse error type: %d",
								 (int) error);
	}

#ifdef JSONAPI_USE_PQEXPBUFFER
	if (PQExpBufferBroken(lex->errormsg))
		return _("out of memory while constructing error description");
#endif

	return lex->errormsg->data;
}