Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/utils/adt/json.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * json.c
4
 *    JSON data type support.
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 * IDENTIFICATION
10
 *    src/backend/utils/adt/json.c
11
 *
12
 *-------------------------------------------------------------------------
13
 */
14
#include "postgres.h"
15
16
#include "catalog/pg_proc.h"
17
#include "catalog/pg_type.h"
18
#include "common/hashfn.h"
19
#include "funcapi.h"
20
#include "libpq/pqformat.h"
21
#include "miscadmin.h"
22
#include "port/simd.h"
23
#include "utils/array.h"
24
#include "utils/builtins.h"
25
#include "utils/date.h"
26
#include "utils/datetime.h"
27
#include "utils/fmgroids.h"
28
#include "utils/json.h"
29
#include "utils/jsonfuncs.h"
30
#include "utils/lsyscache.h"
31
#include "utils/typcache.h"
32
33
34
/*
35
 * Support for fast key uniqueness checking.
36
 *
37
 * We maintain a hash table of used keys in JSON objects for fast detection
38
 * of duplicates.
39
 */
40
/*
 * Common context for key uniqueness check: a pointer to the hash table that
 * json_unique_check_init() creates via hash_create().
 */
41
typedef struct HTAB *JsonUniqueCheckState;  /* hash table for key names */
42
43
/*
 * Hash entry for JsonUniqueCheckState.
 *
 * The whole struct is used as the hash key (keysize == entrysize in
 * json_unique_check_init).  Two entries match only when object_id, key_len
 * and the key bytes all agree (see json_unique_hash_match).
 */
44
typedef struct JsonUniqueHashEntry
45
{
46
  const char *key;    /* key text; pointer is stored as given, not copied */
47
  int     key_len;    /* strlen() of key, as set by json_unique_check_key */
48
  int     object_id;  /* identifies the JSON object the key belongs to */
49
} JsonUniqueHashEntry;
50
51
/*
 * Stack element for key uniqueness check during JSON parsing.
 *
 * NOTE(review): the code that pushes/pops these entries is not in this part
 * of the file.  Presumably "parent" links to the entry of the enclosing
 * object and "object_id" is the id later stored in
 * JsonUniqueHashEntry.object_id -- confirm against the parsing callbacks.
 */
52
typedef struct JsonUniqueStackEntry
53
{
54
  struct JsonUniqueStackEntry *parent;
55
  int     object_id;
56
} JsonUniqueStackEntry;
57
58
/*
 * Context struct for key uniqueness check during JSON parsing.
 *
 * NOTE(review): the users of this struct are not in this part of the file, so
 * the field notes below are inferred from the types only -- confirm:
 *   lex        - lexer context of the parse in progress
 *   check      - hash table of keys seen so far
 *   stack      - top of the JsonUniqueStackEntry stack
 *   id_counter - presumably the source of fresh object_id values
 *   unique     - presumably false once a duplicate key has been seen
 */
59
typedef struct JsonUniqueParsingState
60
{
61
  JsonLexContext *lex;
62
  JsonUniqueCheckState check;
63
  JsonUniqueStackEntry *stack;
64
  int     id_counter;
65
  bool    unique;
66
} JsonUniqueParsingState;
67
68
/*
 * Context struct for key uniqueness check during JSON building.
 *
 * Set up by json_unique_builder_init().  skipped_keys.data starts out NULL
 * and the buffer is only initialized, in mcxt, on the first call to
 * json_unique_builder_get_throwawaybuf().
 */
69
typedef struct JsonUniqueBuilderState
70
{
71
  JsonUniqueCheckState check; /* unique check */
72
  StringInfoData skipped_keys;  /* skipped keys with NULL values */
73
  MemoryContext mcxt;     /* context for saving skipped keys */
74
} JsonUniqueBuilderState;
75
76
77
/*
 * State struct for JSON aggregation.
 *
 * Allocated in the aggregate memory context by the transition functions.
 * json_agg fills in only str and the val_* fields; the key_* fields and
 * unique_check are set up by the json_object_agg transition function.
 */
78
typedef struct JsonAggState
79
{
80
  StringInfo  str;    /* JSON text accumulated so far */
81
  JsonTypeCategory key_category;  /* from json_categorize_type on the key */
82
  Oid     key_output_func;  /* ditto */
83
  JsonTypeCategory val_category;  /* from json_categorize_type on the value */
84
  Oid     val_output_func;  /* ditto */
85
  JsonUniqueBuilderState unique_check;  /* zeroed unless unique keys requested */
86
} JsonAggState;
87
88
/* Forward declarations of file-local (static) helper functions. */
static void composite_to_json(Datum composite, StringInfo result,
89
                bool use_line_feeds);
90
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
91
                Datum *vals, bool *nulls, int *valcount,
92
                JsonTypeCategory tcategory, Oid outfuncoid,
93
                bool use_line_feeds);
94
static void array_to_json_internal(Datum array, StringInfo result,
95
                   bool use_line_feeds);
96
static void datum_to_json_internal(Datum val, bool is_null, StringInfo result,
97
                   JsonTypeCategory tcategory, Oid outfuncoid,
98
                   bool key_scalar);
99
static void add_json(Datum val, bool is_null, StringInfo result,
100
           Oid val_type, bool key_scalar);
101
static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
102
103
/*
104
 * Input.
105
 */
106
Datum
107
json_in(PG_FUNCTION_ARGS)
108
0
{
109
0
  char     *json = PG_GETARG_CSTRING(0);
110
0
  text     *result = cstring_to_text(json);
111
0
  JsonLexContext lex;
112
113
  /* validate it */
114
0
  makeJsonLexContext(&lex, result, false);
115
0
  if (!pg_parse_json_or_errsave(&lex, &nullSemAction, fcinfo->context))
116
0
    PG_RETURN_NULL();
117
118
  /* Internal representation is the same as text */
119
0
  PG_RETURN_TEXT_P(result);
120
0
}
121
122
/*
123
 * Output.
124
 */
125
Datum
126
json_out(PG_FUNCTION_ARGS)
127
0
{
128
  /* we needn't detoast because text_to_cstring will handle that */
129
0
  Datum   txt = PG_GETARG_DATUM(0);
130
131
0
  PG_RETURN_CSTRING(TextDatumGetCString(txt));
132
0
}
133
134
/*
135
 * Binary send.
136
 */
137
Datum
138
json_send(PG_FUNCTION_ARGS)
139
0
{
140
0
  text     *t = PG_GETARG_TEXT_PP(0);
141
0
  StringInfoData buf;
142
143
0
  pq_begintypsend(&buf);
144
0
  pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
145
0
  PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
146
0
}
147
148
/*
149
 * Binary receive.
150
 */
151
Datum
152
json_recv(PG_FUNCTION_ARGS)
153
0
{
154
0
  StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
155
0
  char     *str;
156
0
  int     nbytes;
157
0
  JsonLexContext lex;
158
159
0
  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
160
161
  /* Validate it. */
162
0
  makeJsonLexContextCstringLen(&lex, str, nbytes, GetDatabaseEncoding(),
163
0
                 false);
164
0
  pg_parse_json_or_ereport(&lex, &nullSemAction);
165
166
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
167
0
}
168
169
/*
170
 * Turn a Datum into JSON text, appending the string to "result".
171
 *
172
 * tcategory and outfuncoid are from a previous call to json_categorize_type,
173
 * except that if is_null is true then they can be invalid.
174
 *
175
 * If key_scalar is true, the value is being printed as a key, so insist
176
 * it's of an acceptable type, and force it to be quoted.
177
 */
178
static void
179
datum_to_json_internal(Datum val, bool is_null, StringInfo result,
180
             JsonTypeCategory tcategory, Oid outfuncoid,
181
             bool key_scalar)
182
0
{
183
0
  char     *outputstr;
184
0
  text     *jsontext;
185
186
0
  check_stack_depth();
187
188
  /* callers are expected to ensure that null keys are not passed in */
189
0
  Assert(!(key_scalar && is_null));
190
191
0
  if (is_null)
192
0
  {
193
0
    appendBinaryStringInfo(result, "null", strlen("null"));
194
0
    return;
195
0
  }
196
197
0
  if (key_scalar &&
198
0
    (tcategory == JSONTYPE_ARRAY ||
199
0
     tcategory == JSONTYPE_COMPOSITE ||
200
0
     tcategory == JSONTYPE_JSON ||
201
0
     tcategory == JSONTYPE_CAST))
202
0
    ereport(ERROR,
203
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
204
0
         errmsg("key value must be scalar, not array, composite, or json")));
205
206
0
  switch (tcategory)
207
0
  {
208
0
    case JSONTYPE_ARRAY:
209
0
      array_to_json_internal(val, result, false);
210
0
      break;
211
0
    case JSONTYPE_COMPOSITE:
212
0
      composite_to_json(val, result, false);
213
0
      break;
214
0
    case JSONTYPE_BOOL:
215
0
      if (key_scalar)
216
0
        appendStringInfoChar(result, '"');
217
0
      if (DatumGetBool(val))
218
0
        appendBinaryStringInfo(result, "true", strlen("true"));
219
0
      else
220
0
        appendBinaryStringInfo(result, "false", strlen("false"));
221
0
      if (key_scalar)
222
0
        appendStringInfoChar(result, '"');
223
0
      break;
224
0
    case JSONTYPE_NUMERIC:
225
0
      outputstr = OidOutputFunctionCall(outfuncoid, val);
226
227
      /*
228
       * Don't quote a non-key if it's a valid JSON number (i.e., not
229
       * "Infinity", "-Infinity", or "NaN").  Since we know this is a
230
       * numeric data type's output, we simplify and open-code the
231
       * validation for better performance.
232
       */
233
0
      if (!key_scalar &&
234
0
        ((*outputstr >= '0' && *outputstr <= '9') ||
235
0
         (*outputstr == '-' &&
236
0
          (outputstr[1] >= '0' && outputstr[1] <= '9'))))
237
0
        appendStringInfoString(result, outputstr);
238
0
      else
239
0
      {
240
0
        appendStringInfoChar(result, '"');
241
0
        appendStringInfoString(result, outputstr);
242
0
        appendStringInfoChar(result, '"');
243
0
      }
244
0
      pfree(outputstr);
245
0
      break;
246
0
    case JSONTYPE_DATE:
247
0
      {
248
0
        char    buf[MAXDATELEN + 1];
249
250
0
        JsonEncodeDateTime(buf, val, DATEOID, NULL);
251
0
        appendStringInfoChar(result, '"');
252
0
        appendStringInfoString(result, buf);
253
0
        appendStringInfoChar(result, '"');
254
0
      }
255
0
      break;
256
0
    case JSONTYPE_TIMESTAMP:
257
0
      {
258
0
        char    buf[MAXDATELEN + 1];
259
260
0
        JsonEncodeDateTime(buf, val, TIMESTAMPOID, NULL);
261
0
        appendStringInfoChar(result, '"');
262
0
        appendStringInfoString(result, buf);
263
0
        appendStringInfoChar(result, '"');
264
0
      }
265
0
      break;
266
0
    case JSONTYPE_TIMESTAMPTZ:
267
0
      {
268
0
        char    buf[MAXDATELEN + 1];
269
270
0
        JsonEncodeDateTime(buf, val, TIMESTAMPTZOID, NULL);
271
0
        appendStringInfoChar(result, '"');
272
0
        appendStringInfoString(result, buf);
273
0
        appendStringInfoChar(result, '"');
274
0
      }
275
0
      break;
276
0
    case JSONTYPE_JSON:
277
      /* JSON and JSONB output will already be escaped */
278
0
      outputstr = OidOutputFunctionCall(outfuncoid, val);
279
0
      appendStringInfoString(result, outputstr);
280
0
      pfree(outputstr);
281
0
      break;
282
0
    case JSONTYPE_CAST:
283
      /* outfuncoid refers to a cast function, not an output function */
284
0
      jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
285
0
      appendBinaryStringInfo(result, VARDATA_ANY(jsontext),
286
0
                   VARSIZE_ANY_EXHDR(jsontext));
287
0
      pfree(jsontext);
288
0
      break;
289
0
    default:
290
      /* special-case text types to save useless palloc/memcpy cycles */
291
0
      if (outfuncoid == F_TEXTOUT || outfuncoid == F_VARCHAROUT ||
292
0
        outfuncoid == F_BPCHAROUT)
293
0
        escape_json_text(result, (text *) DatumGetPointer(val));
294
0
      else
295
0
      {
296
0
        outputstr = OidOutputFunctionCall(outfuncoid, val);
297
0
        escape_json(result, outputstr);
298
0
        pfree(outputstr);
299
0
      }
300
0
      break;
301
0
  }
302
0
}
303
304
/*
305
 * Encode 'value' of datetime type 'typid' into JSON string in ISO format using
306
 * optionally preallocated buffer 'buf'.  Optional 'tzp' determines time-zone
307
 * offset (in seconds) in which we want to show timestamptz.
308
 */
309
char *
310
JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp)
311
0
{
312
0
  if (!buf)
313
0
    buf = palloc(MAXDATELEN + 1);
314
315
0
  switch (typid)
316
0
  {
317
0
    case DATEOID:
318
0
      {
319
0
        DateADT   date;
320
0
        struct pg_tm tm;
321
322
0
        date = DatumGetDateADT(value);
323
324
        /* Same as date_out(), but forcing DateStyle */
325
0
        if (DATE_NOT_FINITE(date))
326
0
          EncodeSpecialDate(date, buf);
327
0
        else
328
0
        {
329
0
          j2date(date + POSTGRES_EPOCH_JDATE,
330
0
               &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
331
0
          EncodeDateOnly(&tm, USE_XSD_DATES, buf);
332
0
        }
333
0
      }
334
0
      break;
335
0
    case TIMEOID:
336
0
      {
337
0
        TimeADT   time = DatumGetTimeADT(value);
338
0
        struct pg_tm tt,
339
0
               *tm = &tt;
340
0
        fsec_t    fsec;
341
342
        /* Same as time_out(), but forcing DateStyle */
343
0
        time2tm(time, tm, &fsec);
344
0
        EncodeTimeOnly(tm, fsec, false, 0, USE_XSD_DATES, buf);
345
0
      }
346
0
      break;
347
0
    case TIMETZOID:
348
0
      {
349
0
        TimeTzADT  *time = DatumGetTimeTzADTP(value);
350
0
        struct pg_tm tt,
351
0
               *tm = &tt;
352
0
        fsec_t    fsec;
353
0
        int     tz;
354
355
        /* Same as timetz_out(), but forcing DateStyle */
356
0
        timetz2tm(time, tm, &fsec, &tz);
357
0
        EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf);
358
0
      }
359
0
      break;
360
0
    case TIMESTAMPOID:
361
0
      {
362
0
        Timestamp timestamp;
363
0
        struct pg_tm tm;
364
0
        fsec_t    fsec;
365
366
0
        timestamp = DatumGetTimestamp(value);
367
        /* Same as timestamp_out(), but forcing DateStyle */
368
0
        if (TIMESTAMP_NOT_FINITE(timestamp))
369
0
          EncodeSpecialTimestamp(timestamp, buf);
370
0
        else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
371
0
          EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
372
0
        else
373
0
          ereport(ERROR,
374
0
              (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
375
0
               errmsg("timestamp out of range")));
376
0
      }
377
0
      break;
378
0
    case TIMESTAMPTZOID:
379
0
      {
380
0
        TimestampTz timestamp;
381
0
        struct pg_tm tm;
382
0
        int     tz;
383
0
        fsec_t    fsec;
384
0
        const char *tzn = NULL;
385
386
0
        timestamp = DatumGetTimestampTz(value);
387
388
        /*
389
         * If a time zone is specified, we apply the time-zone shift,
390
         * convert timestamptz to pg_tm as if it were without a time
391
         * zone, and then use the specified time zone for converting
392
         * the timestamp into a string.
393
         */
394
0
        if (tzp)
395
0
        {
396
0
          tz = *tzp;
397
0
          timestamp -= (TimestampTz) tz * USECS_PER_SEC;
398
0
        }
399
400
        /* Same as timestamptz_out(), but forcing DateStyle */
401
0
        if (TIMESTAMP_NOT_FINITE(timestamp))
402
0
          EncodeSpecialTimestamp(timestamp, buf);
403
0
        else if (timestamp2tm(timestamp, tzp ? NULL : &tz, &tm, &fsec,
404
0
                    tzp ? NULL : &tzn, NULL) == 0)
405
0
        {
406
0
          if (tzp)
407
0
            tm.tm_isdst = 1; /* set time-zone presence flag */
408
409
0
          EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
410
0
        }
411
0
        else
412
0
          ereport(ERROR,
413
0
              (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
414
0
               errmsg("timestamp out of range")));
415
0
      }
416
0
      break;
417
0
    default:
418
0
      elog(ERROR, "unknown jsonb value datetime type oid %u", typid);
419
0
      return NULL;
420
0
  }
421
422
0
  return buf;
423
0
}
424
425
/*
426
 * Process a single dimension of an array.
427
 * If it's the innermost dimension, output the values, otherwise call
428
 * ourselves recursively to process the next dimension.
429
 */
430
static void
431
array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
432
          bool *nulls, int *valcount, JsonTypeCategory tcategory,
433
          Oid outfuncoid, bool use_line_feeds)
434
0
{
435
0
  int     i;
436
0
  const char *sep;
437
438
0
  Assert(dim < ndims);
439
440
0
  sep = use_line_feeds ? ",\n " : ",";
441
442
0
  appendStringInfoChar(result, '[');
443
444
0
  for (i = 1; i <= dims[dim]; i++)
445
0
  {
446
0
    if (i > 1)
447
0
      appendStringInfoString(result, sep);
448
449
0
    if (dim + 1 == ndims)
450
0
    {
451
0
      datum_to_json_internal(vals[*valcount], nulls[*valcount],
452
0
                   result, tcategory,
453
0
                   outfuncoid, false);
454
0
      (*valcount)++;
455
0
    }
456
0
    else
457
0
    {
458
      /*
459
       * Do we want line feeds on inner dimensions of arrays? For now
460
       * we'll say no.
461
       */
462
0
      array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
463
0
                valcount, tcategory, outfuncoid, false);
464
0
    }
465
0
  }
466
467
0
  appendStringInfoChar(result, ']');
468
0
}
469
470
/*
471
 * Turn an array into JSON.
472
 */
473
static void
474
array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
475
0
{
476
0
  ArrayType  *v = DatumGetArrayTypeP(array);
477
0
  Oid     element_type = ARR_ELEMTYPE(v);
478
0
  int      *dim;
479
0
  int     ndim;
480
0
  int     nitems;
481
0
  int     count = 0;
482
0
  Datum    *elements;
483
0
  bool     *nulls;
484
0
  int16   typlen;
485
0
  bool    typbyval;
486
0
  char    typalign;
487
0
  JsonTypeCategory tcategory;
488
0
  Oid     outfuncoid;
489
490
0
  ndim = ARR_NDIM(v);
491
0
  dim = ARR_DIMS(v);
492
0
  nitems = ArrayGetNItems(ndim, dim);
493
494
0
  if (nitems <= 0)
495
0
  {
496
0
    appendStringInfoString(result, "[]");
497
0
    return;
498
0
  }
499
500
0
  get_typlenbyvalalign(element_type,
501
0
             &typlen, &typbyval, &typalign);
502
503
0
  json_categorize_type(element_type, false,
504
0
             &tcategory, &outfuncoid);
505
506
0
  deconstruct_array(v, element_type, typlen, typbyval,
507
0
            typalign, &elements, &nulls,
508
0
            &nitems);
509
510
0
  array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
511
0
            outfuncoid, use_line_feeds);
512
513
0
  pfree(elements);
514
0
  pfree(nulls);
515
0
}
516
517
/*
518
 * Turn a composite / record into JSON.
519
 */
520
static void
521
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
522
0
{
523
0
  HeapTupleHeader td;
524
0
  Oid     tupType;
525
0
  int32   tupTypmod;
526
0
  TupleDesc tupdesc;
527
0
  HeapTupleData tmptup,
528
0
         *tuple;
529
0
  int     i;
530
0
  bool    needsep = false;
531
0
  const char *sep;
532
0
  int     seplen;
533
534
  /*
535
   * We can avoid expensive strlen() calls by precalculating the separator
536
   * length.
537
   */
538
0
  sep = use_line_feeds ? ",\n " : ",";
539
0
  seplen = use_line_feeds ? strlen(",\n ") : strlen(",");
540
541
0
  td = DatumGetHeapTupleHeader(composite);
542
543
  /* Extract rowtype info and find a tupdesc */
544
0
  tupType = HeapTupleHeaderGetTypeId(td);
545
0
  tupTypmod = HeapTupleHeaderGetTypMod(td);
546
0
  tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
547
548
  /* Build a temporary HeapTuple control structure */
549
0
  tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
550
0
  tmptup.t_data = td;
551
0
  tuple = &tmptup;
552
553
0
  appendStringInfoChar(result, '{');
554
555
0
  for (i = 0; i < tupdesc->natts; i++)
556
0
  {
557
0
    Datum   val;
558
0
    bool    isnull;
559
0
    char     *attname;
560
0
    JsonTypeCategory tcategory;
561
0
    Oid     outfuncoid;
562
0
    Form_pg_attribute att = TupleDescAttr(tupdesc, i);
563
564
0
    if (att->attisdropped)
565
0
      continue;
566
567
0
    if (needsep)
568
0
      appendBinaryStringInfo(result, sep, seplen);
569
0
    needsep = true;
570
571
0
    attname = NameStr(att->attname);
572
0
    escape_json(result, attname);
573
0
    appendStringInfoChar(result, ':');
574
575
0
    val = heap_getattr(tuple, i + 1, tupdesc, &isnull);
576
577
0
    if (isnull)
578
0
    {
579
0
      tcategory = JSONTYPE_NULL;
580
0
      outfuncoid = InvalidOid;
581
0
    }
582
0
    else
583
0
      json_categorize_type(att->atttypid, false, &tcategory,
584
0
                 &outfuncoid);
585
586
0
    datum_to_json_internal(val, isnull, result, tcategory, outfuncoid,
587
0
                 false);
588
0
  }
589
590
0
  appendStringInfoChar(result, '}');
591
0
  ReleaseTupleDesc(tupdesc);
592
0
}
593
594
/*
595
 * Append JSON text for "val" to "result".
596
 *
597
 * This is just a thin wrapper around datum_to_json.  If the same type will be
598
 * printed many times, avoid using this; better to do the json_categorize_type
599
 * lookups only once.
600
 */
601
static void
602
add_json(Datum val, bool is_null, StringInfo result,
603
     Oid val_type, bool key_scalar)
604
0
{
605
0
  JsonTypeCategory tcategory;
606
0
  Oid     outfuncoid;
607
608
0
  if (val_type == InvalidOid)
609
0
    ereport(ERROR,
610
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
611
0
         errmsg("could not determine input data type")));
612
613
0
  if (is_null)
614
0
  {
615
0
    tcategory = JSONTYPE_NULL;
616
0
    outfuncoid = InvalidOid;
617
0
  }
618
0
  else
619
0
    json_categorize_type(val_type, false,
620
0
               &tcategory, &outfuncoid);
621
622
0
  datum_to_json_internal(val, is_null, result, tcategory, outfuncoid,
623
0
               key_scalar);
624
0
}
625
626
/*
627
 * SQL function array_to_json(row)
628
 */
629
Datum
630
array_to_json(PG_FUNCTION_ARGS)
631
0
{
632
0
  Datum   array = PG_GETARG_DATUM(0);
633
0
  StringInfo  result;
634
635
0
  result = makeStringInfo();
636
637
0
  array_to_json_internal(array, result, false);
638
639
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
640
0
}
641
642
/*
643
 * SQL function array_to_json(row, prettybool)
644
 */
645
Datum
646
array_to_json_pretty(PG_FUNCTION_ARGS)
647
0
{
648
0
  Datum   array = PG_GETARG_DATUM(0);
649
0
  bool    use_line_feeds = PG_GETARG_BOOL(1);
650
0
  StringInfo  result;
651
652
0
  result = makeStringInfo();
653
654
0
  array_to_json_internal(array, result, use_line_feeds);
655
656
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
657
0
}
658
659
/*
660
 * SQL function row_to_json(row)
661
 */
662
Datum
663
row_to_json(PG_FUNCTION_ARGS)
664
0
{
665
0
  Datum   array = PG_GETARG_DATUM(0);
666
0
  StringInfo  result;
667
668
0
  result = makeStringInfo();
669
670
0
  composite_to_json(array, result, false);
671
672
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
673
0
}
674
675
/*
676
 * SQL function row_to_json(row, prettybool)
677
 */
678
Datum
679
row_to_json_pretty(PG_FUNCTION_ARGS)
680
0
{
681
0
  Datum   array = PG_GETARG_DATUM(0);
682
0
  bool    use_line_feeds = PG_GETARG_BOOL(1);
683
0
  StringInfo  result;
684
685
0
  result = makeStringInfo();
686
687
0
  composite_to_json(array, result, use_line_feeds);
688
689
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
690
0
}
691
692
/*
693
 * Is the given type immutable when coming out of a JSON context?
694
 *
695
 * At present, datetimes are all considered mutable, because they
696
 * depend on timezone.  XXX we should also drill down into objects
697
 * and arrays, but do not.
698
 */
699
bool
700
to_json_is_immutable(Oid typoid)
701
0
{
702
0
  JsonTypeCategory tcategory;
703
0
  Oid     outfuncoid;
704
705
0
  json_categorize_type(typoid, false, &tcategory, &outfuncoid);
706
707
0
  switch (tcategory)
708
0
  {
709
0
    case JSONTYPE_BOOL:
710
0
    case JSONTYPE_JSON:
711
0
    case JSONTYPE_JSONB:
712
0
    case JSONTYPE_NULL:
713
0
      return true;
714
715
0
    case JSONTYPE_DATE:
716
0
    case JSONTYPE_TIMESTAMP:
717
0
    case JSONTYPE_TIMESTAMPTZ:
718
0
      return false;
719
720
0
    case JSONTYPE_ARRAY:
721
0
      return false;   /* TODO recurse into elements */
722
723
0
    case JSONTYPE_COMPOSITE:
724
0
      return false;   /* TODO recurse into fields */
725
726
0
    case JSONTYPE_NUMERIC:
727
0
    case JSONTYPE_CAST:
728
0
    case JSONTYPE_OTHER:
729
0
      return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE;
730
0
  }
731
732
0
  return false;       /* not reached */
733
0
}
734
735
/*
736
 * SQL function to_json(anyvalue)
737
 */
738
Datum
739
to_json(PG_FUNCTION_ARGS)
740
0
{
741
0
  Datum   val = PG_GETARG_DATUM(0);
742
0
  Oid     val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
743
0
  JsonTypeCategory tcategory;
744
0
  Oid     outfuncoid;
745
746
0
  if (val_type == InvalidOid)
747
0
    ereport(ERROR,
748
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
749
0
         errmsg("could not determine input data type")));
750
751
0
  json_categorize_type(val_type, false,
752
0
             &tcategory, &outfuncoid);
753
754
0
  PG_RETURN_DATUM(datum_to_json(val, tcategory, outfuncoid));
755
0
}
756
757
/*
758
 * Turn a Datum into JSON text.
759
 *
760
 * tcategory and outfuncoid are from a previous call to json_categorize_type.
761
 */
762
Datum
763
datum_to_json(Datum val, JsonTypeCategory tcategory, Oid outfuncoid)
764
0
{
765
0
  StringInfo  result = makeStringInfo();
766
767
0
  datum_to_json_internal(val, false, result, tcategory, outfuncoid,
768
0
               false);
769
770
0
  return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
771
0
}
772
773
/*
774
 * json_agg transition function
775
 *
776
 * aggregate input column as a json array value.
777
 */
778
static Datum
779
json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null)
780
0
{
781
0
  MemoryContext aggcontext,
782
0
        oldcontext;
783
0
  JsonAggState *state;
784
0
  Datum   val;
785
786
0
  if (!AggCheckCallContext(fcinfo, &aggcontext))
787
0
  {
788
    /* cannot be called directly because of internal-type argument */
789
0
    elog(ERROR, "json_agg_transfn called in non-aggregate context");
790
0
  }
791
792
0
  if (PG_ARGISNULL(0))
793
0
  {
794
0
    Oid     arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
795
796
0
    if (arg_type == InvalidOid)
797
0
      ereport(ERROR,
798
0
          (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
799
0
           errmsg("could not determine input data type")));
800
801
    /*
802
     * Make this state object in a context where it will persist for the
803
     * duration of the aggregate call.  MemoryContextSwitchTo is only
804
     * needed the first time, as the StringInfo routines make sure they
805
     * use the right context to enlarge the object if necessary.
806
     */
807
0
    oldcontext = MemoryContextSwitchTo(aggcontext);
808
0
    state = (JsonAggState *) palloc(sizeof(JsonAggState));
809
0
    state->str = makeStringInfo();
810
0
    MemoryContextSwitchTo(oldcontext);
811
812
0
    appendStringInfoChar(state->str, '[');
813
0
    json_categorize_type(arg_type, false, &state->val_category,
814
0
               &state->val_output_func);
815
0
  }
816
0
  else
817
0
  {
818
0
    state = (JsonAggState *) PG_GETARG_POINTER(0);
819
0
  }
820
821
0
  if (absent_on_null && PG_ARGISNULL(1))
822
0
    PG_RETURN_POINTER(state);
823
824
0
  if (state->str->len > 1)
825
0
    appendStringInfoString(state->str, ", ");
826
827
  /* fast path for NULLs */
828
0
  if (PG_ARGISNULL(1))
829
0
  {
830
0
    datum_to_json_internal((Datum) 0, true, state->str, JSONTYPE_NULL,
831
0
                 InvalidOid, false);
832
0
    PG_RETURN_POINTER(state);
833
0
  }
834
835
0
  val = PG_GETARG_DATUM(1);
836
837
  /* add some whitespace if structured type and not first item */
838
0
  if (!PG_ARGISNULL(0) && state->str->len > 1 &&
839
0
    (state->val_category == JSONTYPE_ARRAY ||
840
0
     state->val_category == JSONTYPE_COMPOSITE))
841
0
  {
842
0
    appendStringInfoString(state->str, "\n ");
843
0
  }
844
845
0
  datum_to_json_internal(val, false, state->str, state->val_category,
846
0
               state->val_output_func, false);
847
848
  /*
849
   * The transition type for json_agg() is declared to be "internal", which
850
   * is a pass-by-value type the same size as a pointer.  So we can safely
851
   * pass the JsonAggState pointer through nodeAgg.c's machinations.
852
   */
853
0
  PG_RETURN_POINTER(state);
854
0
}
855
856
857
/*
858
 * json_agg aggregate function
859
 */
860
Datum
861
json_agg_transfn(PG_FUNCTION_ARGS)
862
0
{
863
0
  return json_agg_transfn_worker(fcinfo, false);
864
0
}
865
866
/*
867
 * json_agg_strict aggregate function
868
 */
869
Datum
870
json_agg_strict_transfn(PG_FUNCTION_ARGS)
871
0
{
872
0
  return json_agg_transfn_worker(fcinfo, true);
873
0
}
874
875
/*
876
 * json_agg final function
877
 */
878
Datum
879
json_agg_finalfn(PG_FUNCTION_ARGS)
880
0
{
881
0
  JsonAggState *state;
882
883
  /* cannot be called directly because of internal-type argument */
884
0
  Assert(AggCheckCallContext(fcinfo, NULL));
885
886
0
  state = PG_ARGISNULL(0) ?
887
0
    NULL :
888
0
    (JsonAggState *) PG_GETARG_POINTER(0);
889
890
  /* NULL result for no rows in, as is standard with aggregates */
891
0
  if (state == NULL)
892
0
    PG_RETURN_NULL();
893
894
  /* Else return state with appropriate array terminator added */
895
0
  PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
896
0
}
897
898
/* Functions implementing hash table for key uniqueness check */
899
static uint32
900
json_unique_hash(const void *key, Size keysize)
901
0
{
902
0
  const JsonUniqueHashEntry *entry = (JsonUniqueHashEntry *) key;
903
0
  uint32    hash = hash_bytes_uint32(entry->object_id);
904
905
0
  hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len);
906
907
0
  return DatumGetUInt32(hash);
908
0
}
909
910
static int
911
json_unique_hash_match(const void *key1, const void *key2, Size keysize)
912
0
{
913
0
  const JsonUniqueHashEntry *entry1 = (const JsonUniqueHashEntry *) key1;
914
0
  const JsonUniqueHashEntry *entry2 = (const JsonUniqueHashEntry *) key2;
915
916
0
  if (entry1->object_id != entry2->object_id)
917
0
    return entry1->object_id > entry2->object_id ? 1 : -1;
918
919
0
  if (entry1->key_len != entry2->key_len)
920
0
    return entry1->key_len > entry2->key_len ? 1 : -1;
921
922
0
  return strncmp(entry1->key, entry2->key, entry1->key_len);
923
0
}
924
925
/*
926
 * Uniqueness detection support.
927
 *
928
 * In order to detect uniqueness during building or parsing of a JSON
929
 * object, we maintain a hash table of key names already seen.
930
 */
931
static void
932
json_unique_check_init(JsonUniqueCheckState *cxt)
933
0
{
934
0
  HASHCTL   ctl;
935
936
0
  memset(&ctl, 0, sizeof(ctl));
937
0
  ctl.keysize = sizeof(JsonUniqueHashEntry);
938
0
  ctl.entrysize = sizeof(JsonUniqueHashEntry);
939
0
  ctl.hcxt = CurrentMemoryContext;
940
0
  ctl.hash = json_unique_hash;
941
0
  ctl.match = json_unique_hash_match;
942
943
0
  *cxt = hash_create("json object hashtable",
944
0
             32,
945
0
             &ctl,
946
0
             HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
947
0
}
948
949
static void
950
json_unique_builder_init(JsonUniqueBuilderState *cxt)
951
0
{
952
0
  json_unique_check_init(&cxt->check);
953
0
  cxt->mcxt = CurrentMemoryContext;
954
0
  cxt->skipped_keys.data = NULL;
955
0
}
956
957
static bool
958
json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id)
959
0
{
960
0
  JsonUniqueHashEntry entry;
961
0
  bool    found;
962
963
0
  entry.key = key;
964
0
  entry.key_len = strlen(key);
965
0
  entry.object_id = object_id;
966
967
0
  (void) hash_search(*cxt, &entry, HASH_ENTER, &found);
968
969
0
  return !found;
970
0
}
971
972
/*
973
 * On-demand initialization of a throwaway StringInfo.  This is used to
974
 * read a key name that we don't need to store in the output object, for
975
 * duplicate key detection when the value is NULL.
976
 */
977
static StringInfo
978
json_unique_builder_get_throwawaybuf(JsonUniqueBuilderState *cxt)
979
0
{
980
0
  StringInfo  out = &cxt->skipped_keys;
981
982
0
  if (!out->data)
983
0
  {
984
0
    MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
985
986
0
    initStringInfo(out);
987
0
    MemoryContextSwitchTo(oldcxt);
988
0
  }
989
0
  else
990
    /* Just reset the string to empty */
991
0
    out->len = 0;
992
993
0
  return out;
994
0
}
995
996
/*
997
 * json_object_agg transition function.
998
 *
999
 * aggregate two input columns as a single json object value.
1000
 */
1001
static Datum
1002
json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
1003
                 bool absent_on_null, bool unique_keys)
1004
0
{
1005
0
  MemoryContext aggcontext,
1006
0
        oldcontext;
1007
0
  JsonAggState *state;
1008
0
  StringInfo  out;
1009
0
  Datum   arg;
1010
0
  bool    skip;
1011
0
  int     key_offset;
1012
1013
0
  if (!AggCheckCallContext(fcinfo, &aggcontext))
1014
0
  {
1015
    /* cannot be called directly because of internal-type argument */
1016
0
    elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
1017
0
  }
1018
1019
0
  if (PG_ARGISNULL(0))
1020
0
  {
1021
0
    Oid     arg_type;
1022
1023
    /*
1024
     * Make the StringInfo in a context where it will persist for the
1025
     * duration of the aggregate call. Switching context is only needed
1026
     * for this initial step, as the StringInfo and dynahash routines make
1027
     * sure they use the right context to enlarge the object if necessary.
1028
     */
1029
0
    oldcontext = MemoryContextSwitchTo(aggcontext);
1030
0
    state = (JsonAggState *) palloc(sizeof(JsonAggState));
1031
0
    state->str = makeStringInfo();
1032
0
    if (unique_keys)
1033
0
      json_unique_builder_init(&state->unique_check);
1034
0
    else
1035
0
      memset(&state->unique_check, 0, sizeof(state->unique_check));
1036
0
    MemoryContextSwitchTo(oldcontext);
1037
1038
0
    arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1039
1040
0
    if (arg_type == InvalidOid)
1041
0
      ereport(ERROR,
1042
0
          (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1043
0
           errmsg("could not determine data type for argument %d", 1)));
1044
1045
0
    json_categorize_type(arg_type, false, &state->key_category,
1046
0
               &state->key_output_func);
1047
1048
0
    arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);
1049
1050
0
    if (arg_type == InvalidOid)
1051
0
      ereport(ERROR,
1052
0
          (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1053
0
           errmsg("could not determine data type for argument %d", 2)));
1054
1055
0
    json_categorize_type(arg_type, false, &state->val_category,
1056
0
               &state->val_output_func);
1057
1058
0
    appendStringInfoString(state->str, "{ ");
1059
0
  }
1060
0
  else
1061
0
  {
1062
0
    state = (JsonAggState *) PG_GETARG_POINTER(0);
1063
0
  }
1064
1065
  /*
1066
   * Note: since json_object_agg() is declared as taking type "any", the
1067
   * parser will not do any type conversion on unknown-type literals (that
1068
   * is, undecorated strings or NULLs).  Such values will arrive here as
1069
   * type UNKNOWN, which fortunately does not matter to us, since
1070
   * unknownout() works fine.
1071
   */
1072
1073
0
  if (PG_ARGISNULL(1))
1074
0
    ereport(ERROR,
1075
0
        (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1076
0
         errmsg("null value not allowed for object key")));
1077
1078
  /* Skip null values if absent_on_null */
1079
0
  skip = absent_on_null && PG_ARGISNULL(2);
1080
1081
0
  if (skip)
1082
0
  {
1083
    /*
1084
     * We got a NULL value and we're not storing those; if we're not
1085
     * testing key uniqueness, we're done.  If we are, use the throwaway
1086
     * buffer to store the key name so that we can check it.
1087
     */
1088
0
    if (!unique_keys)
1089
0
      PG_RETURN_POINTER(state);
1090
1091
0
    out = json_unique_builder_get_throwawaybuf(&state->unique_check);
1092
0
  }
1093
0
  else
1094
0
  {
1095
0
    out = state->str;
1096
1097
    /*
1098
     * Append comma delimiter only if we have already output some fields
1099
     * after the initial string "{ ".
1100
     */
1101
0
    if (out->len > 2)
1102
0
      appendStringInfoString(out, ", ");
1103
0
  }
1104
1105
0
  arg = PG_GETARG_DATUM(1);
1106
1107
0
  key_offset = out->len;
1108
1109
0
  datum_to_json_internal(arg, false, out, state->key_category,
1110
0
               state->key_output_func, true);
1111
1112
0
  if (unique_keys)
1113
0
  {
1114
    /*
1115
     * Copy the key first, instead of pointing into the buffer. It will be
1116
     * added to the hash table, but the buffer may get reallocated as
1117
     * we're appending more data to it. That would invalidate pointers to
1118
     * keys in the current buffer.
1119
     */
1120
0
    const char *key = MemoryContextStrdup(aggcontext,
1121
0
                        &out->data[key_offset]);
1122
1123
0
    if (!json_unique_check_key(&state->unique_check.check, key, 0))
1124
0
      ereport(ERROR,
1125
0
          errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1126
0
          errmsg("duplicate JSON object key value: %s", key));
1127
1128
0
    if (skip)
1129
0
      PG_RETURN_POINTER(state);
1130
0
  }
1131
1132
0
  appendStringInfoString(state->str, " : ");
1133
1134
0
  if (PG_ARGISNULL(2))
1135
0
    arg = (Datum) 0;
1136
0
  else
1137
0
    arg = PG_GETARG_DATUM(2);
1138
1139
0
  datum_to_json_internal(arg, PG_ARGISNULL(2), state->str,
1140
0
               state->val_category,
1141
0
               state->val_output_func, false);
1142
1143
0
  PG_RETURN_POINTER(state);
1144
0
}
1145
1146
/*
1147
 * json_object_agg aggregate function
1148
 */
1149
Datum
1150
json_object_agg_transfn(PG_FUNCTION_ARGS)
1151
0
{
1152
0
  return json_object_agg_transfn_worker(fcinfo, false, false);
1153
0
}
1154
1155
/*
1156
 * json_object_agg_strict aggregate function
1157
 */
1158
Datum
1159
json_object_agg_strict_transfn(PG_FUNCTION_ARGS)
1160
0
{
1161
0
  return json_object_agg_transfn_worker(fcinfo, true, false);
1162
0
}
1163
1164
/*
1165
 * json_object_agg_unique aggregate function
1166
 */
1167
Datum
1168
json_object_agg_unique_transfn(PG_FUNCTION_ARGS)
1169
0
{
1170
0
  return json_object_agg_transfn_worker(fcinfo, false, true);
1171
0
}
1172
1173
/*
1174
 * json_object_agg_unique_strict aggregate function
1175
 */
1176
Datum
1177
json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS)
1178
0
{
1179
0
  return json_object_agg_transfn_worker(fcinfo, true, true);
1180
0
}
1181
1182
/*
1183
 * json_object_agg final function.
1184
 */
1185
Datum
1186
json_object_agg_finalfn(PG_FUNCTION_ARGS)
1187
0
{
1188
0
  JsonAggState *state;
1189
1190
  /* cannot be called directly because of internal-type argument */
1191
0
  Assert(AggCheckCallContext(fcinfo, NULL));
1192
1193
0
  state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0);
1194
1195
  /* NULL result for no rows in, as is standard with aggregates */
1196
0
  if (state == NULL)
1197
0
    PG_RETURN_NULL();
1198
1199
  /* Else return state with appropriate object terminator added */
1200
0
  PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
1201
0
}
1202
1203
/*
1204
 * Helper function for aggregates: return given StringInfo's contents plus
1205
 * specified trailing string, as a text datum.  We need this because aggregate
1206
 * final functions are not allowed to modify the aggregate state.
1207
 */
1208
static text *
1209
catenate_stringinfo_string(StringInfo buffer, const char *addon)
1210
0
{
1211
  /* custom version of cstring_to_text_with_len */
1212
0
  int     buflen = buffer->len;
1213
0
  int     addlen = strlen(addon);
1214
0
  text     *result = (text *) palloc(buflen + addlen + VARHDRSZ);
1215
1216
0
  SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
1217
0
  memcpy(VARDATA(result), buffer->data, buflen);
1218
0
  memcpy(VARDATA(result) + buflen, addon, addlen);
1219
1220
0
  return result;
1221
0
}
1222
1223
Datum
1224
json_build_object_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types,
1225
             bool absent_on_null, bool unique_keys)
1226
0
{
1227
0
  int     i;
1228
0
  const char *sep = "";
1229
0
  StringInfo  result;
1230
0
  JsonUniqueBuilderState unique_check;
1231
1232
0
  if (nargs % 2 != 0)
1233
0
    ereport(ERROR,
1234
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1235
0
         errmsg("argument list must have even number of elements"),
1236
    /* translator: %s is a SQL function name */
1237
0
         errhint("The arguments of %s must consist of alternating keys and values.",
1238
0
             "json_build_object()")));
1239
1240
0
  result = makeStringInfo();
1241
1242
0
  appendStringInfoChar(result, '{');
1243
1244
0
  if (unique_keys)
1245
0
    json_unique_builder_init(&unique_check);
1246
1247
0
  for (i = 0; i < nargs; i += 2)
1248
0
  {
1249
0
    StringInfo  out;
1250
0
    bool    skip;
1251
0
    int     key_offset;
1252
1253
    /* Skip null values if absent_on_null */
1254
0
    skip = absent_on_null && nulls[i + 1];
1255
1256
0
    if (skip)
1257
0
    {
1258
      /* If key uniqueness check is needed we must save skipped keys */
1259
0
      if (!unique_keys)
1260
0
        continue;
1261
1262
0
      out = json_unique_builder_get_throwawaybuf(&unique_check);
1263
0
    }
1264
0
    else
1265
0
    {
1266
0
      appendStringInfoString(result, sep);
1267
0
      sep = ", ";
1268
0
      out = result;
1269
0
    }
1270
1271
    /* process key */
1272
0
    if (nulls[i])
1273
0
      ereport(ERROR,
1274
0
          (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1275
0
           errmsg("null value not allowed for object key")));
1276
1277
    /* save key offset before appending it */
1278
0
    key_offset = out->len;
1279
1280
0
    add_json(args[i], false, out, types[i], true);
1281
1282
0
    if (unique_keys)
1283
0
    {
1284
      /*
1285
       * check key uniqueness after key appending
1286
       *
1287
       * Copy the key first, instead of pointing into the buffer. It
1288
       * will be added to the hash table, but the buffer may get
1289
       * reallocated as we're appending more data to it. That would
1290
       * invalidate pointers to keys in the current buffer.
1291
       */
1292
0
      const char *key = pstrdup(&out->data[key_offset]);
1293
1294
0
      if (!json_unique_check_key(&unique_check.check, key, 0))
1295
0
        ereport(ERROR,
1296
0
            errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1297
0
            errmsg("duplicate JSON object key value: %s", key));
1298
1299
0
      if (skip)
1300
0
        continue;
1301
0
    }
1302
1303
0
    appendStringInfoString(result, " : ");
1304
1305
    /* process value */
1306
0
    add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false);
1307
0
  }
1308
1309
0
  appendStringInfoChar(result, '}');
1310
1311
0
  return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
1312
0
}
1313
1314
/*
1315
 * SQL function json_build_object(variadic "any")
1316
 */
1317
Datum
1318
json_build_object(PG_FUNCTION_ARGS)
1319
0
{
1320
0
  Datum    *args;
1321
0
  bool     *nulls;
1322
0
  Oid      *types;
1323
1324
  /* build argument values to build the object */
1325
0
  int     nargs = extract_variadic_args(fcinfo, 0, true,
1326
0
                        &args, &types, &nulls);
1327
1328
0
  if (nargs < 0)
1329
0
    PG_RETURN_NULL();
1330
1331
0
  PG_RETURN_DATUM(json_build_object_worker(nargs, args, nulls, types, false, false));
1332
0
}
1333
1334
/*
1335
 * degenerate case of json_build_object where it gets 0 arguments.
1336
 */
1337
Datum
1338
json_build_object_noargs(PG_FUNCTION_ARGS)
1339
0
{
1340
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
1341
0
}
1342
1343
Datum
1344
json_build_array_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types,
1345
            bool absent_on_null)
1346
0
{
1347
0
  int     i;
1348
0
  const char *sep = "";
1349
0
  StringInfo  result;
1350
1351
0
  result = makeStringInfo();
1352
1353
0
  appendStringInfoChar(result, '[');
1354
1355
0
  for (i = 0; i < nargs; i++)
1356
0
  {
1357
0
    if (absent_on_null && nulls[i])
1358
0
      continue;
1359
1360
0
    appendStringInfoString(result, sep);
1361
0
    sep = ", ";
1362
0
    add_json(args[i], nulls[i], result, types[i], false);
1363
0
  }
1364
1365
0
  appendStringInfoChar(result, ']');
1366
1367
0
  return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
1368
0
}
1369
1370
/*
1371
 * SQL function json_build_array(variadic "any")
1372
 */
1373
Datum
1374
json_build_array(PG_FUNCTION_ARGS)
1375
0
{
1376
0
  Datum    *args;
1377
0
  bool     *nulls;
1378
0
  Oid      *types;
1379
1380
  /* build argument values to build the object */
1381
0
  int     nargs = extract_variadic_args(fcinfo, 0, true,
1382
0
                        &args, &types, &nulls);
1383
1384
0
  if (nargs < 0)
1385
0
    PG_RETURN_NULL();
1386
1387
0
  PG_RETURN_DATUM(json_build_array_worker(nargs, args, nulls, types, false));
1388
0
}
1389
1390
/*
1391
 * degenerate case of json_build_array where it gets 0 arguments.
1392
 */
1393
Datum
1394
json_build_array_noargs(PG_FUNCTION_ARGS)
1395
0
{
1396
0
  PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2));
1397
0
}
1398
1399
/*
1400
 * SQL function json_object(text[])
1401
 *
1402
 * take a one or two dimensional array of text as key/value pairs
1403
 * for a json object.
1404
 */
1405
Datum
1406
json_object(PG_FUNCTION_ARGS)
1407
0
{
1408
0
  ArrayType  *in_array = PG_GETARG_ARRAYTYPE_P(0);
1409
0
  int     ndims = ARR_NDIM(in_array);
1410
0
  StringInfoData result;
1411
0
  Datum    *in_datums;
1412
0
  bool     *in_nulls;
1413
0
  int     in_count,
1414
0
        count,
1415
0
        i;
1416
0
  text     *rval;
1417
1418
0
  switch (ndims)
1419
0
  {
1420
0
    case 0:
1421
0
      PG_RETURN_DATUM(CStringGetTextDatum("{}"));
1422
0
      break;
1423
1424
0
    case 1:
1425
0
      if ((ARR_DIMS(in_array)[0]) % 2)
1426
0
        ereport(ERROR,
1427
0
            (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1428
0
             errmsg("array must have even number of elements")));
1429
0
      break;
1430
1431
0
    case 2:
1432
0
      if ((ARR_DIMS(in_array)[1]) != 2)
1433
0
        ereport(ERROR,
1434
0
            (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1435
0
             errmsg("array must have two columns")));
1436
0
      break;
1437
1438
0
    default:
1439
0
      ereport(ERROR,
1440
0
          (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1441
0
           errmsg("wrong number of array subscripts")));
1442
0
  }
1443
1444
0
  deconstruct_array_builtin(in_array, TEXTOID, &in_datums, &in_nulls, &in_count);
1445
1446
0
  count = in_count / 2;
1447
1448
0
  initStringInfo(&result);
1449
1450
0
  appendStringInfoChar(&result, '{');
1451
1452
0
  for (i = 0; i < count; ++i)
1453
0
  {
1454
0
    if (in_nulls[i * 2])
1455
0
      ereport(ERROR,
1456
0
          (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1457
0
           errmsg("null value not allowed for object key")));
1458
1459
0
    if (i > 0)
1460
0
      appendStringInfoString(&result, ", ");
1461
0
    escape_json_text(&result, (text *) DatumGetPointer(in_datums[i * 2]));
1462
0
    appendStringInfoString(&result, " : ");
1463
0
    if (in_nulls[i * 2 + 1])
1464
0
      appendStringInfoString(&result, "null");
1465
0
    else
1466
0
    {
1467
0
      escape_json_text(&result,
1468
0
               (text *) DatumGetPointer(in_datums[i * 2 + 1]));
1469
0
    }
1470
0
  }
1471
1472
0
  appendStringInfoChar(&result, '}');
1473
1474
0
  pfree(in_datums);
1475
0
  pfree(in_nulls);
1476
1477
0
  rval = cstring_to_text_with_len(result.data, result.len);
1478
0
  pfree(result.data);
1479
1480
0
  PG_RETURN_TEXT_P(rval);
1481
0
}
1482
1483
/*
1484
 * SQL function json_object(text[], text[])
1485
 *
1486
 * take separate key and value arrays of text to construct a json object
1487
 * pairwise.
1488
 */
1489
Datum
1490
json_object_two_arg(PG_FUNCTION_ARGS)
1491
0
{
1492
0
  ArrayType  *key_array = PG_GETARG_ARRAYTYPE_P(0);
1493
0
  ArrayType  *val_array = PG_GETARG_ARRAYTYPE_P(1);
1494
0
  int     nkdims = ARR_NDIM(key_array);
1495
0
  int     nvdims = ARR_NDIM(val_array);
1496
0
  StringInfoData result;
1497
0
  Datum    *key_datums,
1498
0
         *val_datums;
1499
0
  bool     *key_nulls,
1500
0
         *val_nulls;
1501
0
  int     key_count,
1502
0
        val_count,
1503
0
        i;
1504
0
  text     *rval;
1505
1506
0
  if (nkdims > 1 || nkdims != nvdims)
1507
0
    ereport(ERROR,
1508
0
        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1509
0
         errmsg("wrong number of array subscripts")));
1510
1511
0
  if (nkdims == 0)
1512
0
    PG_RETURN_DATUM(CStringGetTextDatum("{}"));
1513
1514
0
  deconstruct_array_builtin(key_array, TEXTOID, &key_datums, &key_nulls, &key_count);
1515
0
  deconstruct_array_builtin(val_array, TEXTOID, &val_datums, &val_nulls, &val_count);
1516
1517
0
  if (key_count != val_count)
1518
0
    ereport(ERROR,
1519
0
        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1520
0
         errmsg("mismatched array dimensions")));
1521
1522
0
  initStringInfo(&result);
1523
1524
0
  appendStringInfoChar(&result, '{');
1525
1526
0
  for (i = 0; i < key_count; ++i)
1527
0
  {
1528
0
    if (key_nulls[i])
1529
0
      ereport(ERROR,
1530
0
          (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1531
0
           errmsg("null value not allowed for object key")));
1532
1533
0
    if (i > 0)
1534
0
      appendStringInfoString(&result, ", ");
1535
0
    escape_json_text(&result, (text *) DatumGetPointer(key_datums[i]));
1536
0
    appendStringInfoString(&result, " : ");
1537
0
    if (val_nulls[i])
1538
0
      appendStringInfoString(&result, "null");
1539
0
    else
1540
0
      escape_json_text(&result,
1541
0
               (text *) DatumGetPointer(val_datums[i]));
1542
0
  }
1543
1544
0
  appendStringInfoChar(&result, '}');
1545
1546
0
  pfree(key_datums);
1547
0
  pfree(key_nulls);
1548
0
  pfree(val_datums);
1549
0
  pfree(val_nulls);
1550
1551
0
  rval = cstring_to_text_with_len(result.data, result.len);
1552
0
  pfree(result.data);
1553
1554
0
  PG_RETURN_TEXT_P(rval);
1555
0
}
1556
1557
/*
1558
 * escape_json_char
1559
 *    Inline helper function for escape_json* functions
1560
 */
1561
static pg_attribute_always_inline void
1562
escape_json_char(StringInfo buf, char c)
1563
0
{
1564
0
  switch (c)
1565
0
  {
1566
0
    case '\b':
1567
0
      appendStringInfoString(buf, "\\b");
1568
0
      break;
1569
0
    case '\f':
1570
0
      appendStringInfoString(buf, "\\f");
1571
0
      break;
1572
0
    case '\n':
1573
0
      appendStringInfoString(buf, "\\n");
1574
0
      break;
1575
0
    case '\r':
1576
0
      appendStringInfoString(buf, "\\r");
1577
0
      break;
1578
0
    case '\t':
1579
0
      appendStringInfoString(buf, "\\t");
1580
0
      break;
1581
0
    case '"':
1582
0
      appendStringInfoString(buf, "\\\"");
1583
0
      break;
1584
0
    case '\\':
1585
0
      appendStringInfoString(buf, "\\\\");
1586
0
      break;
1587
0
    default:
1588
0
      if ((unsigned char) c < ' ')
1589
0
        appendStringInfo(buf, "\\u%04x", (int) c);
1590
0
      else
1591
0
        appendStringInfoCharMacro(buf, c);
1592
0
      break;
1593
0
  }
1594
0
}
1595
1596
/*
1597
 * escape_json
1598
 *    Produce a JSON string literal, properly escaping the NUL-terminated
1599
 *    cstring.
1600
 */
1601
void
1602
escape_json(StringInfo buf, const char *str)
1603
0
{
1604
0
  appendStringInfoCharMacro(buf, '"');
1605
1606
0
  for (; *str != '\0'; str++)
1607
0
    escape_json_char(buf, *str);
1608
1609
0
  appendStringInfoCharMacro(buf, '"');
1610
0
}
1611
1612
/*
1613
 * Define the number of bytes that escape_json_with_len will look ahead in the
1614
 * input string before flushing the input string to the destination buffer.
1615
 * Looking ahead too far could result in cachelines being evicted that will
1616
 * need to be reloaded in order to perform the appendBinaryStringInfo call.
1617
 * Smaller values will result in a larger number of calls to
1618
 * appendBinaryStringInfo and introduce additional function call overhead.
1619
 * Values larger than the size of L1d cache will likely result in worse
1620
 * performance.
1621
 */
1622
0
#define ESCAPE_JSON_FLUSH_AFTER 512
1623
1624
/*
1625
 * escape_json_with_len
1626
 *    Produce a JSON string literal, properly escaping the possibly not
1627
 *    NUL-terminated characters in 'str'.  'len' defines the number of bytes
1628
 *    from 'str' to process.
1629
 */
1630
void
1631
escape_json_with_len(StringInfo buf, const char *str, int len)
1632
0
{
1633
0
  int     vlen;
1634
1635
0
  Assert(len >= 0);
1636
1637
  /*
1638
   * Since we know the minimum length we'll need to append, let's just
1639
   * enlarge the buffer now rather than incrementally making more space when
1640
   * we run out.  Add two extra bytes for the enclosing quotes.
1641
   */
1642
0
  enlargeStringInfo(buf, len + 2);
1643
1644
  /*
1645
   * Figure out how many bytes to process using SIMD.  Round 'len' down to
1646
   * the previous multiple of sizeof(Vector8), assuming that's a power-of-2.
1647
   */
1648
0
  vlen = len & (int) (~(sizeof(Vector8) - 1));
1649
1650
0
  appendStringInfoCharMacro(buf, '"');
1651
1652
0
  for (int i = 0, copypos = 0;;)
1653
0
  {
1654
    /*
1655
     * To speed this up, try searching sizeof(Vector8) bytes at once for
1656
     * special characters that we need to escape.  When we find one, we
1657
     * fall out of the Vector8 loop and copy the portion we've vector
1658
     * searched and then we process sizeof(Vector8) bytes one byte at a
1659
     * time.  Once done, come back and try doing vector searching again.
1660
     * We'll also process any remaining bytes at the tail end of the
1661
     * string byte-by-byte.  This optimization assumes that most chunks of
1662
     * sizeof(Vector8) bytes won't contain any special characters.
1663
     */
1664
0
    for (; i < vlen; i += sizeof(Vector8))
1665
0
    {
1666
0
      Vector8   chunk;
1667
1668
0
      vector8_load(&chunk, (const uint8 *) &str[i]);
1669
1670
      /*
1671
       * Break on anything less than ' ' or if we find a '"' or '\\'.
1672
       * Those need special handling.  That's done in the per-byte loop.
1673
       */
1674
0
      if (vector8_has_le(chunk, (unsigned char) 0x1F) ||
1675
0
        vector8_has(chunk, (unsigned char) '"') ||
1676
0
        vector8_has(chunk, (unsigned char) '\\'))
1677
0
        break;
1678
1679
0
#ifdef ESCAPE_JSON_FLUSH_AFTER
1680
1681
      /*
1682
       * Flush what's been checked so far out to the destination buffer
1683
       * every so often to avoid having to re-read cachelines when
1684
       * escaping large strings.
1685
       */
1686
0
      if (i - copypos >= ESCAPE_JSON_FLUSH_AFTER)
1687
0
      {
1688
0
        appendBinaryStringInfo(buf, &str[copypos], i - copypos);
1689
0
        copypos = i;
1690
0
      }
1691
0
#endif
1692
0
    }
1693
1694
    /*
1695
     * Write to the destination up to the point that we've vector searched
1696
     * so far.  Do this only when switching into per-byte mode rather than
1697
     * once every sizeof(Vector8) bytes.
1698
     */
1699
0
    if (copypos < i)
1700
0
    {
1701
0
      appendBinaryStringInfo(buf, &str[copypos], i - copypos);
1702
0
      copypos = i;
1703
0
    }
1704
1705
    /*
1706
     * Per-byte loop for Vector8s containing special chars and for
1707
     * processing the tail of the string.
1708
     */
1709
0
    for (int b = 0; b < sizeof(Vector8); b++)
1710
0
    {
1711
      /* check if we've finished */
1712
0
      if (i == len)
1713
0
        goto done;
1714
1715
0
      Assert(i < len);
1716
1717
0
      escape_json_char(buf, str[i++]);
1718
0
    }
1719
1720
0
    copypos = i;
1721
    /* We're not done yet.  Try the vector search again. */
1722
0
  }
1723
1724
0
done:
1725
0
  appendStringInfoCharMacro(buf, '"');
1726
0
}
1727
1728
/*
1729
 * escape_json_text
1730
 *    Append 'txt' onto 'buf' and escape using escape_json_with_len.
1731
 *
1732
 * This is more efficient than calling text_to_cstring and appending the
1733
 * result as that could require an additional palloc and memcpy.
1734
 */
1735
void
1736
escape_json_text(StringInfo buf, const text *txt)
1737
0
{
1738
  /* must cast away the const, unfortunately */
1739
0
  text     *tunpacked = pg_detoast_datum_packed(unconstify(text *, txt));
1740
0
  int     len = VARSIZE_ANY_EXHDR(tunpacked);
1741
0
  char     *str;
1742
1743
0
  str = VARDATA_ANY(tunpacked);
1744
1745
0
  escape_json_with_len(buf, str, len);
1746
1747
  /* pfree any detoasted values */
1748
0
  if (tunpacked != txt)
1749
0
    pfree(tunpacked);
1750
0
}
1751
1752
/* Semantic actions for key uniqueness check */
1753
static JsonParseErrorType
1754
json_unique_object_start(void *_state)
1755
0
{
1756
0
  JsonUniqueParsingState *state = _state;
1757
0
  JsonUniqueStackEntry *entry;
1758
1759
0
  if (!state->unique)
1760
0
    return JSON_SUCCESS;
1761
1762
  /* push object entry to stack */
1763
0
  entry = palloc(sizeof(*entry));
1764
0
  entry->object_id = state->id_counter++;
1765
0
  entry->parent = state->stack;
1766
0
  state->stack = entry;
1767
1768
0
  return JSON_SUCCESS;
1769
0
}
1770
1771
static JsonParseErrorType
1772
json_unique_object_end(void *_state)
1773
0
{
1774
0
  JsonUniqueParsingState *state = _state;
1775
0
  JsonUniqueStackEntry *entry;
1776
1777
0
  if (!state->unique)
1778
0
    return JSON_SUCCESS;
1779
1780
0
  entry = state->stack;
1781
0
  state->stack = entry->parent; /* pop object from stack */
1782
0
  pfree(entry);
1783
0
  return JSON_SUCCESS;
1784
0
}
1785
1786
static JsonParseErrorType
1787
json_unique_object_field_start(void *_state, char *field, bool isnull)
1788
0
{
1789
0
  JsonUniqueParsingState *state = _state;
1790
0
  JsonUniqueStackEntry *entry;
1791
1792
0
  if (!state->unique)
1793
0
    return JSON_SUCCESS;
1794
1795
  /* find key collision in the current object */
1796
0
  if (json_unique_check_key(&state->check, field, state->stack->object_id))
1797
0
    return JSON_SUCCESS;
1798
1799
0
  state->unique = false;
1800
1801
  /* pop all objects entries */
1802
0
  while ((entry = state->stack))
1803
0
  {
1804
0
    state->stack = entry->parent;
1805
0
    pfree(entry);
1806
0
  }
1807
0
  return JSON_SUCCESS;
1808
0
}
1809
1810
/* Validate JSON text and additionally check key uniqueness */
1811
bool
1812
json_validate(text *json, bool check_unique_keys, bool throw_error)
1813
0
{
1814
0
  JsonLexContext lex;
1815
0
  JsonSemAction uniqueSemAction = {0};
1816
0
  JsonUniqueParsingState state;
1817
0
  JsonParseErrorType result;
1818
1819
0
  makeJsonLexContext(&lex, json, check_unique_keys);
1820
1821
0
  if (check_unique_keys)
1822
0
  {
1823
0
    state.lex = &lex;
1824
0
    state.stack = NULL;
1825
0
    state.id_counter = 0;
1826
0
    state.unique = true;
1827
0
    json_unique_check_init(&state.check);
1828
1829
0
    uniqueSemAction.semstate = &state;
1830
0
    uniqueSemAction.object_start = json_unique_object_start;
1831
0
    uniqueSemAction.object_field_start = json_unique_object_field_start;
1832
0
    uniqueSemAction.object_end = json_unique_object_end;
1833
0
  }
1834
1835
0
  result = pg_parse_json(&lex, check_unique_keys ? &uniqueSemAction : &nullSemAction);
1836
1837
0
  if (result != JSON_SUCCESS)
1838
0
  {
1839
0
    if (throw_error)
1840
0
      json_errsave_error(result, &lex, NULL);
1841
1842
0
    return false;     /* invalid json */
1843
0
  }
1844
1845
0
  if (check_unique_keys && !state.unique)
1846
0
  {
1847
0
    if (throw_error)
1848
0
      ereport(ERROR,
1849
0
          (errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1850
0
           errmsg("duplicate JSON object key value")));
1851
1852
0
    return false;     /* not unique keys */
1853
0
  }
1854
1855
0
  if (check_unique_keys)
1856
0
    freeJsonLexContext(&lex);
1857
1858
0
  return true;        /* ok */
1859
0
}
1860
1861
/*
1862
 * SQL function json_typeof(json) -> text
1863
 *
1864
 * Returns the type of the outermost JSON value as TEXT.  Possible types are
1865
 * "object", "array", "string", "number", "boolean", and "null".
1866
 *
1867
 * Performs a single call to json_lex() to get the first token of the supplied
1868
 * value.  This initial token uniquely determines the value's type.  As our
1869
 * input must already have been validated by json_in() or json_recv(), the
1870
 * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
1871
 * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
1872
 */
1873
Datum
1874
json_typeof(PG_FUNCTION_ARGS)
1875
0
{
1876
0
  text     *json = PG_GETARG_TEXT_PP(0);
1877
0
  JsonLexContext lex;
1878
0
  char     *type;
1879
0
  JsonParseErrorType result;
1880
1881
  /* Lex exactly one token from the input and check its type. */
1882
0
  makeJsonLexContext(&lex, json, false);
1883
0
  result = json_lex(&lex);
1884
0
  if (result != JSON_SUCCESS)
1885
0
    json_errsave_error(result, &lex, NULL);
1886
1887
0
  switch (lex.token_type)
1888
0
  {
1889
0
    case JSON_TOKEN_OBJECT_START:
1890
0
      type = "object";
1891
0
      break;
1892
0
    case JSON_TOKEN_ARRAY_START:
1893
0
      type = "array";
1894
0
      break;
1895
0
    case JSON_TOKEN_STRING:
1896
0
      type = "string";
1897
0
      break;
1898
0
    case JSON_TOKEN_NUMBER:
1899
0
      type = "number";
1900
0
      break;
1901
0
    case JSON_TOKEN_TRUE:
1902
0
    case JSON_TOKEN_FALSE:
1903
0
      type = "boolean";
1904
0
      break;
1905
0
    case JSON_TOKEN_NULL:
1906
0
      type = "null";
1907
0
      break;
1908
0
    default:
1909
0
      elog(ERROR, "unexpected json token: %d", lex.token_type);
1910
0
  }
1911
1912
0
  PG_RETURN_TEXT_P(cstring_to_text(type));
1913
0
}