Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/nodes/read.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * read.c
4
 *    routines to convert a string (legal ascii representation of node) back
5
 *    to nodes
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 *
11
 * IDENTIFICATION
12
 *    src/backend/nodes/read.c
13
 *
14
 * HISTORY
15
 *    AUTHOR      DATE      MAJOR EVENT
16
 *    Andrew Yu     Nov 2, 1994   file creation
17
 *
18
 *-------------------------------------------------------------------------
19
 */
20
#include "postgres.h"
21
22
#include <ctype.h>
23
24
#include "common/string.h"
25
#include "nodes/bitmapset.h"
26
#include "nodes/pg_list.h"
27
#include "nodes/readfuncs.h"
28
#include "nodes/value.h"
29
30
31
/* Static state for pg_strtok */
32
static const char *pg_strtok_ptr = NULL;
33
34
/* State flag that determines how readfuncs.c should treat location fields */
35
#ifdef DEBUG_NODE_TESTS_ENABLED
36
bool    restore_location_fields = false;
37
#endif
38
39
40
/*
41
 * stringToNode -
42
 *    builds a Node tree from its string representation (assumed valid)
43
 *
44
 * restore_loc_fields instructs readfuncs.c whether to restore location
45
 * fields rather than set them to -1.  This is currently only supported
46
 * in builds with DEBUG_NODE_TESTS_ENABLED defined.
47
 */
48
static void *
49
stringToNodeInternal(const char *str, bool restore_loc_fields)
50
0
{
51
0
  void     *retval;
52
0
  const char *save_strtok;
53
#ifdef DEBUG_NODE_TESTS_ENABLED
54
  bool    save_restore_location_fields;
55
#endif
56
57
  /*
58
   * We save and restore the pre-existing state of pg_strtok. This makes the
59
   * world safe for re-entrant invocation of stringToNode, without incurring
60
   * a lot of notational overhead by having to pass the next-character
61
   * pointer around through all the readfuncs.c code.
62
   */
63
0
  save_strtok = pg_strtok_ptr;
64
65
0
  pg_strtok_ptr = str;    /* point pg_strtok at the string to read */
66
67
  /*
68
   * If enabled, likewise save/restore the location field handling flag.
69
   */
70
#ifdef DEBUG_NODE_TESTS_ENABLED
71
  save_restore_location_fields = restore_location_fields;
72
  restore_location_fields = restore_loc_fields;
73
#endif
74
75
0
  retval = nodeRead(NULL, 0); /* do the reading */
76
77
0
  pg_strtok_ptr = save_strtok;
78
79
#ifdef DEBUG_NODE_TESTS_ENABLED
80
  restore_location_fields = save_restore_location_fields;
81
#endif
82
83
0
  return retval;
84
0
}
85
86
/*
87
 * Externally visible entry points
88
 */
89
void *
stringToNode(const char *str)
{
	/* Standard entry point: do not ask for location fields to be restored. */
	void	   *tree = stringToNodeInternal(str, false);

	return tree;
}
94
95
#ifdef DEBUG_NODE_TESTS_ENABLED
96
97
void *
stringToNodeWithLocations(const char *str)
{
	/* Debug-build entry point: ask for location fields to be restored. */
	void	   *tree = stringToNodeInternal(str, true);

	return tree;
}
102
103
#endif
104
105
106
/*****************************************************************************
107
 *
108
 * the lisp token parser
109
 *
110
 *****************************************************************************/
111
112
/*
113
 * pg_strtok --- retrieve next "token" from a string.
114
 *
115
 * Works kinda like strtok, except it never modifies the source string.
116
 * (Instead of storing nulls into the string, the length of the token
117
 * is returned to the caller.)
118
 * Also, the rules about what is a token are hard-wired rather than being
119
 * configured by passing a set of terminating characters.
120
 *
121
 * The string is assumed to have been initialized already by stringToNode.
122
 *
123
 * The rules for tokens are:
124
 *  * Whitespace (space, tab, newline) always separates tokens.
125
 *  * The characters '(', ')', '{', '}' form individual tokens even
126
 *    without any whitespace around them.
127
 *  * Otherwise, a token is all the characters up to the next whitespace
128
 *    or occurrence of one of the four special characters.
129
 *  * A backslash '\' can be used to quote whitespace or one of the four
130
 *    special characters, so that it is treated as a plain token character.
131
 *    Backslashes themselves must also be backslashed for consistency.
132
 *    Any other character can be, but need not be, backslashed as well.
133
 *  * If the resulting token is '<>' (with no backslash), it is returned
134
 *    as a non-NULL pointer to the token but with length == 0.  Note that
135
 *    there is no other way to get a zero-length token.
136
 *
137
 * Returns a pointer to the start of the next token, and the length of the
138
 * token (including any embedded backslashes!) in *length.  If there are
139
 * no more tokens, NULL and 0 are returned.
140
 *
141
 * NOTE: this routine doesn't remove backslashes; the caller must do so
142
 * if necessary (see "debackslash").
143
 *
144
 * NOTE: prior to release 7.0, this routine also had a special case to treat
145
 * a token starting with '"' as extending to the next '"'.  This code was
146
 * broken, however, since it would fail to cope with a string containing an
147
 * embedded '"'.  I have therefore removed this special case, and instead
148
 * introduced rules for using backslashes to quote characters.  Higher-level
149
 * code should add backslashes to a string constant to ensure it is treated
150
 * as a single token.
151
 */
152
const char *
153
pg_strtok(int *length)
154
0
{
155
0
  const char *local_str;    /* working pointer to string */
156
0
  const char *ret_str;    /* start of token to return */
157
158
0
  local_str = pg_strtok_ptr;
159
160
0
  while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
161
0
    local_str++;
162
163
0
  if (*local_str == '\0')
164
0
  {
165
0
    *length = 0;
166
0
    pg_strtok_ptr = local_str;
167
0
    return NULL;      /* no more tokens */
168
0
  }
169
170
  /*
171
   * Now pointing at start of next token.
172
   */
173
0
  ret_str = local_str;
174
175
0
  if (*local_str == '(' || *local_str == ')' ||
176
0
    *local_str == '{' || *local_str == '}')
177
0
  {
178
    /* special 1-character token */
179
0
    local_str++;
180
0
  }
181
0
  else
182
0
  {
183
    /* Normal token, possibly containing backslashes */
184
0
    while (*local_str != '\0' &&
185
0
         *local_str != ' ' && *local_str != '\n' &&
186
0
         *local_str != '\t' &&
187
0
         *local_str != '(' && *local_str != ')' &&
188
0
         *local_str != '{' && *local_str != '}')
189
0
    {
190
0
      if (*local_str == '\\' && local_str[1] != '\0')
191
0
        local_str += 2;
192
0
      else
193
0
        local_str++;
194
0
    }
195
0
  }
196
197
0
  *length = local_str - ret_str;
198
199
  /* Recognize special case for "empty" token */
200
0
  if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
201
0
    *length = 0;
202
203
0
  pg_strtok_ptr = local_str;
204
205
0
  return ret_str;
206
0
}
207
208
/*
209
 * debackslash -
210
 *    create a palloc'd string holding the given token.
211
 *    any protective backslashes in the token are removed.
212
 */
213
char *
debackslash(const char *token, int length)
{
	/* The output is never longer than the input, plus one byte for the NUL. */
	char	   *out = palloc(length + 1);
	int			src = 0;		/* next input byte to examine */
	int			dst = 0;		/* next output byte to fill */

	while (src < length)
	{
		/*
		 * A backslash with at least one more byte after it is dropped, and
		 * the byte it protects is copied verbatim.  A backslash in the very
		 * last position is copied as an ordinary character.
		 */
		if (token[src] == '\\' && src + 1 < length)
			src++;
		out[dst++] = token[src++];
	}
	out[dst] = '\0';

	return out;
}
229
230
0
/*
 * Extra token-type codes that nodeTokenType() can return in addition to the
 * real NodeTag values.
 * NOTE(review): the values are presumably chosen large so they cannot collide
 * with any genuine NodeTag -- confirm against nodes/nodes.h.
 */
#define RIGHT_PAREN 1000001
#define LEFT_PAREN  1000002
#define LEFT_BRACE  1000003
#define OTHER_TOKEN 1000004
234
235
/*
236
 * nodeTokenType -
237
 *    returns the type of the node token contained in token.
238
 *    It returns one of the following valid NodeTags:
239
 *    T_Integer, T_Float, T_Boolean, T_String, T_BitString
240
 *    and some of its own:
241
 *    RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
242
 *
243
 *    Assumption: the ascii representation is legal
244
 */
245
static NodeTag
246
nodeTokenType(const char *token, int length)
247
0
{
248
0
  NodeTag   retval;
249
0
  const char *numptr;
250
0
  int     numlen;
251
252
  /*
253
   * Check if the token is a number
254
   */
255
0
  numptr = token;
256
0
  numlen = length;
257
0
  if (*numptr == '+' || *numptr == '-')
258
0
    numptr++, numlen--;
259
0
  if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
260
0
    (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
261
0
  {
262
    /*
263
     * Yes.  Figure out whether it is integral or float; this requires
264
     * both a syntax check and a range check. strtoint() can do both for
265
     * us. We know the token will end at a character that strtoint will
266
     * stop at, so we do not need to modify the string.
267
     */
268
0
    char     *endptr;
269
270
0
    errno = 0;
271
0
    (void) strtoint(numptr, &endptr, 10);
272
0
    if (endptr != token + length || errno == ERANGE)
273
0
      return T_Float;
274
0
    return T_Integer;
275
0
  }
276
277
  /*
278
   * these three cases do not need length checks, since pg_strtok() will
279
   * always treat them as single-byte tokens
280
   */
281
0
  else if (*token == '(')
282
0
    retval = LEFT_PAREN;
283
0
  else if (*token == ')')
284
0
    retval = RIGHT_PAREN;
285
0
  else if (*token == '{')
286
0
    retval = LEFT_BRACE;
287
0
  else if ((length == 4 && strncmp(token, "true", 4) == 0) ||
288
0
       (length == 5 && strncmp(token, "false", 5) == 0))
289
0
    retval = T_Boolean;
290
0
  else if (*token == '"' && length > 1 && token[length - 1] == '"')
291
0
    retval = T_String;
292
0
  else if (*token == 'b' || *token == 'x')
293
0
    retval = T_BitString;
294
0
  else
295
0
    retval = OTHER_TOKEN;
296
0
  return retval;
297
0
}
298
299
/*
300
 * nodeRead -
301
 *    Slightly higher-level reader.
302
 *
303
 * This routine applies some semantic knowledge on top of the purely
304
 * lexical tokenizer pg_strtok().   It can read
305
 *  * Value token nodes (integers, floats, booleans, or strings);
306
 *  * General nodes (via parseNodeString() from readfuncs.c);
307
 *  * Lists of the above;
308
 *  * Lists of integers, OIDs, or TransactionIds.
309
 * The return value is declared void *, not Node *, to avoid having to
310
 * cast it explicitly in callers that assign to fields of different types.
311
 *
312
 * External callers should always pass NULL/0 for the arguments.  Internally
313
 * a non-NULL token may be passed when the upper recursion level has already
314
 * scanned the first token of a node's representation.
315
 *
316
 * We assume pg_strtok is already initialized with a string to read (hence
317
 * this should only be invoked from within a stringToNode operation).
318
 */
319
void *
320
nodeRead(const char *token, int tok_len)
321
0
{
322
0
  Node     *result;
323
0
  NodeTag   type;
324
325
0
  if (token == NULL)     /* need to read a token? */
326
0
  {
327
0
    token = pg_strtok(&tok_len);
328
329
0
    if (token == NULL)   /* end of input */
330
0
      return NULL;
331
0
  }
332
333
0
  type = nodeTokenType(token, tok_len);
334
335
0
  switch ((int) type)
336
0
  {
337
0
    case LEFT_BRACE:
338
0
      result = parseNodeString();
339
0
      token = pg_strtok(&tok_len);
340
0
      if (token == NULL || token[0] != '}')
341
0
        elog(ERROR, "did not find '}' at end of input node");
342
0
      break;
343
0
    case LEFT_PAREN:
344
0
      {
345
0
        List     *l = NIL;
346
347
        /*----------
348
         * Could be an integer list:  (i int int ...)
349
         * or an OID list:        (o int int ...)
350
         * or an XID list:        (x int int ...)
351
         * or a bitmapset:        (b int int ...)
352
         * or a list of nodes/values: (node node ...)
353
         *----------
354
         */
355
0
        token = pg_strtok(&tok_len);
356
0
        if (token == NULL)
357
0
          elog(ERROR, "unterminated List structure");
358
0
        if (tok_len == 1 && token[0] == 'i')
359
0
        {
360
          /* List of integers */
361
0
          for (;;)
362
0
          {
363
0
            int     val;
364
0
            char     *endptr;
365
366
0
            token = pg_strtok(&tok_len);
367
0
            if (token == NULL)
368
0
              elog(ERROR, "unterminated List structure");
369
0
            if (token[0] == ')')
370
0
              break;
371
0
            val = (int) strtol(token, &endptr, 10);
372
0
            if (endptr != token + tok_len)
373
0
              elog(ERROR, "unrecognized integer: \"%.*s\"",
374
0
                 tok_len, token);
375
0
            l = lappend_int(l, val);
376
0
          }
377
0
          result = (Node *) l;
378
0
        }
379
0
        else if (tok_len == 1 && token[0] == 'o')
380
0
        {
381
          /* List of OIDs */
382
0
          for (;;)
383
0
          {
384
0
            Oid     val;
385
0
            char     *endptr;
386
387
0
            token = pg_strtok(&tok_len);
388
0
            if (token == NULL)
389
0
              elog(ERROR, "unterminated List structure");
390
0
            if (token[0] == ')')
391
0
              break;
392
0
            val = (Oid) strtoul(token, &endptr, 10);
393
0
            if (endptr != token + tok_len)
394
0
              elog(ERROR, "unrecognized OID: \"%.*s\"",
395
0
                 tok_len, token);
396
0
            l = lappend_oid(l, val);
397
0
          }
398
0
          result = (Node *) l;
399
0
        }
400
0
        else if (tok_len == 1 && token[0] == 'x')
401
0
        {
402
          /* List of TransactionIds */
403
0
          for (;;)
404
0
          {
405
0
            TransactionId val;
406
0
            char     *endptr;
407
408
0
            token = pg_strtok(&tok_len);
409
0
            if (token == NULL)
410
0
              elog(ERROR, "unterminated List structure");
411
0
            if (token[0] == ')')
412
0
              break;
413
0
            val = (TransactionId) strtoul(token, &endptr, 10);
414
0
            if (endptr != token + tok_len)
415
0
              elog(ERROR, "unrecognized Xid: \"%.*s\"",
416
0
                 tok_len, token);
417
0
            l = lappend_xid(l, val);
418
0
          }
419
0
          result = (Node *) l;
420
0
        }
421
0
        else if (tok_len == 1 && token[0] == 'b')
422
0
        {
423
          /* Bitmapset -- see also _readBitmapset() */
424
0
          Bitmapset  *bms = NULL;
425
426
0
          for (;;)
427
0
          {
428
0
            int     val;
429
0
            char     *endptr;
430
431
0
            token = pg_strtok(&tok_len);
432
0
            if (token == NULL)
433
0
              elog(ERROR, "unterminated Bitmapset structure");
434
0
            if (tok_len == 1 && token[0] == ')')
435
0
              break;
436
0
            val = (int) strtol(token, &endptr, 10);
437
0
            if (endptr != token + tok_len)
438
0
              elog(ERROR, "unrecognized integer: \"%.*s\"",
439
0
                 tok_len, token);
440
0
            bms = bms_add_member(bms, val);
441
0
          }
442
0
          result = (Node *) bms;
443
0
        }
444
0
        else
445
0
        {
446
          /* List of other node types */
447
0
          for (;;)
448
0
          {
449
            /* We have already scanned next token... */
450
0
            if (token[0] == ')')
451
0
              break;
452
0
            l = lappend(l, nodeRead(token, tok_len));
453
0
            token = pg_strtok(&tok_len);
454
0
            if (token == NULL)
455
0
              elog(ERROR, "unterminated List structure");
456
0
          }
457
0
          result = (Node *) l;
458
0
        }
459
0
        break;
460
0
      }
461
0
    case RIGHT_PAREN:
462
0
      elog(ERROR, "unexpected right parenthesis");
463
0
      result = NULL;    /* keep compiler happy */
464
0
      break;
465
0
    case OTHER_TOKEN:
466
0
      if (tok_len == 0)
467
0
      {
468
        /* must be "<>" --- represents a null pointer */
469
0
        result = NULL;
470
0
      }
471
0
      else
472
0
      {
473
0
        elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
474
0
        result = NULL;  /* keep compiler happy */
475
0
      }
476
0
      break;
477
0
    case T_Integer:
478
479
      /*
480
       * we know that the token terminates on a char atoi will stop at
481
       */
482
0
      result = (Node *) makeInteger(atoi(token));
483
0
      break;
484
0
    case T_Float:
485
0
      {
486
0
        char     *fval = (char *) palloc(tok_len + 1);
487
488
0
        memcpy(fval, token, tok_len);
489
0
        fval[tok_len] = '\0';
490
0
        result = (Node *) makeFloat(fval);
491
0
      }
492
0
      break;
493
0
    case T_Boolean:
494
0
      result = (Node *) makeBoolean(token[0] == 't');
495
0
      break;
496
0
    case T_String:
497
      /* need to remove leading and trailing quotes, and backslashes */
498
0
      result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
499
0
      break;
500
0
    case T_BitString:
501
      /* need to remove backslashes, but there are no quotes */
502
0
      result = (Node *) makeBitString(debackslash(token, tok_len));
503
0
      break;
504
0
    default:
505
0
      elog(ERROR, "unrecognized node type: %d", (int) type);
506
0
      result = NULL;    /* keep compiler happy */
507
0
      break;
508
0
  }
509
510
0
  return result;
511
0
}