/src/postgres/src/backend/nodes/read.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * read.c
 *    routines to convert a string (legal ascii representation of node) back
 *    to nodes
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/nodes/read.c
 *
 * HISTORY
 *    AUTHOR      DATE      MAJOR EVENT
 *    Andrew Yu     Nov 2, 1994   file creation
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>

#include "common/string.h"
#include "nodes/bitmapset.h"
#include "nodes/pg_list.h"
#include "nodes/readfuncs.h"
#include "nodes/value.h"


/* Static state for pg_strtok */
static const char *pg_strtok_ptr = NULL;

/* State flag that determines how readfuncs.c should treat location fields */
#ifdef DEBUG_NODE_TESTS_ENABLED
bool    restore_location_fields = false;
#endif


/*
 * stringToNode -
 *    builds a Node tree from its string representation (assumed valid)
 *
 * restore_loc_fields instructs readfuncs.c whether to restore location
 * fields rather than set them to -1.  This is currently only supported
 * in builds with DEBUG_NODE_TESTS_ENABLED defined.
 */
static void *
stringToNodeInternal(const char *str, bool restore_loc_fields)
{
  void     *retval;
  const char *save_strtok;
#ifdef DEBUG_NODE_TESTS_ENABLED
  bool    save_restore_location_fields;
#endif

  /*
   * We save and restore the pre-existing state of pg_strtok. This makes the
   * world safe for re-entrant invocation of stringToNode, without incurring
   * a lot of notational overhead by having to pass the next-character
   * pointer around through all the readfuncs.c code.
   */
  save_strtok = pg_strtok_ptr;

  pg_strtok_ptr = str;    /* point pg_strtok at the string to read */

  /*
   * If enabled, likewise save/restore the location field handling flag.
   */
#ifdef DEBUG_NODE_TESTS_ENABLED
  save_restore_location_fields = restore_location_fields;
  restore_location_fields = restore_loc_fields;
#endif

  retval = nodeRead(NULL, 0); /* do the reading */

  pg_strtok_ptr = save_strtok;

#ifdef DEBUG_NODE_TESTS_ENABLED
  restore_location_fields = save_restore_location_fields;
#endif

  return retval;
}

/*
 * Externally visible entry points
 */
/*
 * stringToNode -
 *    parse str into a node tree via stringToNodeInternal(), passing
 *    restore_loc_fields = false (location fields are not restored).
 */
void *
stringToNode(const char *str)
{
  return stringToNodeInternal(str, false);
}

#ifdef DEBUG_NODE_TESTS_ENABLED

/*
 * stringToNodeWithLocations -
 *    same as stringToNode(), but passes restore_loc_fields = true so that
 *    readfuncs.c is asked to restore location fields.  Only compiled when
 *    DEBUG_NODE_TESTS_ENABLED is defined.
 */
void *
stringToNodeWithLocations(const char *str)
{
  return stringToNodeInternal(str, true);
}

#endif


/*****************************************************************************
 *
 * the lisp token parser
 *
 *****************************************************************************/

/*
 * pg_strtok --- retrieve next "token" from a string.
 *
 * Works kinda like strtok, except it never modifies the source string.
 * (Instead of storing nulls into the string, the length of the token
 * is returned to the caller.)
 * Also, the rules about what is a token are hard-wired rather than being
 * configured by passing a set of terminating characters.
 *
 * The string is assumed to have been initialized already by stringToNode.
 *
 * The rules for tokens are:
 *  * Whitespace (space, tab, newline) always separates tokens.
 *  * The characters '(', ')', '{', '}' form individual tokens even
 *    without any whitespace around them.
 *  * Otherwise, a token is all the characters up to the next whitespace
 *    or occurrence of one of the four special characters.
 *  * A backslash '\' can be used to quote whitespace or one of the four
 *    special characters, so that it is treated as a plain token character.
 *    Backslashes themselves must also be backslashed for consistency.
 *    Any other character can be, but need not be, backslashed as well.
 *  * If the resulting token is '<>' (with no backslash), it is returned
 *    as a non-NULL pointer to the token but with length == 0.  Note that
 *    there is no other way to get a zero-length token.
 *
 * Returns a pointer to the start of the next token, and the length of the
 * token (including any embedded backslashes!) in *length.  If there are
 * no more tokens, NULL and 0 are returned.
 *
 * NOTE: this routine doesn't remove backslashes; the caller must do so
 * if necessary (see "debackslash").
 *
 * NOTE: prior to release 7.0, this routine also had a special case to treat
 * a token starting with '"' as extending to the next '"'.  This code was
 * broken, however, since it would fail to cope with a string containing an
 * embedded '"'.  I have therefore removed this special case, and instead
 * introduced rules for using backslashes to quote characters.  Higher-level
 * code should add backslashes to a string constant to ensure it is treated
 * as a single token.
 */
const char *
pg_strtok(int *length)
{
  const char *cursor = pg_strtok_ptr; /* current scan position */
  const char *tok_start;    /* first byte of the token we hand back */
  int     tok_len;
  int     in_token;

  /* Step over leading whitespace (space, newline, tab). */
  for (;;)
  {
    char    c = *cursor;

    if (c != ' ' && c != '\n' && c != '\t')
      break;
    cursor++;
  }

  /* Only the terminator is left: report that there are no more tokens. */
  if (*cursor == '\0')
  {
    pg_strtok_ptr = cursor;
    *length = 0;
    return NULL;
  }

  tok_start = cursor;

  switch (*cursor)
  {
    case '(':
    case ')':
    case '{':
    case '}':
      /* each of these characters is a complete token on its own */
      cursor++;
      break;

    default:

      /*
       * Ordinary token: advance until whitespace, one of the four
       * special characters, or the end of the string.  A backslash
       * protects the character after it, so both are consumed together
       * (unless the backslash is the very last character).
       */
      in_token = 1;
      while (in_token)
      {
        switch (*cursor)
        {
          case '\0':
          case ' ':
          case '\n':
          case '\t':
          case '(':
          case ')':
          case '{':
          case '}':
            in_token = 0;
            break;
          case '\\':
            cursor += (cursor[1] != '\0') ? 2 : 1;
            break;
          default:
            cursor++;
            break;
        }
      }
      break;
  }

  tok_len = (int) (cursor - tok_start);

  /* An unbackslashed "<>" is the "empty" token: reported with length 0. */
  if (tok_len == 2 && tok_start[0] == '<' && tok_start[1] == '>')
    tok_len = 0;

  *length = tok_len;
  pg_strtok_ptr = cursor;

  return tok_start;
}

/*
 * debackslash -
 *    return a freshly palloc'd, NUL-terminated copy of the first "length"
 *    bytes of token, with protective backslashes stripped.
 *
 * A backslash causes the byte after it to be copied literally.  A backslash
 * that is the final byte of the token has nothing to protect and is copied
 * as-is.
 */
char *
debackslash(const char *token, int length)
{
  char     *result = palloc(length + 1);
  int     in_pos = 0;     /* next byte of token to examine */
  int     out_pos = 0;    /* next byte of result to fill */

  while (in_pos < length)
  {
    /* drop the backslash, provided at least one more byte follows it */
    if (token[in_pos] == '\\' && in_pos + 1 < length)
      in_pos++;
    result[out_pos++] = token[in_pos++];
  }
  result[out_pos] = '\0';

  return result;
}

/*
 * Private token-type codes returned by nodeTokenType() (alongside real
 * NodeTag values) for tokens that are not Value nodes.
 * NOTE(review): the 1000000 base presumably keeps these clear of every
 * real NodeTag value -- confirm against the NodeTag enum.
 */
#define RIGHT_PAREN (1000000 + 1)
#define LEFT_PAREN  (1000000 + 2)
#define LEFT_BRACE  (1000000 + 3)
#define OTHER_TOKEN (1000000 + 4)

/*
 * nodeTokenType -
 *    classify a single token obtained from pg_strtok().
 *
 * The result is either a genuine NodeTag for a value node (T_Integer,
 * T_Float, T_Boolean, T_String, T_BitString) or one of this file's private
 * codes (RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN).
 *
 * Assumption: the ascii representation being read is legal.
 */
static NodeTag
nodeTokenType(const char *token, int length)
{
  const char *digits = token;
  int     ndigits = length;

  /* A number may carry a leading sign; look past it. */
  if (*digits == '+' || *digits == '-')
  {
    digits++;
    ndigits--;
  }

  /* Numbers start with a digit, or with '.' followed by a digit. */
  if ((ndigits > 0 && isdigit((unsigned char) digits[0])) ||
    (ndigits > 1 && digits[0] == '.' && isdigit((unsigned char) digits[1])))
  {
    char     *stop;

    /*
     * Decide between integer and float.  That takes a syntax check and a
     * range check, both of which strtoint() performs.  The token is known
     * to end at a character strtoint() will stop at, so the string does
     * not need to be modified or copied.
     */
    errno = 0;
    (void) strtoint(digits, &stop, 10);
    if (stop == token + length && errno != ERANGE)
      return T_Integer;
    return T_Float;
  }

  /*
   * No length checks are needed for these three, because pg_strtok()
   * always returns them as single-byte tokens.
   */
  switch (*token)
  {
    case '(':
      return LEFT_PAREN;
    case ')':
      return RIGHT_PAREN;
    case '{':
      return LEFT_BRACE;
    default:
      break;
  }

  if (length == 4 && strncmp(token, "true", 4) == 0)
    return T_Boolean;
  if (length == 5 && strncmp(token, "false", 5) == 0)
    return T_Boolean;

  /* A string is enclosed in a pair of double quotes. */
  if (*token == '"' && length > 1 && token[length - 1] == '"')
    return T_String;

  /* Bit strings are recognized by a leading 'b' or 'x'. */
  if (*token == 'b' || *token == 'x')
    return T_BitString;

  return OTHER_TOKEN;
}

/*
 * nodeRead -
 *    Slightly higher-level reader.
 *
 * This routine applies some semantic knowledge on top of the purely
 * lexical tokenizer pg_strtok().   It can read
 *  * Value token nodes (integers, floats, booleans, strings, or bit
 *    strings);
 *  * General nodes (via parseNodeString() from readfuncs.c);
 *  * Lists of the above;
 *  * Lists of integers, OIDs, or TransactionIds;
 *  * Bitmapsets.
 * The return value is declared void *, not Node *, to avoid having to
 * cast it explicitly in callers that assign to fields of different types.
 * NULL is returned both at end of input and for the "<>" token, which
 * represents a null pointer.
 *
 * External callers should always pass NULL/0 for the arguments.  Internally
 * a non-NULL token may be passed when the upper recursion level has already
 * scanned the first token of a node's representation.
 *
 * We assume pg_strtok is already initialized with a string to read (hence
 * this should only be invoked from within a stringToNode operation).
 *
 * Malformed input is reported with elog(ERROR).
 */
void *
nodeRead(const char *token, int tok_len)
{
  Node     *result;
  NodeTag   type;

  if (token == NULL)     /* need to read a token? */
  {
    token = pg_strtok(&tok_len);

    if (token == NULL)   /* end of input */
      return NULL;
  }

  type = nodeTokenType(token, tok_len);

  switch ((int) type)
  {
    case LEFT_BRACE:
      /* "{...}" is a general node; readfuncs.c parses its contents */
      result = parseNodeString();
      token = pg_strtok(&tok_len);
      if (token == NULL || token[0] != '}')
        elog(ERROR, "did not find '}' at end of input node");
      break;
    case LEFT_PAREN:
      {
        List     *l = NIL;

        /*----------
         * Could be an integer list:  (i int int ...)
         * or an OID list:        (o int int ...)
         * or an XID list:        (x int int ...)
         * or a bitmapset:        (b int int ...)
         * or a list of nodes/values: (node node ...)
         *
         * In the four numeric forms every member must be a number that
         * fills its whole token.  The "<>" token needs an explicit test:
         * pg_strtok() reports it with tok_len == 0, and since
         * strtol()/strtoul() consume nothing from it, the usual
         * "endptr != token + tok_len" check alone would let it through
         * and silently add a zero member.
         *----------
         */
        token = pg_strtok(&tok_len);
        if (token == NULL)
          elog(ERROR, "unterminated List structure");
        if (tok_len == 1 && token[0] == 'i')
        {
          /* List of integers */
          for (;;)
          {
            int     val;
            char     *endptr;

            token = pg_strtok(&tok_len);
            if (token == NULL)
              elog(ERROR, "unterminated List structure");
            if (token[0] == ')')
              break;
            val = (int) strtol(token, &endptr, 10);
            if (tok_len == 0 || endptr != token + tok_len)
              elog(ERROR, "unrecognized integer: \"%.*s\"",
                 tok_len, token);
            l = lappend_int(l, val);
          }
          result = (Node *) l;
        }
        else if (tok_len == 1 && token[0] == 'o')
        {
          /* List of OIDs */
          for (;;)
          {
            Oid     val;
            char     *endptr;

            token = pg_strtok(&tok_len);
            if (token == NULL)
              elog(ERROR, "unterminated List structure");
            if (token[0] == ')')
              break;
            val = (Oid) strtoul(token, &endptr, 10);
            if (tok_len == 0 || endptr != token + tok_len)
              elog(ERROR, "unrecognized OID: \"%.*s\"",
                 tok_len, token);
            l = lappend_oid(l, val);
          }
          result = (Node *) l;
        }
        else if (tok_len == 1 && token[0] == 'x')
        {
          /* List of TransactionIds */
          for (;;)
          {
            TransactionId val;
            char     *endptr;

            token = pg_strtok(&tok_len);
            if (token == NULL)
              elog(ERROR, "unterminated List structure");
            if (token[0] == ')')
              break;
            val = (TransactionId) strtoul(token, &endptr, 10);
            if (tok_len == 0 || endptr != token + tok_len)
              elog(ERROR, "unrecognized Xid: \"%.*s\"",
                 tok_len, token);
            l = lappend_xid(l, val);
          }
          result = (Node *) l;
        }
        else if (tok_len == 1 && token[0] == 'b')
        {
          /* Bitmapset -- see also _readBitmapset() */
          Bitmapset  *bms = NULL;

          for (;;)
          {
            int     val;
            char     *endptr;

            token = pg_strtok(&tok_len);
            if (token == NULL)
              elog(ERROR, "unterminated Bitmapset structure");
            if (tok_len == 1 && token[0] == ')')
              break;
            val = (int) strtol(token, &endptr, 10);
            if (tok_len == 0 || endptr != token + tok_len)
              elog(ERROR, "unrecognized integer: \"%.*s\"",
                 tok_len, token);
            bms = bms_add_member(bms, val);
          }
          result = (Node *) bms;
        }
        else
        {
          /* List of other node types */
          for (;;)
          {
            /* We have already scanned next token... */
            if (token[0] == ')')
              break;
            /* ... so hand it to the recursive call */
            l = lappend(l, nodeRead(token, tok_len));
            token = pg_strtok(&tok_len);
            if (token == NULL)
              elog(ERROR, "unterminated List structure");
          }
          result = (Node *) l;
        }
        break;
      }
    case RIGHT_PAREN:
      elog(ERROR, "unexpected right parenthesis");
      result = NULL;    /* keep compiler happy */
      break;
    case OTHER_TOKEN:
      if (tok_len == 0)
      {
        /* must be "<>" --- represents a null pointer */
        result = NULL;
      }
      else
      {
        elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
        result = NULL;  /* keep compiler happy */
      }
      break;
    case T_Integer:

      /*
       * nodeTokenType() has already checked the syntax and range of the
       * value, and we know that the token terminates on a char atoi
       * will stop at
       */
      result = (Node *) makeInteger(atoi(token));
      break;
    case T_Float:
      {
        /* keep the text form: copy the token into its own string */
        char     *fval = (char *) palloc(tok_len + 1);

        memcpy(fval, token, tok_len);
        fval[tok_len] = '\0';
        result = (Node *) makeFloat(fval);
      }
      break;
    case T_Boolean:
      /* the token is "true" or "false", so the first byte decides */
      result = (Node *) makeBoolean(token[0] == 't');
      break;
    case T_String:
      /* need to remove leading and trailing quotes, and backslashes */
      result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
      break;
    case T_BitString:
      /* need to remove backslashes, but there are no quotes */
      result = (Node *) makeBitString(debackslash(token, tok_len));
      break;
    default:
      elog(ERROR, "unrecognized node type: %d", (int) type);
      result = NULL;    /* keep compiler happy */
      break;
  }

  return result;
}

Coverage Report

Created: 2025-06-13 06:06

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* read.c
4		* routines to convert a string (legal ascii representation of node) back
5		* to nodes
6		*
7		* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8		* Portions Copyright (c) 1994, Regents of the University of California
9		*
10		*
11		* IDENTIFICATION
12		* src/backend/nodes/read.c
13		*
14		* HISTORY
15		* AUTHOR DATE MAJOR EVENT
16		* Andrew Yu Nov 2, 1994 file creation
17		*
18		*-------------------------------------------------------------------------
19		*/
20		#include "postgres.h"
21
22		#include <ctype.h>
23
24		#include "common/string.h"
25		#include "nodes/bitmapset.h"
26		#include "nodes/pg_list.h"
27		#include "nodes/readfuncs.h"
28		#include "nodes/value.h"
29
30
31		/* Static state for pg_strtok */
32		static const char *pg_strtok_ptr = NULL;
33
34		/* State flag that determines how readfuncs.c should treat location fields */
35		#ifdef DEBUG_NODE_TESTS_ENABLED
36		bool restore_location_fields = false;
37		#endif
38
39
40		/*
41		* stringToNode -
42		* builds a Node tree from its string representation (assumed valid)
43		*
44		* restore_loc_fields instructs readfuncs.c whether to restore location
45		* fields rather than set them to -1. This is currently only supported
46		* in builds with DEBUG_NODE_TESTS_ENABLED defined.
47		*/
48		static void *
49		stringToNodeInternal(const char *str, bool restore_loc_fields)
50	0	{
51	0	void *retval;
52	0	const char *save_strtok;
53		#ifdef DEBUG_NODE_TESTS_ENABLED
54		bool save_restore_location_fields;
55		#endif
56
57		/*
58		* We save and restore the pre-existing state of pg_strtok. This makes the
59		* world safe for re-entrant invocation of stringToNode, without incurring
60		* a lot of notational overhead by having to pass the next-character
61		* pointer around through all the readfuncs.c code.
62		*/
63	0	save_strtok = pg_strtok_ptr;
64
65	0	pg_strtok_ptr = str; /* point pg_strtok at the string to read */
66
67		/*
68		* If enabled, likewise save/restore the location field handling flag.
69		*/
70		#ifdef DEBUG_NODE_TESTS_ENABLED
71		save_restore_location_fields = restore_location_fields;
72		restore_location_fields = restore_loc_fields;
73		#endif
74
75	0	retval = nodeRead(NULL, 0); /* do the reading */
76
77	0	pg_strtok_ptr = save_strtok;
78
79		#ifdef DEBUG_NODE_TESTS_ENABLED
80		restore_location_fields = save_restore_location_fields;
81		#endif
82
83	0	return retval;
84	0	}
85
86		/*
87		* Externally visible entry points
88		*/
89		void *
90		stringToNode(const char *str)
91	0	{
92	0	return stringToNodeInternal(str, false);
93	0	}
94
95		#ifdef DEBUG_NODE_TESTS_ENABLED
96
97		void *
98		stringToNodeWithLocations(const char *str)
99		{
100		return stringToNodeInternal(str, true);
101		}
102
103		#endif
104
105
106		/*****************************************************************************
107		*
108		* the lisp token parser
109		*
110		*****************************************************************************/
111
112		/*
113		* pg_strtok --- retrieve next "token" from a string.
114		*
115		* Works kinda like strtok, except it never modifies the source string.
116		* (Instead of storing nulls into the string, the length of the token
117		* is returned to the caller.)
118		* Also, the rules about what is a token are hard-wired rather than being
119		* configured by passing a set of terminating characters.
120		*
121		* The string is assumed to have been initialized already by stringToNode.
122		*
123		* The rules for tokens are:
124		* * Whitespace (space, tab, newline) always separates tokens.
125		* * The characters '(', ')', '{', '}' form individual tokens even
126		* without any whitespace around them.
127		* * Otherwise, a token is all the characters up to the next whitespace
128		* or occurrence of one of the four special characters.
129		* * A backslash '\' can be used to quote whitespace or one of the four
130		* special characters, so that it is treated as a plain token character.
131		* Backslashes themselves must also be backslashed for consistency.
132		* Any other character can be, but need not be, backslashed as well.
133		* * If the resulting token is '<>' (with no backslash), it is returned
134		* as a non-NULL pointer to the token but with length == 0. Note that
135		* there is no other way to get a zero-length token.
136		*
137		* Returns a pointer to the start of the next token, and the length of the
138		* token (including any embedded backslashes!) in *length. If there are
139		* no more tokens, NULL and 0 are returned.
140		*
141		* NOTE: this routine doesn't remove backslashes; the caller must do so
142		* if necessary (see "debackslash").
143		*
144		* NOTE: prior to release 7.0, this routine also had a special case to treat
145		* a token starting with '"' as extending to the next '"'. This code was
146		* broken, however, since it would fail to cope with a string containing an
147		* embedded '"'. I have therefore removed this special case, and instead
148		* introduced rules for using backslashes to quote characters. Higher-level
149		* code should add backslashes to a string constant to ensure it is treated
150		* as a single token.
151		*/
152		const char *
153		pg_strtok(int *length)
154	0	{
155	0	const char *local_str; /* working pointer to string */
156	0	const char *ret_str; /* start of token to return */
157
158	0	local_str = pg_strtok_ptr;
159
160	0	while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
161	0	local_str++;
162
163	0	if (*local_str == '\0')
164	0	{
165	0	*length = 0;
166	0	pg_strtok_ptr = local_str;
167	0	return NULL; /* no more tokens */
168	0	}
169
170		/*
171		* Now pointing at start of next token.
172		*/
173	0	ret_str = local_str;
174
175	0	if (*local_str == '(' || *local_str == ')' ||
176	0	*local_str == '{' || *local_str == '}')
177	0	{
178		/* special 1-character token */
179	0	local_str++;
180	0	}
181	0	else
182	0	{
183		/* Normal token, possibly containing backslashes */
184	0	while (*local_str != '\0' &&
185	0	*local_str != ' ' && *local_str != '\n' &&
186	0	*local_str != '\t' &&
187	0	*local_str != '(' && *local_str != ')' &&
188	0	*local_str != '{' && *local_str != '}')
189	0	{
190	0	if (*local_str == '\\' && local_str[1] != '\0')
191	0	local_str += 2;
192	0	else
193	0	local_str++;
194	0	}
195	0	}
196
197	0	*length = local_str - ret_str;
198
199		/* Recognize special case for "empty" token */
200	0	if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
201	0	*length = 0;
202
203	0	pg_strtok_ptr = local_str;
204
205	0	return ret_str;
206	0	}
207
208		/*
209		* debackslash -
210		* create a palloc'd string holding the given token.
211		* any protective backslashes in the token are removed.
212		*/
213		char *
214		debackslash(const char *token, int length)
215	0	{
216	0	char *result = palloc(length + 1);
217	0	char *ptr = result;
218
219	0	while (length > 0)
220	0	{
221	0	if (*token == '\\' && length > 1)
222	0	token++, length--;
223	0	*ptr++ = *token++;
224	0	length--;
225	0	}
226	0	*ptr = '\0';
227	0	return result;
228	0	}
229
230	0	#define RIGHT_PAREN (1000000 + 1)
231	0	#define LEFT_PAREN (1000000 + 2)
232	0	#define LEFT_BRACE (1000000 + 3)
233	0	#define OTHER_TOKEN (1000000 + 4)
234
235		/*
236		* nodeTokenType -
237		* returns the type of the node token contained in token.
238		* It returns one of the following valid NodeTags:
239		* T_Integer, T_Float, T_Boolean, T_String, T_BitString
240		* and some of its own:
241		* RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
242		*
243		* Assumption: the ascii representation is legal
244		*/
245		static NodeTag
246		nodeTokenType(const char *token, int length)
247	0	{
248	0	NodeTag retval;
249	0	const char *numptr;
250	0	int numlen;
251
252		/*
253		* Check if the token is a number
254		*/
255	0	numptr = token;
256	0	numlen = length;
257	0	if (*numptr == '+' || *numptr == '-')
258	0	numptr++, numlen--;
259	0	if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
260	0	(numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
261	0	{
262		/*
263		* Yes. Figure out whether it is integral or float; this requires
264		* both a syntax check and a range check. strtoint() can do both for
265		* us. We know the token will end at a character that strtoint will
266		* stop at, so we do not need to modify the string.
267		*/
268	0	char *endptr;
269
270	0	errno = 0;
271	0	(void) strtoint(numptr, &endptr, 10);
272	0	if (endptr != token + length || errno == ERANGE)
273	0	return T_Float;
274	0	return T_Integer;
275	0	}
276
277		/*
278		* these three cases do not need length checks, since pg_strtok() will
279		* always treat them as single-byte tokens
280		*/
281	0	else if (*token == '(')
282	0	retval = LEFT_PAREN;
283	0	else if (*token == ')')
284	0	retval = RIGHT_PAREN;
285	0	else if (*token == '{')
286	0	retval = LEFT_BRACE;
287	0	else if ((length == 4 && strncmp(token, "true", 4) == 0) ||
288	0	(length == 5 && strncmp(token, "false", 5) == 0))
289	0	retval = T_Boolean;
290	0	else if (*token == '"' && length > 1 && token[length - 1] == '"')
291	0	retval = T_String;
292	0	else if (*token == 'b' || *token == 'x')
293	0	retval = T_BitString;
294	0	else
295	0	retval = OTHER_TOKEN;
296	0	return retval;
297	0	}
298
299		/*
300		* nodeRead -
301		* Slightly higher-level reader.
302		*
303		* This routine applies some semantic knowledge on top of the purely
304		* lexical tokenizer pg_strtok(). It can read
305		* * Value token nodes (integers, floats, booleans, or strings);
306		* * General nodes (via parseNodeString() from readfuncs.c);
307		* * Lists of the above;
308		* * Lists of integers, OIDs, or TransactionIds.
309		* The return value is declared void *, not Node *, to avoid having to
310		* cast it explicitly in callers that assign to fields of different types.
311		*
312		* External callers should always pass NULL/0 for the arguments. Internally
313		* a non-NULL token may be passed when the upper recursion level has already
314		* scanned the first token of a node's representation.
315		*
316		* We assume pg_strtok is already initialized with a string to read (hence
317		* this should only be invoked from within a stringToNode operation).
318		*/
319		void *
320		nodeRead(const char *token, int tok_len)
321	0	{
322	0	Node *result;
323	0	NodeTag type;
324
325	0	if (token == NULL) /* need to read a token? */
326	0	{
327	0	token = pg_strtok(&tok_len);
328
329	0	if (token == NULL) /* end of input */
330	0	return NULL;
331	0	}
332
333	0	type = nodeTokenType(token, tok_len);
334
335	0	switch ((int) type)
336	0	{
337	0	case LEFT_BRACE:
338	0	result = parseNodeString();
339	0	token = pg_strtok(&tok_len);
340	0	if (token == NULL || token[0] != '}')
341	0	elog(ERROR, "did not find '}' at end of input node");
342	0	break;
343	0	case LEFT_PAREN:
344	0	{
345	0	List *l = NIL;
346
347		/*----------
348		* Could be an integer list: (i int int ...)
349		* or an OID list: (o int int ...)
350		* or an XID list: (x int int ...)
351		* or a bitmapset: (b int int ...)
352		* or a list of nodes/values: (node node ...)
353		*----------
354		*/
355	0	token = pg_strtok(&tok_len);
356	0	if (token == NULL)
357	0	elog(ERROR, "unterminated List structure");
358	0	if (tok_len == 1 && token[0] == 'i')
359	0	{
360		/* List of integers */
361	0	for (;;)
362	0	{
363	0	int val;
364	0	char *endptr;
365
366	0	token = pg_strtok(&tok_len);
367	0	if (token == NULL)
368	0	elog(ERROR, "unterminated List structure");
369	0	if (token[0] == ')')
370	0	break;
371	0	val = (int) strtol(token, &endptr, 10);
372	0	if (endptr != token + tok_len)
373	0	elog(ERROR, "unrecognized integer: \"%.*s\"",
374	0	tok_len, token);
375	0	l = lappend_int(l, val);
376	0	}
377	0	result = (Node *) l;
378	0	}
379	0	else if (tok_len == 1 && token[0] == 'o')
380	0	{
381		/* List of OIDs */
382	0	for (;;)
383	0	{
384	0	Oid val;
385	0	char *endptr;
386
387	0	token = pg_strtok(&tok_len);
388	0	if (token == NULL)
389	0	elog(ERROR, "unterminated List structure");
390	0	if (token[0] == ')')
391	0	break;
392	0	val = (Oid) strtoul(token, &endptr, 10);
393	0	if (endptr != token + tok_len)
394	0	elog(ERROR, "unrecognized OID: \"%.*s\"",
395	0	tok_len, token);
396	0	l = lappend_oid(l, val);
397	0	}
398	0	result = (Node *) l;
399	0	}
400	0	else if (tok_len == 1 && token[0] == 'x')
401	0	{
402		/* List of TransactionIds */
403	0	for (;;)
404	0	{
405	0	TransactionId val;
406	0	char *endptr;
407
408	0	token = pg_strtok(&tok_len);
409	0	if (token == NULL)
410	0	elog(ERROR, "unterminated List structure");
411	0	if (token[0] == ')')
412	0	break;
413	0	val = (TransactionId) strtoul(token, &endptr, 10);
414	0	if (endptr != token + tok_len)
415	0	elog(ERROR, "unrecognized Xid: \"%.*s\"",
416	0	tok_len, token);
417	0	l = lappend_xid(l, val);
418	0	}
419	0	result = (Node *) l;
420	0	}
421	0	else if (tok_len == 1 && token[0] == 'b')
422	0	{
423		/* Bitmapset -- see also _readBitmapset() */
424	0	Bitmapset *bms = NULL;
425
426	0	for (;;)
427	0	{
428	0	int val;
429	0	char *endptr;
430
431	0	token = pg_strtok(&tok_len);
432	0	if (token == NULL)
433	0	elog(ERROR, "unterminated Bitmapset structure");
434	0	if (tok_len == 1 && token[0] == ')')
435	0	break;
436	0	val = (int) strtol(token, &endptr, 10);
437	0	if (endptr != token + tok_len)
438	0	elog(ERROR, "unrecognized integer: \"%.*s\"",
439	0	tok_len, token);
440	0	bms = bms_add_member(bms, val);
441	0	}
442	0	result = (Node *) bms;
443	0	}
444	0	else
445	0	{
446		/* List of other node types */
447	0	for (;;)
448	0	{
449		/* We have already scanned next token... */
450	0	if (token[0] == ')')
451	0	break;
452	0	l = lappend(l, nodeRead(token, tok_len));
453	0	token = pg_strtok(&tok_len);
454	0	if (token == NULL)
455	0	elog(ERROR, "unterminated List structure");
456	0	}
457	0	result = (Node *) l;
458	0	}
459	0	break;
460	0	}
461	0	case RIGHT_PAREN:
462	0	elog(ERROR, "unexpected right parenthesis");
463	0	result = NULL; /* keep compiler happy */
464	0	break;
465	0	case OTHER_TOKEN:
466	0	if (tok_len == 0)
467	0	{
468		/* must be "<>" --- represents a null pointer */
469	0	result = NULL;
470	0	}
471	0	else
472	0	{
473	0	elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
474	0	result = NULL; /* keep compiler happy */
475	0	}
476	0	break;
477	0	case T_Integer:
478
479		/*
480		* we know that the token terminates on a char atoi will stop at
481		*/
482	0	result = (Node *) makeInteger(atoi(token));
483	0	break;
484	0	case T_Float:
485	0	{
486	0	char *fval = (char *) palloc(tok_len + 1);
487
488	0	memcpy(fval, token, tok_len);
489	0	fval[tok_len] = '\0';
490	0	result = (Node *) makeFloat(fval);
491	0	}
492	0	break;
493	0	case T_Boolean:
494	0	result = (Node *) makeBoolean(token[0] == 't');
495	0	break;
496	0	case T_String:
497		/* need to remove leading and trailing quotes, and backslashes */
498	0	result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
499	0	break;
500	0	case T_BitString:
501		/* need to remove backslashes, but there are no quotes */
502	0	result = (Node *) makeBitString(debackslash(token, tok_len));
503	0	break;
504	0	default:
505	0	elog(ERROR, "unrecognized node type: %d", (int) type);
506	0	result = NULL; /* keep compiler happy */
507	0	break;
508	0	}
509
510	0	return result;
511	0	}