/src/postgres/src/backend/nodes/read.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * read.c |
4 | | * routines to convert a string (legal ascii representation of node) back |
5 | | * to nodes |
6 | | * |
7 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
8 | | * Portions Copyright (c) 1994, Regents of the University of California |
9 | | * |
10 | | * |
11 | | * IDENTIFICATION |
12 | | * src/backend/nodes/read.c |
13 | | * |
14 | | * HISTORY |
15 | | * AUTHOR DATE MAJOR EVENT |
16 | | * Andrew Yu Nov 2, 1994 file creation |
17 | | * |
18 | | *------------------------------------------------------------------------- |
19 | | */ |
20 | | #include "postgres.h" |
21 | | |
22 | | #include <ctype.h> |
23 | | |
24 | | #include "common/string.h" |
25 | | #include "nodes/bitmapset.h" |
26 | | #include "nodes/pg_list.h" |
27 | | #include "nodes/readfuncs.h" |
28 | | #include "nodes/value.h" |
29 | | |
30 | | |
31 | | /* Static state for pg_strtok: the next unread character of the string being tokenized */ |
32 | | static const char *pg_strtok_ptr = NULL; |
33 | | |
34 | | /* State flag that determines how readfuncs.c should treat location fields; set per read by stringToNodeInternal */ |
35 | | #ifdef DEBUG_NODE_TESTS_ENABLED |
36 | | bool restore_location_fields = false; |
37 | | #endif |
38 | | |
39 | | |
40 | | /* |
41 | | * stringToNode - |
42 | | * builds a Node tree from its string representation (assumed valid) |
43 | | * |
44 | | * restore_loc_fields instructs readfuncs.c whether to restore location |
45 | | * fields rather than set them to -1. This is currently only supported |
46 | | * in builds with DEBUG_NODE_TESTS_ENABLED defined. |
47 | | */ |
48 | | static void * |
49 | | stringToNodeInternal(const char *str, bool restore_loc_fields) |
50 | 0 | { |
51 | 0 | void *retval; |
52 | 0 | const char *save_strtok; |
53 | | #ifdef DEBUG_NODE_TESTS_ENABLED |
54 | | bool save_restore_location_fields; |
55 | | #endif |
56 | | |
57 | | /* |
58 | | * We save and restore the pre-existing state of pg_strtok. This makes the |
59 | | * world safe for re-entrant invocation of stringToNode, without incurring |
60 | | * a lot of notational overhead by having to pass the next-character |
61 | | * pointer around through all the readfuncs.c code. |
62 | | */ |
63 | 0 | save_strtok = pg_strtok_ptr; |
64 | |
|
65 | 0 | pg_strtok_ptr = str; /* point pg_strtok at the string to read */ |
66 | | |
67 | | /* |
68 | | * If enabled, likewise save/restore the location field handling flag. |
69 | | */ |
70 | | #ifdef DEBUG_NODE_TESTS_ENABLED |
71 | | save_restore_location_fields = restore_location_fields; |
72 | | restore_location_fields = restore_loc_fields; |
73 | | #endif |
74 | |
|
75 | 0 | retval = nodeRead(NULL, 0); /* do the reading */ |
76 | |
|
77 | 0 | pg_strtok_ptr = save_strtok; |
78 | |
|
79 | | #ifdef DEBUG_NODE_TESTS_ENABLED |
80 | | restore_location_fields = save_restore_location_fields; |
81 | | #endif |
82 | |
|
83 | 0 | return retval; |
84 | 0 | } |
85 | | |
/*
 * Externally visible entry points
 */

/*
 * stringToNode -
 *	  read a Node tree from its string form, without asking for location
 *	  fields to be restored (passes false for restore_loc_fields).
 */
void *
stringToNode(const char *str)
{
	void	   *tree = stringToNodeInternal(str, false);

	return tree;
}
94 | | |
95 | | #ifdef DEBUG_NODE_TESTS_ENABLED |
96 | | |
/*
 * stringToNodeWithLocations -
 *	  same as stringToNode, but asks for location fields to be restored
 *	  (passes true for restore_loc_fields).
 */
void *
stringToNodeWithLocations(const char *str)
{
	void	   *tree = stringToNodeInternal(str, true);

	return tree;
}
102 | | |
103 | | #endif |
104 | | |
105 | | |
106 | | /***************************************************************************** |
107 | | * |
108 | | * the lisp token parser |
109 | | * |
110 | | *****************************************************************************/ |
111 | | |
112 | | /* |
113 | | * pg_strtok --- retrieve next "token" from a string. |
114 | | * |
115 | | * Works kinda like strtok, except it never modifies the source string. |
116 | | * (Instead of storing nulls into the string, the length of the token |
117 | | * is returned to the caller.) |
118 | | * Also, the rules about what is a token are hard-wired rather than being |
119 | | * configured by passing a set of terminating characters. |
120 | | * |
121 | | * The string is assumed to have been initialized already by stringToNode. |
122 | | * |
123 | | * The rules for tokens are: |
124 | | * * Whitespace (space, tab, newline) always separates tokens. |
125 | | * * The characters '(', ')', '{', '}' form individual tokens even |
126 | | * without any whitespace around them. |
127 | | * * Otherwise, a token is all the characters up to the next whitespace |
128 | | * or occurrence of one of the four special characters. |
129 | | * * A backslash '\' can be used to quote whitespace or one of the four |
130 | | * special characters, so that it is treated as a plain token character. |
131 | | * Backslashes themselves must also be backslashed for consistency. |
132 | | * Any other character can be, but need not be, backslashed as well. |
133 | | * * If the resulting token is '<>' (with no backslash), it is returned |
134 | | * as a non-NULL pointer to the token but with length == 0. Note that |
135 | | * there is no other way to get a zero-length token. |
136 | | * |
137 | | * Returns a pointer to the start of the next token, and the length of the |
138 | | * token (including any embedded backslashes!) in *length. If there are |
139 | | * no more tokens, NULL and 0 are returned. |
140 | | * |
141 | | * NOTE: this routine doesn't remove backslashes; the caller must do so |
142 | | * if necessary (see "debackslash"). |
143 | | * |
144 | | * NOTE: prior to release 7.0, this routine also had a special case to treat |
145 | | * a token starting with '"' as extending to the next '"'. This code was |
146 | | * broken, however, since it would fail to cope with a string containing an |
147 | | * embedded '"'. I have therefore removed this special case, and instead |
148 | | * introduced rules for using backslashes to quote characters. Higher-level |
149 | | * code should add backslashes to a string constant to ensure it is treated |
150 | | * as a single token. |
151 | | */ |
152 | | const char * |
153 | | pg_strtok(int *length) |
154 | 0 | { |
155 | 0 | const char *local_str; /* working pointer to string */ |
156 | 0 | const char *ret_str; /* start of token to return */ |
157 | |
|
158 | 0 | local_str = pg_strtok_ptr; |
159 | |
|
160 | 0 | while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t') |
161 | 0 | local_str++; |
162 | |
|
163 | 0 | if (*local_str == '\0') |
164 | 0 | { |
165 | 0 | *length = 0; |
166 | 0 | pg_strtok_ptr = local_str; |
167 | 0 | return NULL; /* no more tokens */ |
168 | 0 | } |
169 | | |
170 | | /* |
171 | | * Now pointing at start of next token. |
172 | | */ |
173 | 0 | ret_str = local_str; |
174 | |
|
175 | 0 | if (*local_str == '(' || *local_str == ')' || |
176 | 0 | *local_str == '{' || *local_str == '}') |
177 | 0 | { |
178 | | /* special 1-character token */ |
179 | 0 | local_str++; |
180 | 0 | } |
181 | 0 | else |
182 | 0 | { |
183 | | /* Normal token, possibly containing backslashes */ |
184 | 0 | while (*local_str != '\0' && |
185 | 0 | *local_str != ' ' && *local_str != '\n' && |
186 | 0 | *local_str != '\t' && |
187 | 0 | *local_str != '(' && *local_str != ')' && |
188 | 0 | *local_str != '{' && *local_str != '}') |
189 | 0 | { |
190 | 0 | if (*local_str == '\\' && local_str[1] != '\0') |
191 | 0 | local_str += 2; |
192 | 0 | else |
193 | 0 | local_str++; |
194 | 0 | } |
195 | 0 | } |
196 | |
|
197 | 0 | *length = local_str - ret_str; |
198 | | |
199 | | /* Recognize special case for "empty" token */ |
200 | 0 | if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>') |
201 | 0 | *length = 0; |
202 | |
|
203 | 0 | pg_strtok_ptr = local_str; |
204 | |
|
205 | 0 | return ret_str; |
206 | 0 | } |
207 | | |
/*
 * debackslash -
 *	  return a palloc'd, null-terminated copy of the first "length" bytes
 *	  of "token", with protective backslashes stripped.
 *
 * A backslash that is followed by another byte of the token is dropped and
 * the following byte is copied verbatim.  A backslash that is the very last
 * byte of the token is copied as-is.
 */
char *
debackslash(const char *token, int length)
{
	char	   *copy = palloc(length + 1);
	int			src = 0;		/* next byte of token to examine */
	int			dst = 0;		/* next byte of copy to fill */

	while (src < length)
	{
		/* Skip a quoting backslash, but only if something follows it. */
		if (token[src] == '\\' && src + 1 < length)
			src++;
		copy[dst++] = token[src++];
	}
	copy[dst] = '\0';

	return copy;
}
229 | | |
230 | 0 | #define RIGHT_PAREN (1000000 + 1) |
231 | 0 | #define LEFT_PAREN (1000000 + 2) |
232 | 0 | #define LEFT_BRACE (1000000 + 3) |
233 | 0 | #define OTHER_TOKEN (1000000 + 4) |
234 | | |
235 | | /* |
236 | | * nodeTokenType - |
237 | | * returns the type of the node token contained in token. |
238 | | * It returns one of the following valid NodeTags: |
239 | | * T_Integer, T_Float, T_Boolean, T_String, T_BitString |
240 | | * and some of its own: |
241 | | * RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN |
242 | | * |
243 | | * Assumption: the ascii representation is legal |
244 | | */ |
245 | | static NodeTag |
246 | | nodeTokenType(const char *token, int length) |
247 | 0 | { |
248 | 0 | NodeTag retval; |
249 | 0 | const char *numptr; |
250 | 0 | int numlen; |
251 | | |
252 | | /* |
253 | | * Check if the token is a number |
254 | | */ |
255 | 0 | numptr = token; |
256 | 0 | numlen = length; |
257 | 0 | if (*numptr == '+' || *numptr == '-') |
258 | 0 | numptr++, numlen--; |
259 | 0 | if ((numlen > 0 && isdigit((unsigned char) *numptr)) || |
260 | 0 | (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1]))) |
261 | 0 | { |
262 | | /* |
263 | | * Yes. Figure out whether it is integral or float; this requires |
264 | | * both a syntax check and a range check. strtoint() can do both for |
265 | | * us. We know the token will end at a character that strtoint will |
266 | | * stop at, so we do not need to modify the string. |
267 | | */ |
268 | 0 | char *endptr; |
269 | |
|
270 | 0 | errno = 0; |
271 | 0 | (void) strtoint(numptr, &endptr, 10); |
272 | 0 | if (endptr != token + length || errno == ERANGE) |
273 | 0 | return T_Float; |
274 | 0 | return T_Integer; |
275 | 0 | } |
276 | | |
277 | | /* |
278 | | * these three cases do not need length checks, since pg_strtok() will |
279 | | * always treat them as single-byte tokens |
280 | | */ |
281 | 0 | else if (*token == '(') |
282 | 0 | retval = LEFT_PAREN; |
283 | 0 | else if (*token == ')') |
284 | 0 | retval = RIGHT_PAREN; |
285 | 0 | else if (*token == '{') |
286 | 0 | retval = LEFT_BRACE; |
287 | 0 | else if ((length == 4 && strncmp(token, "true", 4) == 0) || |
288 | 0 | (length == 5 && strncmp(token, "false", 5) == 0)) |
289 | 0 | retval = T_Boolean; |
290 | 0 | else if (*token == '"' && length > 1 && token[length - 1] == '"') |
291 | 0 | retval = T_String; |
292 | 0 | else if (*token == 'b' || *token == 'x') |
293 | 0 | retval = T_BitString; |
294 | 0 | else |
295 | 0 | retval = OTHER_TOKEN; |
296 | 0 | return retval; |
297 | 0 | } |
298 | | |
299 | | /* |
300 | | * nodeRead - |
301 | | * Slightly higher-level reader. |
302 | | * |
303 | | * This routine applies some semantic knowledge on top of the purely |
304 | | * lexical tokenizer pg_strtok(). It can read |
305 | | * * Value token nodes (integers, floats, booleans, or strings); |
306 | | * * General nodes (via parseNodeString() from readfuncs.c); |
307 | | * * Lists of the above; |
308 | | * * Lists of integers, OIDs, or TransactionIds. |
309 | | * The return value is declared void *, not Node *, to avoid having to |
310 | | * cast it explicitly in callers that assign to fields of different types. |
311 | | * |
312 | | * External callers should always pass NULL/0 for the arguments. Internally |
313 | | * a non-NULL token may be passed when the upper recursion level has already |
314 | | * scanned the first token of a node's representation. |
315 | | * |
316 | | * We assume pg_strtok is already initialized with a string to read (hence |
317 | | * this should only be invoked from within a stringToNode operation). |
318 | | */ |
319 | | void * |
320 | | nodeRead(const char *token, int tok_len) |
321 | 0 | { |
322 | 0 | Node *result; |
323 | 0 | NodeTag type; |
324 | |
|
325 | 0 | if (token == NULL) /* need to read a token? */ |
326 | 0 | { |
327 | 0 | token = pg_strtok(&tok_len); |
328 | |
|
329 | 0 | if (token == NULL) /* end of input */ |
330 | 0 | return NULL; |
331 | 0 | } |
332 | | |
333 | 0 | type = nodeTokenType(token, tok_len); |
334 | |
|
335 | 0 | switch ((int) type) |
336 | 0 | { |
337 | 0 | case LEFT_BRACE: |
338 | 0 | result = parseNodeString(); |
339 | 0 | token = pg_strtok(&tok_len); |
340 | 0 | if (token == NULL || token[0] != '}') |
341 | 0 | elog(ERROR, "did not find '}' at end of input node"); |
342 | 0 | break; |
343 | 0 | case LEFT_PAREN: |
344 | 0 | { |
345 | 0 | List *l = NIL; |
346 | | |
347 | | /*---------- |
348 | | * Could be an integer list: (i int int ...) |
349 | | * or an OID list: (o int int ...) |
350 | | * or an XID list: (x int int ...) |
351 | | * or a bitmapset: (b int int ...) |
352 | | * or a list of nodes/values: (node node ...) |
353 | | *---------- |
354 | | */ |
355 | 0 | token = pg_strtok(&tok_len); |
356 | 0 | if (token == NULL) |
357 | 0 | elog(ERROR, "unterminated List structure"); |
358 | 0 | if (tok_len == 1 && token[0] == 'i') |
359 | 0 | { |
360 | | /* List of integers */ |
361 | 0 | for (;;) |
362 | 0 | { |
363 | 0 | int val; |
364 | 0 | char *endptr; |
365 | |
|
366 | 0 | token = pg_strtok(&tok_len); |
367 | 0 | if (token == NULL) |
368 | 0 | elog(ERROR, "unterminated List structure"); |
369 | 0 | if (token[0] == ')') |
370 | 0 | break; |
371 | 0 | val = (int) strtol(token, &endptr, 10); |
372 | 0 | if (endptr != token + tok_len) |
373 | 0 | elog(ERROR, "unrecognized integer: \"%.*s\"", |
374 | 0 | tok_len, token); |
375 | 0 | l = lappend_int(l, val); |
376 | 0 | } |
377 | 0 | result = (Node *) l; |
378 | 0 | } |
379 | 0 | else if (tok_len == 1 && token[0] == 'o') |
380 | 0 | { |
381 | | /* List of OIDs */ |
382 | 0 | for (;;) |
383 | 0 | { |
384 | 0 | Oid val; |
385 | 0 | char *endptr; |
386 | |
|
387 | 0 | token = pg_strtok(&tok_len); |
388 | 0 | if (token == NULL) |
389 | 0 | elog(ERROR, "unterminated List structure"); |
390 | 0 | if (token[0] == ')') |
391 | 0 | break; |
392 | 0 | val = (Oid) strtoul(token, &endptr, 10); |
393 | 0 | if (endptr != token + tok_len) |
394 | 0 | elog(ERROR, "unrecognized OID: \"%.*s\"", |
395 | 0 | tok_len, token); |
396 | 0 | l = lappend_oid(l, val); |
397 | 0 | } |
398 | 0 | result = (Node *) l; |
399 | 0 | } |
400 | 0 | else if (tok_len == 1 && token[0] == 'x') |
401 | 0 | { |
402 | | /* List of TransactionIds */ |
403 | 0 | for (;;) |
404 | 0 | { |
405 | 0 | TransactionId val; |
406 | 0 | char *endptr; |
407 | |
|
408 | 0 | token = pg_strtok(&tok_len); |
409 | 0 | if (token == NULL) |
410 | 0 | elog(ERROR, "unterminated List structure"); |
411 | 0 | if (token[0] == ')') |
412 | 0 | break; |
413 | 0 | val = (TransactionId) strtoul(token, &endptr, 10); |
414 | 0 | if (endptr != token + tok_len) |
415 | 0 | elog(ERROR, "unrecognized Xid: \"%.*s\"", |
416 | 0 | tok_len, token); |
417 | 0 | l = lappend_xid(l, val); |
418 | 0 | } |
419 | 0 | result = (Node *) l; |
420 | 0 | } |
421 | 0 | else if (tok_len == 1 && token[0] == 'b') |
422 | 0 | { |
423 | | /* Bitmapset -- see also _readBitmapset() */ |
424 | 0 | Bitmapset *bms = NULL; |
425 | |
|
426 | 0 | for (;;) |
427 | 0 | { |
428 | 0 | int val; |
429 | 0 | char *endptr; |
430 | |
|
431 | 0 | token = pg_strtok(&tok_len); |
432 | 0 | if (token == NULL) |
433 | 0 | elog(ERROR, "unterminated Bitmapset structure"); |
434 | 0 | if (tok_len == 1 && token[0] == ')') |
435 | 0 | break; |
436 | 0 | val = (int) strtol(token, &endptr, 10); |
437 | 0 | if (endptr != token + tok_len) |
438 | 0 | elog(ERROR, "unrecognized integer: \"%.*s\"", |
439 | 0 | tok_len, token); |
440 | 0 | bms = bms_add_member(bms, val); |
441 | 0 | } |
442 | 0 | result = (Node *) bms; |
443 | 0 | } |
444 | 0 | else |
445 | 0 | { |
446 | | /* List of other node types */ |
447 | 0 | for (;;) |
448 | 0 | { |
449 | | /* We have already scanned next token... */ |
450 | 0 | if (token[0] == ')') |
451 | 0 | break; |
452 | 0 | l = lappend(l, nodeRead(token, tok_len)); |
453 | 0 | token = pg_strtok(&tok_len); |
454 | 0 | if (token == NULL) |
455 | 0 | elog(ERROR, "unterminated List structure"); |
456 | 0 | } |
457 | 0 | result = (Node *) l; |
458 | 0 | } |
459 | 0 | break; |
460 | 0 | } |
461 | 0 | case RIGHT_PAREN: |
462 | 0 | elog(ERROR, "unexpected right parenthesis"); |
463 | 0 | result = NULL; /* keep compiler happy */ |
464 | 0 | break; |
465 | 0 | case OTHER_TOKEN: |
466 | 0 | if (tok_len == 0) |
467 | 0 | { |
468 | | /* must be "<>" --- represents a null pointer */ |
469 | 0 | result = NULL; |
470 | 0 | } |
471 | 0 | else |
472 | 0 | { |
473 | 0 | elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token); |
474 | 0 | result = NULL; /* keep compiler happy */ |
475 | 0 | } |
476 | 0 | break; |
477 | 0 | case T_Integer: |
478 | | |
479 | | /* |
480 | | * we know that the token terminates on a char atoi will stop at |
481 | | */ |
482 | 0 | result = (Node *) makeInteger(atoi(token)); |
483 | 0 | break; |
484 | 0 | case T_Float: |
485 | 0 | { |
486 | 0 | char *fval = (char *) palloc(tok_len + 1); |
487 | |
|
488 | 0 | memcpy(fval, token, tok_len); |
489 | 0 | fval[tok_len] = '\0'; |
490 | 0 | result = (Node *) makeFloat(fval); |
491 | 0 | } |
492 | 0 | break; |
493 | 0 | case T_Boolean: |
494 | 0 | result = (Node *) makeBoolean(token[0] == 't'); |
495 | 0 | break; |
496 | 0 | case T_String: |
497 | | /* need to remove leading and trailing quotes, and backslashes */ |
498 | 0 | result = (Node *) makeString(debackslash(token + 1, tok_len - 2)); |
499 | 0 | break; |
500 | 0 | case T_BitString: |
501 | | /* need to remove backslashes, but there are no quotes */ |
502 | 0 | result = (Node *) makeBitString(debackslash(token, tok_len)); |
503 | 0 | break; |
504 | 0 | default: |
505 | 0 | elog(ERROR, "unrecognized node type: %d", (int) type); |
506 | 0 | result = NULL; /* keep compiler happy */ |
507 | 0 | break; |
508 | 0 | } |
509 | | |
510 | 0 | return result; |
511 | 0 | } |