/src/postgres/src/backend/utils/adt/tsquery.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * tsquery.c |
4 | | * I/O functions for tsquery |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/utils/adt/tsquery.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "libpq/pqformat.h" |
18 | | #include "miscadmin.h" |
19 | | #include "nodes/miscnodes.h" |
20 | | #include "tsearch/ts_locale.h" |
21 | | #include "tsearch/ts_type.h" |
22 | | #include "tsearch/ts_utils.h" |
23 | | #include "utils/builtins.h" |
24 | | #include "utils/memutils.h" |
25 | | #include "utils/pg_crc.h" |
26 | | #include "varatt.h" |
27 | | |
/*
 * FTS operator priorities, see ts_type.h
 *
 * One entry per OP_* code, listed in the order OP_NOT, OP_AND, OP_OR,
 * OP_PHRASE.  cleanOpStack() compares these values (through OP_PRIORITY)
 * to decide which pending operators have to be emitted before a new one
 * is stacked; OP_OR carries the lowest value.
 */
const int	tsearch_op_priority[OP_COUNT] =
{
	4,							/* OP_NOT */
	2,							/* OP_AND */
	1,							/* OP_OR */
	3							/* OP_PHRASE */
};
36 | | |
/*
 * parser's states
 *
 * Stored in TSQueryParserStateData.state and consulted by the tokenizers
 * to decide what kind of token may come next.
 */
typedef enum
{
	/* an operand (or a prefix operator / opening parenthesis) is expected */
	WAITOPERAND = 1,
	/* an operand was just returned; an operator or end of input is expected */
	WAITOPERATOR = 2,
	/*
	 * initial state set by parse_tsquery(): handled like WAITOPERAND, except
	 * that failing to find a value yields PT_END instead of an error
	 */
	WAITFIRSTOPERAND = 3,
} ts_parserstate;
46 | | |
/*
 * token types for parsing
 *
 * These are the values a ts_tokenizer returns to makepol().
 */
typedef enum
{
	PT_END = 0,					/* no more tokens */
	PT_ERR = 1,					/* error; makepol() reports a generic syntax
								 * error unless a soft error is already saved */
	PT_VAL = 2,					/* operand (value) */
	PT_OPR = 3,					/* operator, OP_* code in *operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5,				/* closing parenthesis */
} ts_tokentype;
59 | | |
/*
 * get token from query string
 *
 * All arguments except "state" are output arguments.
 *
 * If return value is PT_OPR, then *operator is filled with an OP_* code
 * and *weight will contain a distance value in case of phrase operator.
 *
 * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
 * are filled.
 *
 * If PT_ERR is returned then a soft error has occurred.  If state->escontext
 * isn't already filled then this should be reported as a generic parse error.
 *
 * parse_tsquery() installs one of three implementations in
 * state->gettoken: gettoken_query_standard, gettoken_query_websearch or
 * gettoken_query_plain.
 */
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
									  int *lenval, char **strval,
									  int16 *weight, bool *prefix);
77 | | |
/*
 * Working state of one parse_tsquery() call.  It is filled in by
 * parse_tsquery(), advanced by the tokenizer, and extended by the push*
 * functions.
 */
struct TSQueryParserStateData
{
	/* Tokenizer used for parsing tsquery */
	ts_tokenizer gettoken;

	/* State of tokenizer function */
	char	   *buffer;			/* entire string we are scanning */
	char	   *buf;			/* current scan point */
	int			count;			/* nesting count, incremented by (,
								 * decremented by ) */
	ts_parserstate state;		/* what kind of token is expected next */

	/* polish (prefix) notation in list, filled in by push* functions */
	List	   *polstr;

	/*
	 * Strings from operands are collected in op. curop is a pointer to the
	 * end of used space of op.
	 */
	char	   *op;
	char	   *curop;
	int			lenop;			/* allocated size of op */
	int			sumlen;			/* used size of op */

	/* state for value's parser */
	TSVectorParseState valstate;

	/* context object for soft errors - must match valstate's escontext */
	Node	   *escontext;
};
108 | | |
109 | | /* |
110 | | * subroutine to parse the modifiers (weight and prefix flag currently) |
111 | | * part, like ':AB*' of a query. |
112 | | */ |
113 | | static char * |
114 | | get_modifiers(char *buf, int16 *weight, bool *prefix) |
115 | 0 | { |
116 | 0 | *weight = 0; |
117 | 0 | *prefix = false; |
118 | |
|
119 | 0 | if (!t_iseq(buf, ':')) |
120 | 0 | return buf; |
121 | | |
122 | 0 | buf++; |
123 | 0 | while (*buf && pg_mblen(buf) == 1) |
124 | 0 | { |
125 | 0 | switch (*buf) |
126 | 0 | { |
127 | 0 | case 'a': |
128 | 0 | case 'A': |
129 | 0 | *weight |= 1 << 3; |
130 | 0 | break; |
131 | 0 | case 'b': |
132 | 0 | case 'B': |
133 | 0 | *weight |= 1 << 2; |
134 | 0 | break; |
135 | 0 | case 'c': |
136 | 0 | case 'C': |
137 | 0 | *weight |= 1 << 1; |
138 | 0 | break; |
139 | 0 | case 'd': |
140 | 0 | case 'D': |
141 | 0 | *weight |= 1; |
142 | 0 | break; |
143 | 0 | case '*': |
144 | 0 | *prefix = true; |
145 | 0 | break; |
146 | 0 | default: |
147 | 0 | return buf; |
148 | 0 | } |
149 | 0 | buf++; |
150 | 0 | } |
151 | | |
152 | 0 | return buf; |
153 | 0 | } |
154 | | |
155 | | /* |
156 | | * Parse phrase operator. The operator |
157 | | * may take the following forms: |
158 | | * |
159 | | * a <N> b (distance is exactly N lexemes) |
160 | | * a <-> b (default distance = 1) |
161 | | * |
162 | | * The buffer should begin with '<' char |
163 | | */ |
164 | | static bool |
165 | | parse_phrase_operator(TSQueryParserState pstate, int16 *distance) |
166 | 0 | { |
167 | 0 | enum |
168 | 0 | { |
169 | 0 | PHRASE_OPEN = 0, |
170 | 0 | PHRASE_DIST, |
171 | 0 | PHRASE_CLOSE, |
172 | 0 | PHRASE_FINISH |
173 | 0 | } state = PHRASE_OPEN; |
174 | 0 | char *ptr = pstate->buf; |
175 | 0 | char *endptr; |
176 | 0 | long l = 1; /* default distance */ |
177 | |
|
178 | 0 | while (*ptr) |
179 | 0 | { |
180 | 0 | switch (state) |
181 | 0 | { |
182 | 0 | case PHRASE_OPEN: |
183 | 0 | if (t_iseq(ptr, '<')) |
184 | 0 | { |
185 | 0 | state = PHRASE_DIST; |
186 | 0 | ptr++; |
187 | 0 | } |
188 | 0 | else |
189 | 0 | return false; |
190 | 0 | break; |
191 | | |
192 | 0 | case PHRASE_DIST: |
193 | 0 | if (t_iseq(ptr, '-')) |
194 | 0 | { |
195 | 0 | state = PHRASE_CLOSE; |
196 | 0 | ptr++; |
197 | 0 | continue; |
198 | 0 | } |
199 | | |
200 | 0 | if (!isdigit((unsigned char) *ptr)) |
201 | 0 | return false; |
202 | | |
203 | 0 | errno = 0; |
204 | 0 | l = strtol(ptr, &endptr, 10); |
205 | 0 | if (ptr == endptr) |
206 | 0 | return false; |
207 | 0 | else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) |
208 | 0 | ereturn(pstate->escontext, false, |
209 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
210 | 0 | errmsg("distance in phrase operator must be an integer value between zero and %d inclusive", |
211 | 0 | MAXENTRYPOS))); |
212 | 0 | else |
213 | 0 | { |
214 | 0 | state = PHRASE_CLOSE; |
215 | 0 | ptr = endptr; |
216 | 0 | } |
217 | 0 | break; |
218 | | |
219 | 0 | case PHRASE_CLOSE: |
220 | 0 | if (t_iseq(ptr, '>')) |
221 | 0 | { |
222 | 0 | state = PHRASE_FINISH; |
223 | 0 | ptr++; |
224 | 0 | } |
225 | 0 | else |
226 | 0 | return false; |
227 | 0 | break; |
228 | | |
229 | 0 | case PHRASE_FINISH: |
230 | 0 | *distance = (int16) l; |
231 | 0 | pstate->buf = ptr; |
232 | 0 | return true; |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | 0 | return false; |
237 | 0 | } |
238 | | |
239 | | /* |
240 | | * Parse OR operator used in websearch_to_tsquery(), returns true if we |
241 | | * believe that "OR" literal could be an operator OR |
242 | | */ |
243 | | static bool |
244 | | parse_or_operator(TSQueryParserState pstate) |
245 | 0 | { |
246 | 0 | char *ptr = pstate->buf; |
247 | | |
248 | | /* it should begin with "OR" literal */ |
249 | 0 | if (pg_strncasecmp(ptr, "or", 2) != 0) |
250 | 0 | return false; |
251 | | |
252 | 0 | ptr += 2; |
253 | | |
254 | | /* |
255 | | * it shouldn't be a part of any word but somewhere later it should be |
256 | | * some operand |
257 | | */ |
258 | 0 | if (*ptr == '\0') /* no operand */ |
259 | 0 | return false; |
260 | | |
261 | | /* it shouldn't be a part of any word */ |
262 | 0 | if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr)) |
263 | 0 | return false; |
264 | | |
265 | 0 | for (;;) |
266 | 0 | { |
267 | 0 | ptr += pg_mblen(ptr); |
268 | |
|
269 | 0 | if (*ptr == '\0') /* got end of string without operand */ |
270 | 0 | return false; |
271 | | |
272 | | /* |
273 | | * Suppose, we found an operand, but could be a not correct operand. |
274 | | * So we still treat OR literal as operation with possibly incorrect |
275 | | * operand and will not search it as lexeme |
276 | | */ |
277 | 0 | if (!isspace((unsigned char) *ptr)) |
278 | 0 | break; |
279 | 0 | } |
280 | | |
281 | 0 | pstate->buf += 2; |
282 | 0 | return true; |
283 | 0 | } |
284 | | |
/*
 * Tokenizer for the regular tsquery syntax; parse_tsquery() selects it when
 * neither P_TSQ_PLAIN nor P_TSQ_WEB is given.
 *
 * While an operand is expected it recognizes '!' (OP_NOT), '(' and values,
 * which are read with the tsvector value parser and may carry modifiers.
 * While an operator is expected it recognizes '&' (OP_AND), '|' (OP_OR),
 * phrase operators, ')' and end of input.  Whitespace is skipped in both
 * situations.
 *
 * Arguments and return values follow the ts_tokenizer convention.
 */
static ts_tokentype
gettoken_query_standard(TSQueryParserState state, int8 *operator,
						int *lenval, char **strval,
						int16 *weight, bool *prefix)
{
	*weight = 0;
	*prefix = false;

	while (true)
	{
		switch (state->state)
		{
			case WAITFIRSTOPERAND:
			case WAITOPERAND:
				if (t_iseq(state->buf, '!'))
				{
					state->buf++;
					state->state = WAITOPERAND;
					*operator = OP_NOT;
					return PT_OPR;
				}
				else if (t_iseq(state->buf, '('))
				{
					state->buf++;
					state->state = WAITOPERAND;
					state->count++;
					return PT_OPEN;
				}
				else if (t_iseq(state->buf, ':'))
				{
					/* generic syntax error message is fine */
					return PT_ERR;
				}
				else if (!isspace((unsigned char) *state->buf))
				{
					/*
					 * We rely on the tsvector parser to parse the value for
					 * us
					 */
					reset_tsvector_parser(state->valstate, state->buf);
					if (gettoken_tsvector(state->valstate, strval, lenval,
										  NULL, NULL, &state->buf))
					{
						/* got a value; pick up a trailing ':...' suffix */
						state->buf = get_modifiers(state->buf, weight, prefix);
						state->state = WAITOPERATOR;
						return PT_VAL;
					}
					else if (SOFT_ERROR_OCCURRED(state->escontext))
					{
						/* gettoken_tsvector reported a soft error */
						return PT_ERR;
					}
					else if (state->state == WAITFIRSTOPERAND)
					{
						/* no value at all in the query: not an error here */
						return PT_END;
					}
					else
						ereturn(state->escontext, PT_ERR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("no operand in tsquery: \"%s\"",
										state->buffer)));
				}
				/* whitespace: skip it below */
				break;

			case WAITOPERATOR:
				if (t_iseq(state->buf, '&'))
				{
					state->buf++;
					state->state = WAITOPERAND;
					*operator = OP_AND;
					return PT_OPR;
				}
				else if (t_iseq(state->buf, '|'))
				{
					state->buf++;
					state->state = WAITOPERAND;
					*operator = OP_OR;
					return PT_OPR;
				}
				else if (parse_phrase_operator(state, weight))
				{
					/* weight var is used as storage for distance */
					state->state = WAITOPERAND;
					*operator = OP_PHRASE;
					return PT_OPR;
				}
				else if (SOFT_ERROR_OCCURRED(state->escontext))
				{
					/* parse_phrase_operator reported a soft error */
					return PT_ERR;
				}
				else if (t_iseq(state->buf, ')'))
				{
					state->buf++;
					state->count--;
					/* a negative count means ')' without a matching '(' */
					return (state->count < 0) ? PT_ERR : PT_CLOSE;
				}
				else if (*state->buf == '\0')
				{
					/* end of input is an error while parentheses are open */
					return (state->count) ? PT_ERR : PT_END;
				}
				else if (!isspace((unsigned char) *state->buf))
				{
					/* anything else where an operator should be */
					return PT_ERR;
				}
				/* whitespace: skip it below */
				break;
		}

		/* step over one (possibly multibyte) whitespace character */
		state->buf += pg_mblen(state->buf);
	}
}
396 | | |
/*
 * Tokenizer for the web-search syntax; parse_tsquery() selects it when
 * P_TSQ_WEB is given.
 *
 * '-' in operand position means OP_NOT, text in double quotes is returned as
 * one value, "or" may be the OP_OR operator (see parse_or_operator), and an
 * implicit OP_AND is produced between adjacent operands.  Characters for
 * which ISOPERATOR() is true are skipped.  This function itself never
 * returns PT_ERR except for a soft error reported by the value parser.
 *
 * Arguments and return values follow the ts_tokenizer convention; *weight
 * and *prefix are always left at 0 / false.
 */
static ts_tokentype
gettoken_query_websearch(TSQueryParserState state, int8 *operator,
						 int *lenval, char **strval,
						 int16 *weight, bool *prefix)
{
	*weight = 0;
	*prefix = false;

	while (true)
	{
		switch (state->state)
		{
			case WAITFIRSTOPERAND:
			case WAITOPERAND:
				if (t_iseq(state->buf, '-'))
				{
					state->buf++;
					state->state = WAITOPERAND;

					*operator = OP_NOT;
					return PT_OPR;
				}
				else if (t_iseq(state->buf, '"'))
				{
					/* Everything in quotes is processed as a single token */

					/* skip opening quote */
					state->buf++;
					*strval = state->buf;

					/* iterate to the closing quote or end of the string */
					while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
						state->buf++;
					*lenval = state->buf - *strval;

					/* skip closing quote if not end of the string */
					if (*state->buf != '\0')
						state->buf++;

					state->state = WAITOPERATOR;
					/*
					 * NOTE(review): count is incremented here although no
					 * parenthesis is involved, and this tokenizer never
					 * reads it -- confirm the intent.
					 */
					state->count++;
					return PT_VAL;
				}
				else if (ISOPERATOR(state->buf))
				{
					/* ignore, else gettoken_tsvector() will raise an error */
					state->buf++;
					state->state = WAITOPERAND;
					continue;
				}
				else if (!isspace((unsigned char) *state->buf))
				{
					/*
					 * We rely on the tsvector parser to parse the value for
					 * us
					 */
					reset_tsvector_parser(state->valstate, state->buf);
					if (gettoken_tsvector(state->valstate, strval, lenval,
										  NULL, NULL, &state->buf))
					{
						state->state = WAITOPERATOR;
						return PT_VAL;
					}
					else if (SOFT_ERROR_OCCURRED(state->escontext))
					{
						/* gettoken_tsvector reported a soft error */
						return PT_ERR;
					}
					else if (state->state == WAITFIRSTOPERAND)
					{
						/* no value at all in the query */
						return PT_END;
					}
					else
					{
						/* finally, we have to provide an operand */
						pushStop(state);
						return PT_END;
					}
				}
				/* whitespace: skip it below */
				break;

			case WAITOPERATOR:
				if (*state->buf == '\0')
				{
					return PT_END;
				}
				else if (parse_or_operator(state))
				{
					state->state = WAITOPERAND;
					*operator = OP_OR;
					return PT_OPR;
				}
				else if (ISOPERATOR(state->buf))
				{
					/* ignore other operators in this state too */
					state->buf++;
					continue;
				}
				else if (!isspace((unsigned char) *state->buf))
				{
					/*
					 * insert implicit AND between operands; nothing is
					 * consumed, so the operand is read on the next call
					 */
					state->state = WAITOPERAND;
					*operator = OP_AND;
					return PT_OPR;
				}
				/* whitespace: skip it below */
				break;
		}

		/* step over one (possibly multibyte) whitespace character */
		state->buf += pg_mblen(state->buf);
	}
}
508 | | |
509 | | static ts_tokentype |
510 | | gettoken_query_plain(TSQueryParserState state, int8 *operator, |
511 | | int *lenval, char **strval, |
512 | | int16 *weight, bool *prefix) |
513 | 0 | { |
514 | 0 | *weight = 0; |
515 | 0 | *prefix = false; |
516 | |
|
517 | 0 | if (*state->buf == '\0') |
518 | 0 | return PT_END; |
519 | | |
520 | 0 | *strval = state->buf; |
521 | 0 | *lenval = strlen(state->buf); |
522 | 0 | state->buf += *lenval; |
523 | 0 | state->count++; |
524 | 0 | return PT_VAL; |
525 | 0 | } |
526 | | |
527 | | /* |
528 | | * Push an operator to state->polstr |
529 | | */ |
530 | | void |
531 | | pushOperator(TSQueryParserState state, int8 oper, int16 distance) |
532 | 0 | { |
533 | 0 | QueryOperator *tmp; |
534 | |
|
535 | 0 | Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE); |
536 | |
|
537 | 0 | tmp = (QueryOperator *) palloc0(sizeof(QueryOperator)); |
538 | 0 | tmp->type = QI_OPR; |
539 | 0 | tmp->oper = oper; |
540 | 0 | tmp->distance = (oper == OP_PHRASE) ? distance : 0; |
541 | | /* left is filled in later with findoprnd */ |
542 | |
|
543 | 0 | state->polstr = lcons(tmp, state->polstr); |
544 | 0 | } |
545 | | |
546 | | static void |
547 | | pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix) |
548 | 0 | { |
549 | 0 | QueryOperand *tmp; |
550 | |
|
551 | 0 | if (distance >= MAXSTRPOS) |
552 | 0 | ereturn(state->escontext,, |
553 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
554 | 0 | errmsg("value is too big in tsquery: \"%s\"", |
555 | 0 | state->buffer))); |
556 | 0 | if (lenval >= MAXSTRLEN) |
557 | 0 | ereturn(state->escontext,, |
558 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
559 | 0 | errmsg("operand is too long in tsquery: \"%s\"", |
560 | 0 | state->buffer))); |
561 | | |
562 | 0 | tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); |
563 | 0 | tmp->type = QI_VAL; |
564 | 0 | tmp->weight = weight; |
565 | 0 | tmp->prefix = prefix; |
566 | 0 | tmp->valcrc = (int32) valcrc; |
567 | 0 | tmp->length = lenval; |
568 | 0 | tmp->distance = distance; |
569 | |
|
570 | 0 | state->polstr = lcons(tmp, state->polstr); |
571 | 0 | } |
572 | | |
573 | | /* |
574 | | * Push an operand to state->polstr. |
575 | | * |
576 | | * strval must point to a string equal to state->curop. lenval is the length |
577 | | * of the string. |
578 | | */ |
579 | | void |
580 | | pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) |
581 | 0 | { |
582 | 0 | pg_crc32 valcrc; |
583 | |
|
584 | 0 | if (lenval >= MAXSTRLEN) |
585 | 0 | ereturn(state->escontext,, |
586 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
587 | 0 | errmsg("word is too long in tsquery: \"%s\"", |
588 | 0 | state->buffer))); |
589 | | |
590 | 0 | INIT_LEGACY_CRC32(valcrc); |
591 | 0 | COMP_LEGACY_CRC32(valcrc, strval, lenval); |
592 | 0 | FIN_LEGACY_CRC32(valcrc); |
593 | 0 | pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix); |
594 | | |
595 | | /* append the value string to state.op, enlarging buffer if needed first */ |
596 | 0 | while (state->curop - state->op + lenval + 1 >= state->lenop) |
597 | 0 | { |
598 | 0 | int used = state->curop - state->op; |
599 | |
|
600 | 0 | state->lenop *= 2; |
601 | 0 | state->op = (char *) repalloc(state->op, state->lenop); |
602 | 0 | state->curop = state->op + used; |
603 | 0 | } |
604 | 0 | memcpy(state->curop, strval, lenval); |
605 | 0 | state->curop += lenval; |
606 | 0 | *(state->curop) = '\0'; |
607 | 0 | state->curop++; |
608 | 0 | state->sumlen += lenval + 1 /* \0 */ ; |
609 | 0 | } |
610 | | |
611 | | |
612 | | /* |
613 | | * Push a stopword placeholder to state->polstr |
614 | | */ |
615 | | void |
616 | | pushStop(TSQueryParserState state) |
617 | 0 | { |
618 | 0 | QueryOperand *tmp; |
619 | |
|
620 | 0 | tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); |
621 | 0 | tmp->type = QI_VALSTOP; |
622 | |
|
623 | 0 | state->polstr = lcons(tmp, state->polstr); |
624 | 0 | } |
625 | | |
626 | | |
/* capacity of the operator stack that makepol() keeps per invocation */
#define STACKDEPTH	32

/* one pending operator on makepol()'s operator stack */
typedef struct OperatorElement
{
	int8		op;				/* OP_* code */
	int16		distance;		/* value passed on to pushOperator() */
} OperatorElement;
634 | | |
635 | | static void |
636 | | pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance) |
637 | 0 | { |
638 | 0 | if (*lenstack == STACKDEPTH) /* internal error */ |
639 | 0 | elog(ERROR, "tsquery stack too small"); |
640 | | |
641 | 0 | stack[*lenstack].op = op; |
642 | 0 | stack[*lenstack].distance = distance; |
643 | |
|
644 | 0 | (*lenstack)++; |
645 | 0 | } |
646 | | |
647 | | static void |
648 | | cleanOpStack(TSQueryParserState state, |
649 | | OperatorElement *stack, int *lenstack, int8 op) |
650 | 0 | { |
651 | 0 | int opPriority = OP_PRIORITY(op); |
652 | |
|
653 | 0 | while (*lenstack) |
654 | 0 | { |
655 | | /* NOT is right associative unlike to others */ |
656 | 0 | if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) || |
657 | 0 | (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op))) |
658 | 0 | break; |
659 | | |
660 | 0 | (*lenstack)--; |
661 | 0 | pushOperator(state, stack[*lenstack].op, |
662 | 0 | stack[*lenstack].distance); |
663 | 0 | } |
664 | 0 | } |
665 | | |
666 | | /* |
667 | | * Make polish (prefix) notation of query. |
668 | | * |
669 | | * See parse_tsquery for explanation of pushval. |
670 | | */ |
671 | | static void |
672 | | makepol(TSQueryParserState state, |
673 | | PushFunction pushval, |
674 | | Datum opaque) |
675 | 0 | { |
676 | 0 | int8 operator = 0; |
677 | 0 | ts_tokentype type; |
678 | 0 | int lenval = 0; |
679 | 0 | char *strval = NULL; |
680 | 0 | OperatorElement opstack[STACKDEPTH]; |
681 | 0 | int lenstack = 0; |
682 | 0 | int16 weight = 0; |
683 | 0 | bool prefix; |
684 | | |
685 | | /* since this function recurses, it could be driven to stack overflow */ |
686 | 0 | check_stack_depth(); |
687 | |
|
688 | 0 | while ((type = state->gettoken(state, &operator, |
689 | 0 | &lenval, &strval, |
690 | 0 | &weight, &prefix)) != PT_END) |
691 | 0 | { |
692 | 0 | switch (type) |
693 | 0 | { |
694 | 0 | case PT_VAL: |
695 | 0 | pushval(opaque, state, strval, lenval, weight, prefix); |
696 | 0 | break; |
697 | 0 | case PT_OPR: |
698 | 0 | cleanOpStack(state, opstack, &lenstack, operator); |
699 | 0 | pushOpStack(opstack, &lenstack, operator, weight); |
700 | 0 | break; |
701 | 0 | case PT_OPEN: |
702 | 0 | makepol(state, pushval, opaque); |
703 | 0 | break; |
704 | 0 | case PT_CLOSE: |
705 | 0 | cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); |
706 | 0 | return; |
707 | 0 | case PT_ERR: |
708 | 0 | default: |
709 | | /* don't overwrite a soft error saved by gettoken function */ |
710 | 0 | if (!SOFT_ERROR_OCCURRED(state->escontext)) |
711 | 0 | errsave(state->escontext, |
712 | 0 | (errcode(ERRCODE_SYNTAX_ERROR), |
713 | 0 | errmsg("syntax error in tsquery: \"%s\"", |
714 | 0 | state->buffer))); |
715 | 0 | return; |
716 | 0 | } |
717 | | /* detect soft error in pushval or recursion */ |
718 | 0 | if (SOFT_ERROR_OCCURRED(state->escontext)) |
719 | 0 | return; |
720 | 0 | } |
721 | | |
722 | 0 | cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); |
723 | 0 | } |
724 | | |
725 | | static void |
726 | | findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) |
727 | 0 | { |
728 | | /* since this function recurses, it could be driven to stack overflow. */ |
729 | 0 | check_stack_depth(); |
730 | |
|
731 | 0 | if (*pos >= nnodes) |
732 | 0 | elog(ERROR, "malformed tsquery: operand not found"); |
733 | | |
734 | 0 | if (ptr[*pos].type == QI_VAL) |
735 | 0 | { |
736 | 0 | (*pos)++; |
737 | 0 | } |
738 | 0 | else if (ptr[*pos].type == QI_VALSTOP) |
739 | 0 | { |
740 | 0 | *needcleanup = true; /* we'll have to remove stop words */ |
741 | 0 | (*pos)++; |
742 | 0 | } |
743 | 0 | else |
744 | 0 | { |
745 | 0 | Assert(ptr[*pos].type == QI_OPR); |
746 | |
|
747 | 0 | if (ptr[*pos].qoperator.oper == OP_NOT) |
748 | 0 | { |
749 | 0 | ptr[*pos].qoperator.left = 1; /* fixed offset */ |
750 | 0 | (*pos)++; |
751 | | |
752 | | /* process the only argument */ |
753 | 0 | findoprnd_recurse(ptr, pos, nnodes, needcleanup); |
754 | 0 | } |
755 | 0 | else |
756 | 0 | { |
757 | 0 | QueryOperator *curitem = &ptr[*pos].qoperator; |
758 | 0 | int tmp = *pos; /* save current position */ |
759 | |
|
760 | 0 | Assert(curitem->oper == OP_AND || |
761 | 0 | curitem->oper == OP_OR || |
762 | 0 | curitem->oper == OP_PHRASE); |
763 | |
|
764 | 0 | (*pos)++; |
765 | | |
766 | | /* process RIGHT argument */ |
767 | 0 | findoprnd_recurse(ptr, pos, nnodes, needcleanup); |
768 | |
|
769 | 0 | curitem->left = *pos - tmp; /* set LEFT arg's offset */ |
770 | | |
771 | | /* process LEFT argument */ |
772 | 0 | findoprnd_recurse(ptr, pos, nnodes, needcleanup); |
773 | 0 | } |
774 | 0 | } |
775 | 0 | } |
776 | | |
777 | | |
778 | | /* |
779 | | * Fill in the left-fields previously left unfilled. |
780 | | * The input QueryItems must be in polish (prefix) notation. |
781 | | * Also, set *needcleanup to true if there are any QI_VALSTOP nodes. |
782 | | */ |
783 | | static void |
784 | | findoprnd(QueryItem *ptr, int size, bool *needcleanup) |
785 | 0 | { |
786 | 0 | uint32 pos; |
787 | |
|
788 | 0 | *needcleanup = false; |
789 | 0 | pos = 0; |
790 | 0 | findoprnd_recurse(ptr, &pos, size, needcleanup); |
791 | |
|
792 | 0 | if (pos != size) |
793 | 0 | elog(ERROR, "malformed tsquery: extra nodes"); |
794 | 0 | } |
795 | | |
796 | | |
/*
 * Parse the tsquery stored in "buf".
 *
 * Each value (operand) in the query is passed to pushval. pushval can
 * transform the simple value to an arbitrarily complex expression using
 * pushValue and pushOperator. It must push a single value with pushValue,
 * a complete expression with all operands, or a stopword placeholder
 * with pushStop, otherwise the prefix notation representation will be broken,
 * having an operator with no operand.
 *
 * opaque is passed on to pushval as is, pushval can use it to store its
 * private state.
 *
 * The pushval function can record soft errors via escontext.
 * Callers must check SOFT_ERROR_OCCURRED to detect that.
 *
 * A bitmask of flags (see ts_utils.h) and an error context object
 * can be provided as well.  If a soft error occurs, NULL is returned.
 *
 * A query that yields no nodes at all produces an empty TSQuery (size 0),
 * preceded by a NOTICE unless soft error reporting is in use.
 */
TSQuery
parse_tsquery(char *buf,
			  PushFunction pushval,
			  Datum opaque,
			  int flags,
			  Node *escontext)
{
	struct TSQueryParserStateData state;
	int			i;
	TSQuery		query;
	int			commonlen;
	QueryItem  *ptr;
	ListCell   *cell;
	bool		noisy;
	bool		needcleanup;
	int			tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;

	/* plain should not be used with web */
	Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));

	/* select suitable tokenizer */
	if (flags & P_TSQ_PLAIN)
		state.gettoken = gettoken_query_plain;
	else if (flags & P_TSQ_WEB)
	{
		state.gettoken = gettoken_query_websearch;
		/* the value parser is told about web mode as well */
		tsv_flags |= P_TSV_IS_WEB;
	}
	else
		state.gettoken = gettoken_query_standard;

	/* emit nuisance NOTICEs only if not doing soft errors */
	noisy = !(escontext && IsA(escontext, ErrorSaveContext));

	/* init state */
	state.buffer = buf;
	state.buf = buf;
	state.count = 0;
	state.state = WAITFIRSTOPERAND;
	state.polstr = NIL;
	state.escontext = escontext;

	/* init value parser's state */
	state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);

	/* init list of operand */
	state.sumlen = 0;
	state.lenop = 64;
	state.curop = state.op = (char *) palloc(state.lenop);
	*(state.curop) = '\0';

	/* parse query & make polish notation (postfix, but in reverse order) */
	makepol(&state, pushval, opaque);

	close_tsvector_parser(state.valstate);

	/* the tokenizer, pushval or makepol itself may have saved an error */
	if (SOFT_ERROR_OCCURRED(escontext))
		return NULL;

	if (state.polstr == NIL)
	{
		/* nothing was pushed: return an empty query */
		if (noisy)
			ereport(NOTICE,
					(errmsg("text-search query doesn't contain lexemes: \"%s\"",
							state.buffer)));
		query = (TSQuery) palloc(HDRSIZETQ);
		SET_VARSIZE(query, HDRSIZETQ);
		query->size = 0;
		return query;
	}

	if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
		ereturn(escontext, NULL,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("tsquery is too large")));
	commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);

	/* Pack the QueryItems in the final TSQuery struct to return to caller */
	query = (TSQuery) palloc0(commonlen);
	SET_VARSIZE(query, commonlen);
	query->size = list_length(state.polstr);
	ptr = GETQUERY(query);

	/* Copy QueryItems to TSQuery */
	i = 0;
	foreach(cell, state.polstr)
	{
		QueryItem  *item = (QueryItem *) lfirst(cell);

		switch (item->type)
		{
			case QI_VAL:
				memcpy(&ptr[i], item, sizeof(QueryOperand));
				break;
			case QI_VALSTOP:
				/* only the type matters; the rest is zero from palloc0 */
				ptr[i].type = QI_VALSTOP;
				break;
			case QI_OPR:
				memcpy(&ptr[i], item, sizeof(QueryOperator));
				break;
			default:
				elog(ERROR, "unrecognized QueryItem type: %d", item->type);
		}
		i++;
	}

	/* Copy all the operand strings to TSQuery */
	memcpy(GETOPERAND(query), state.op, state.sumlen);
	pfree(state.op);

	/*
	 * Set left operand pointers for every operator.  While we're at it,
	 * detect whether there are any QI_VALSTOP nodes.
	 */
	findoprnd(ptr, query->size, &needcleanup);

	/*
	 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
	 */
	if (needcleanup)
		query = cleanup_tsquery_stopwords(query, noisy);

	return query;
}
940 | | |
/*
 * PushFunction used by tsqueryin(): push the operand exactly as it was
 * written.  "opaque" is not used.
 */
static void
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
			 int16 weight, bool prefix)
{
	pushValue(state, strval, lenval, weight, prefix);
}
947 | | |
948 | | /* |
949 | | * in without morphology |
950 | | */ |
951 | | Datum |
952 | | tsqueryin(PG_FUNCTION_ARGS) |
953 | 0 | { |
954 | 0 | char *in = PG_GETARG_CSTRING(0); |
955 | 0 | Node *escontext = fcinfo->context; |
956 | |
|
957 | 0 | PG_RETURN_TSQUERY(parse_tsquery(in, |
958 | 0 | pushval_asis, |
959 | 0 | PointerGetDatum(NULL), |
960 | 0 | 0, |
961 | 0 | escontext)); |
962 | 0 | } |
963 | | |
964 | | /* |
965 | | * out function |
966 | | */ |
/* working state of infix() */
typedef struct
{
	QueryItem  *curpol;			/* item to print next; infix() advances it */
	char	   *buf;			/* start of the output buffer */
	char	   *cur;			/* current write position inside buf */
	char	   *op;				/* base of the operand strings; an operand's
								 * text is at op + its distance */
	int			buflen;			/* allocated size of buf */
} INFIX;
975 | | |
/*
 * Makes sure inf->buf is large enough for adding 'addsize' bytes (plus one
 * terminating byte) at inf->cur.  The buffer is doubled until that fits,
 * and inf->cur is re-pointed into the possibly relocated buffer.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as
 * one statement; "RESIZEBUF(...);" is then safe in an unbraced if/else.
 * Both arguments are evaluated more than once, so they must not have side
 * effects.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int		resizebuf_used_ = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + resizebuf_used_; \
	} \
} while (0)
985 | | |
986 | | /* |
987 | | * recursively traverse the tree and |
988 | | * print it in infix (human-readable) form |
989 | | */ |
990 | | static void |
991 | | infix(INFIX *in, int parentPriority, bool rightPhraseOp) |
992 | 0 | { |
993 | | /* since this function recurses, it could be driven to stack overflow. */ |
994 | 0 | check_stack_depth(); |
995 | |
|
996 | 0 | if (in->curpol->type == QI_VAL) |
997 | 0 | { |
998 | 0 | QueryOperand *curpol = &in->curpol->qoperand; |
999 | 0 | char *op = in->op + curpol->distance; |
1000 | 0 | int clen; |
1001 | |
|
1002 | 0 | RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6); |
1003 | 0 | *(in->cur) = '\''; |
1004 | 0 | in->cur++; |
1005 | 0 | while (*op) |
1006 | 0 | { |
1007 | 0 | if (t_iseq(op, '\'')) |
1008 | 0 | { |
1009 | 0 | *(in->cur) = '\''; |
1010 | 0 | in->cur++; |
1011 | 0 | } |
1012 | 0 | else if (t_iseq(op, '\\')) |
1013 | 0 | { |
1014 | 0 | *(in->cur) = '\\'; |
1015 | 0 | in->cur++; |
1016 | 0 | } |
1017 | 0 | COPYCHAR(in->cur, op); |
1018 | |
|
1019 | 0 | clen = pg_mblen(op); |
1020 | 0 | op += clen; |
1021 | 0 | in->cur += clen; |
1022 | 0 | } |
1023 | 0 | *(in->cur) = '\''; |
1024 | 0 | in->cur++; |
1025 | 0 | if (curpol->weight || curpol->prefix) |
1026 | 0 | { |
1027 | 0 | *(in->cur) = ':'; |
1028 | 0 | in->cur++; |
1029 | 0 | if (curpol->prefix) |
1030 | 0 | { |
1031 | 0 | *(in->cur) = '*'; |
1032 | 0 | in->cur++; |
1033 | 0 | } |
1034 | 0 | if (curpol->weight & (1 << 3)) |
1035 | 0 | { |
1036 | 0 | *(in->cur) = 'A'; |
1037 | 0 | in->cur++; |
1038 | 0 | } |
1039 | 0 | if (curpol->weight & (1 << 2)) |
1040 | 0 | { |
1041 | 0 | *(in->cur) = 'B'; |
1042 | 0 | in->cur++; |
1043 | 0 | } |
1044 | 0 | if (curpol->weight & (1 << 1)) |
1045 | 0 | { |
1046 | 0 | *(in->cur) = 'C'; |
1047 | 0 | in->cur++; |
1048 | 0 | } |
1049 | 0 | if (curpol->weight & 1) |
1050 | 0 | { |
1051 | 0 | *(in->cur) = 'D'; |
1052 | 0 | in->cur++; |
1053 | 0 | } |
1054 | 0 | } |
1055 | 0 | *(in->cur) = '\0'; |
1056 | 0 | in->curpol++; |
1057 | 0 | } |
1058 | 0 | else if (in->curpol->qoperator.oper == OP_NOT) |
1059 | 0 | { |
1060 | 0 | int priority = QO_PRIORITY(in->curpol); |
1061 | |
|
1062 | 0 | if (priority < parentPriority) |
1063 | 0 | { |
1064 | 0 | RESIZEBUF(in, 2); |
1065 | 0 | sprintf(in->cur, "( "); |
1066 | 0 | in->cur = strchr(in->cur, '\0'); |
1067 | 0 | } |
1068 | 0 | RESIZEBUF(in, 1); |
1069 | 0 | *(in->cur) = '!'; |
1070 | 0 | in->cur++; |
1071 | 0 | *(in->cur) = '\0'; |
1072 | 0 | in->curpol++; |
1073 | |
|
1074 | 0 | infix(in, priority, false); |
1075 | 0 | if (priority < parentPriority) |
1076 | 0 | { |
1077 | 0 | RESIZEBUF(in, 2); |
1078 | 0 | sprintf(in->cur, " )"); |
1079 | 0 | in->cur = strchr(in->cur, '\0'); |
1080 | 0 | } |
1081 | 0 | } |
1082 | 0 | else |
1083 | 0 | { |
1084 | 0 | int8 op = in->curpol->qoperator.oper; |
1085 | 0 | int priority = QO_PRIORITY(in->curpol); |
1086 | 0 | int16 distance = in->curpol->qoperator.distance; |
1087 | 0 | INFIX nrm; |
1088 | 0 | bool needParenthesis = false; |
1089 | |
|
1090 | 0 | in->curpol++; |
1091 | 0 | if (priority < parentPriority || |
1092 | | /* phrase operator depends on order */ |
1093 | 0 | (op == OP_PHRASE && rightPhraseOp)) |
1094 | 0 | { |
1095 | 0 | needParenthesis = true; |
1096 | 0 | RESIZEBUF(in, 2); |
1097 | 0 | sprintf(in->cur, "( "); |
1098 | 0 | in->cur = strchr(in->cur, '\0'); |
1099 | 0 | } |
1100 | |
|
1101 | 0 | nrm.curpol = in->curpol; |
1102 | 0 | nrm.op = in->op; |
1103 | 0 | nrm.buflen = 16; |
1104 | 0 | nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
1105 | | |
1106 | | /* get right operand */ |
1107 | 0 | infix(&nrm, priority, (op == OP_PHRASE)); |
1108 | | |
1109 | | /* get & print left operand */ |
1110 | 0 | in->curpol = nrm.curpol; |
1111 | 0 | infix(in, priority, false); |
1112 | | |
1113 | | /* print operator & right operand */ |
1114 | 0 | RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf)); |
1115 | 0 | switch (op) |
1116 | 0 | { |
1117 | 0 | case OP_OR: |
1118 | 0 | sprintf(in->cur, " | %s", nrm.buf); |
1119 | 0 | break; |
1120 | 0 | case OP_AND: |
1121 | 0 | sprintf(in->cur, " & %s", nrm.buf); |
1122 | 0 | break; |
1123 | 0 | case OP_PHRASE: |
1124 | 0 | if (distance != 1) |
1125 | 0 | sprintf(in->cur, " <%d> %s", distance, nrm.buf); |
1126 | 0 | else |
1127 | 0 | sprintf(in->cur, " <-> %s", nrm.buf); |
1128 | 0 | break; |
1129 | 0 | default: |
1130 | | /* OP_NOT is handled in above if-branch */ |
1131 | 0 | elog(ERROR, "unrecognized operator type: %d", op); |
1132 | 0 | } |
1133 | 0 | in->cur = strchr(in->cur, '\0'); |
1134 | 0 | pfree(nrm.buf); |
1135 | |
|
1136 | 0 | if (needParenthesis) |
1137 | 0 | { |
1138 | 0 | RESIZEBUF(in, 2); |
1139 | 0 | sprintf(in->cur, " )"); |
1140 | 0 | in->cur = strchr(in->cur, '\0'); |
1141 | 0 | } |
1142 | 0 | } |
1143 | 0 | } |
1144 | | |
1145 | | Datum |
1146 | | tsqueryout(PG_FUNCTION_ARGS) |
1147 | 0 | { |
1148 | 0 | TSQuery query = PG_GETARG_TSQUERY(0); |
1149 | 0 | INFIX nrm; |
1150 | |
|
1151 | 0 | if (query->size == 0) |
1152 | 0 | { |
1153 | 0 | char *b = palloc(1); |
1154 | |
|
1155 | 0 | *b = '\0'; |
1156 | 0 | PG_RETURN_POINTER(b); |
1157 | 0 | } |
1158 | 0 | nrm.curpol = GETQUERY(query); |
1159 | 0 | nrm.buflen = 32; |
1160 | 0 | nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
1161 | 0 | *(nrm.cur) = '\0'; |
1162 | 0 | nrm.op = GETOPERAND(query); |
1163 | 0 | infix(&nrm, -1 /* lowest priority */ , false); |
1164 | |
|
1165 | 0 | PG_FREE_IF_COPY(query, 0); |
1166 | 0 | PG_RETURN_CSTRING(nrm.buf); |
1167 | 0 | } |
1168 | | |
1169 | | /* |
1170 | | * Binary Input / Output functions. The binary format is as follows: |
1171 | | * |
1172 | | * uint32 number of operators/operands in the query |
1173 | | * |
1174 | | * Followed by the operators and operands, in prefix notation. For each |
1175 | | * operand: |
1176 | | * |
1177 | | * uint8 type, QI_VAL |
1178 | | * uint8 weight |
1179 | | * uint8 prefix |
1180 | | * operand text in client encoding, null-terminated |
1181 | | * |
1182 | | * For each operator: |
1183 | | * |
1184 | | * uint8 type, QI_OPR |
1185 | | * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT. |
1186 | | * uint16 distance (only for OP_PHRASE) |
1187 | | */ |
1188 | | Datum |
1189 | | tsquerysend(PG_FUNCTION_ARGS) |
1190 | 0 | { |
1191 | 0 | TSQuery query = PG_GETARG_TSQUERY(0); |
1192 | 0 | StringInfoData buf; |
1193 | 0 | int i; |
1194 | 0 | QueryItem *item = GETQUERY(query); |
1195 | |
|
1196 | 0 | pq_begintypsend(&buf); |
1197 | |
|
1198 | 0 | pq_sendint32(&buf, query->size); |
1199 | 0 | for (i = 0; i < query->size; i++) |
1200 | 0 | { |
1201 | 0 | pq_sendint8(&buf, item->type); |
1202 | |
|
1203 | 0 | switch (item->type) |
1204 | 0 | { |
1205 | 0 | case QI_VAL: |
1206 | 0 | pq_sendint8(&buf, item->qoperand.weight); |
1207 | 0 | pq_sendint8(&buf, item->qoperand.prefix); |
1208 | 0 | pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance); |
1209 | 0 | break; |
1210 | 0 | case QI_OPR: |
1211 | 0 | pq_sendint8(&buf, item->qoperator.oper); |
1212 | 0 | if (item->qoperator.oper == OP_PHRASE) |
1213 | 0 | pq_sendint16(&buf, item->qoperator.distance); |
1214 | 0 | break; |
1215 | 0 | default: |
1216 | 0 | elog(ERROR, "unrecognized tsquery node type: %d", item->type); |
1217 | 0 | } |
1218 | 0 | item++; |
1219 | 0 | } |
1220 | | |
1221 | 0 | PG_FREE_IF_COPY(query, 0); |
1222 | |
|
1223 | 0 | PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); |
1224 | 0 | } |
1225 | | |
1226 | | Datum |
1227 | | tsqueryrecv(PG_FUNCTION_ARGS) |
1228 | 0 | { |
1229 | 0 | StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
1230 | 0 | TSQuery query; |
1231 | 0 | int i, |
1232 | 0 | len; |
1233 | 0 | QueryItem *item; |
1234 | 0 | int datalen; |
1235 | 0 | char *ptr; |
1236 | 0 | uint32 size; |
1237 | 0 | const char **operands; |
1238 | 0 | bool needcleanup; |
1239 | |
|
1240 | 0 | size = pq_getmsgint(buf, sizeof(uint32)); |
1241 | 0 | if (size > (MaxAllocSize / sizeof(QueryItem))) |
1242 | 0 | elog(ERROR, "invalid size of tsquery"); |
1243 | | |
1244 | | /* Allocate space to temporarily hold operand strings */ |
1245 | 0 | operands = palloc(size * sizeof(char *)); |
1246 | | |
1247 | | /* Allocate space for all the QueryItems. */ |
1248 | 0 | len = HDRSIZETQ + sizeof(QueryItem) * size; |
1249 | 0 | query = (TSQuery) palloc0(len); |
1250 | 0 | query->size = size; |
1251 | 0 | item = GETQUERY(query); |
1252 | |
|
1253 | 0 | datalen = 0; |
1254 | 0 | for (i = 0; i < size; i++) |
1255 | 0 | { |
1256 | 0 | item->type = (int8) pq_getmsgint(buf, sizeof(int8)); |
1257 | |
|
1258 | 0 | if (item->type == QI_VAL) |
1259 | 0 | { |
1260 | 0 | size_t val_len; /* length after recoding to server |
1261 | | * encoding */ |
1262 | 0 | uint8 weight; |
1263 | 0 | uint8 prefix; |
1264 | 0 | const char *val; |
1265 | 0 | pg_crc32 valcrc; |
1266 | |
|
1267 | 0 | weight = (uint8) pq_getmsgint(buf, sizeof(uint8)); |
1268 | 0 | prefix = (uint8) pq_getmsgint(buf, sizeof(uint8)); |
1269 | 0 | val = pq_getmsgstring(buf); |
1270 | 0 | val_len = strlen(val); |
1271 | | |
1272 | | /* Sanity checks */ |
1273 | |
|
1274 | 0 | if (weight > 0xF) |
1275 | 0 | elog(ERROR, "invalid tsquery: invalid weight bitmap"); |
1276 | | |
1277 | 0 | if (val_len > MAXSTRLEN) |
1278 | 0 | elog(ERROR, "invalid tsquery: operand too long"); |
1279 | | |
1280 | 0 | if (datalen > MAXSTRPOS) |
1281 | 0 | elog(ERROR, "invalid tsquery: total operand length exceeded"); |
1282 | | |
1283 | | /* Looks valid. */ |
1284 | | |
1285 | 0 | INIT_LEGACY_CRC32(valcrc); |
1286 | 0 | COMP_LEGACY_CRC32(valcrc, val, val_len); |
1287 | 0 | FIN_LEGACY_CRC32(valcrc); |
1288 | |
|
1289 | 0 | item->qoperand.weight = weight; |
1290 | 0 | item->qoperand.prefix = (prefix) ? true : false; |
1291 | 0 | item->qoperand.valcrc = (int32) valcrc; |
1292 | 0 | item->qoperand.length = val_len; |
1293 | 0 | item->qoperand.distance = datalen; |
1294 | | |
1295 | | /* |
1296 | | * Operand strings are copied to the final struct after this loop; |
1297 | | * here we just collect them to an array |
1298 | | */ |
1299 | 0 | operands[i] = val; |
1300 | |
|
1301 | 0 | datalen += val_len + 1; /* + 1 for the '\0' terminator */ |
1302 | 0 | } |
1303 | 0 | else if (item->type == QI_OPR) |
1304 | 0 | { |
1305 | 0 | int8 oper; |
1306 | |
|
1307 | 0 | oper = (int8) pq_getmsgint(buf, sizeof(int8)); |
1308 | 0 | if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE) |
1309 | 0 | elog(ERROR, "invalid tsquery: unrecognized operator type %d", |
1310 | 0 | (int) oper); |
1311 | 0 | if (i == size - 1) |
1312 | 0 | elog(ERROR, "invalid pointer to right operand"); |
1313 | | |
1314 | 0 | item->qoperator.oper = oper; |
1315 | 0 | if (oper == OP_PHRASE) |
1316 | 0 | item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16)); |
1317 | 0 | } |
1318 | 0 | else |
1319 | 0 | elog(ERROR, "unrecognized tsquery node type: %d", item->type); |
1320 | | |
1321 | 0 | item++; |
1322 | 0 | } |
1323 | | |
1324 | | /* Enlarge buffer to make room for the operand values. */ |
1325 | 0 | query = (TSQuery) repalloc(query, len + datalen); |
1326 | 0 | item = GETQUERY(query); |
1327 | 0 | ptr = GETOPERAND(query); |
1328 | | |
1329 | | /* |
1330 | | * Fill in the left-pointers. Checks that the tree is well-formed as a |
1331 | | * side-effect. |
1332 | | */ |
1333 | 0 | findoprnd(item, size, &needcleanup); |
1334 | | |
1335 | | /* Can't have found any QI_VALSTOP nodes */ |
1336 | 0 | Assert(!needcleanup); |
1337 | | |
1338 | | /* Copy operands to output struct */ |
1339 | 0 | for (i = 0; i < size; i++) |
1340 | 0 | { |
1341 | 0 | if (item->type == QI_VAL) |
1342 | 0 | { |
1343 | 0 | memcpy(ptr, operands[i], item->qoperand.length + 1); |
1344 | 0 | ptr += item->qoperand.length + 1; |
1345 | 0 | } |
1346 | 0 | item++; |
1347 | 0 | } |
1348 | |
|
1349 | 0 | pfree(operands); |
1350 | |
|
1351 | 0 | Assert(ptr - GETOPERAND(query) == datalen); |
1352 | |
|
1353 | 0 | SET_VARSIZE(query, len + datalen); |
1354 | |
|
1355 | 0 | PG_RETURN_TSQUERY(query); |
1356 | 0 | } |
1357 | | |
1358 | | /* |
1359 | | * debug function, used only for view query |
1360 | | * which will be executed in non-leaf pages in index |
1361 | | */ |
1362 | | Datum |
1363 | | tsquerytree(PG_FUNCTION_ARGS) |
1364 | 0 | { |
1365 | 0 | TSQuery query = PG_GETARG_TSQUERY(0); |
1366 | 0 | INFIX nrm; |
1367 | 0 | text *res; |
1368 | 0 | QueryItem *q; |
1369 | 0 | int len; |
1370 | |
|
1371 | 0 | if (query->size == 0) |
1372 | 0 | { |
1373 | 0 | res = (text *) palloc(VARHDRSZ); |
1374 | 0 | SET_VARSIZE(res, VARHDRSZ); |
1375 | 0 | PG_RETURN_POINTER(res); |
1376 | 0 | } |
1377 | | |
1378 | 0 | q = clean_NOT(GETQUERY(query), &len); |
1379 | |
|
1380 | 0 | if (!q) |
1381 | 0 | { |
1382 | 0 | res = cstring_to_text("T"); |
1383 | 0 | } |
1384 | 0 | else |
1385 | 0 | { |
1386 | 0 | nrm.curpol = q; |
1387 | 0 | nrm.buflen = 32; |
1388 | 0 | nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
1389 | 0 | *(nrm.cur) = '\0'; |
1390 | 0 | nrm.op = GETOPERAND(query); |
1391 | 0 | infix(&nrm, -1, false); |
1392 | 0 | res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf); |
1393 | 0 | pfree(q); |
1394 | 0 | } |
1395 | |
|
1396 | 0 | PG_FREE_IF_COPY(query, 0); |
1397 | |
|
1398 | 0 | PG_RETURN_TEXT_P(res); |
1399 | 0 | } |